diff options
Diffstat (limited to 'drivers/net/ethernet/intel')
29 files changed, 626 insertions, 310 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 9954493cd448..62497f5565c5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1839,7 +1839,7 @@ void i40e_dbg_pf_exit(struct i40e_pf *pf) void i40e_dbg_init(void) { i40e_dbg_root = debugfs_create_dir(i40e_driver_name, NULL); - if (!i40e_dbg_root) + if (IS_ERR(i40e_dbg_root)) pr_info("init of debugfs failed\n"); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 29ad1797adce..a86bfa3bba74 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2609,7 +2609,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) retval = i40e_correct_mac_vlan_filters (vsi, &tmp_add_list, &tmp_del_list, vlan_filters); - else + else if (pf->vf) retval = i40e_correct_vf_mac_vlan_filters (vsi, &tmp_add_list, &tmp_del_list, vlan_filters, pf->vf[vsi->vf_id].trusted); @@ -2782,7 +2782,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) } /* if the VF is not trusted do not do promisc */ - if ((vsi->type == I40E_VSI_SRIOV) && !pf->vf[vsi->vf_id].trusted) { + if (vsi->type == I40E_VSI_SRIOV && pf->vf && + !pf->vf[vsi->vf_id].trusted) { clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); goto out; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 9da0c87f0328..f99c1f7fec40 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -210,11 +210,11 @@ read_nvm_exit: * @hw: pointer to the HW structure. * @module_pointer: module pointer location in words from the NVM beginning * @offset: offset in words from module start - * @words: number of words to write - * @data: buffer with words to write to the Shadow RAM + * @words: number of words to read + * @data: buffer with words to read to the Shadow RAM * @last_command: tells the AdminQ that this is the last command * - * Writes a 16 bit words buffer to the Shadow RAM using the admin command. + * Reads a 16 bit words buffer to the Shadow RAM using the admin command. **/ static int i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer, u32 offset, @@ -234,18 +234,18 @@ static int i40e_read_nvm_aq(struct i40e_hw *hw, */ if ((offset + words) > hw->nvm.sr_size) i40e_debug(hw, I40E_DEBUG_NVM, - "NVM write error: offset %d beyond Shadow RAM limit %d\n", + "NVM read error: offset %d beyond Shadow RAM limit %d\n", (offset + words), hw->nvm.sr_size); else if (words > I40E_SR_SECTOR_SIZE_IN_WORDS) - /* We can write only up to 4KB (one sector), in one AQ write */ + /* We can read only up to 4KB (one sector), in one AQ write */ i40e_debug(hw, I40E_DEBUG_NVM, - "NVM write fail error: tried to write %d words, limit is %d.\n", + "NVM read fail error: tried to read %d words, limit is %d.\n", words, I40E_SR_SECTOR_SIZE_IN_WORDS); else if (((offset + (words - 1)) / I40E_SR_SECTOR_SIZE_IN_WORDS) != (offset / I40E_SR_SECTOR_SIZE_IN_WORDS)) - /* A single write cannot spread over two sectors */ + /* A single read cannot spread over two sectors */ i40e_debug(hw, I40E_DEBUG_NVM, - "NVM write error: cannot spread over two sectors in a single write offset=%d words=%d\n", + "NVM read error: cannot spread over two sectors in a single read offset=%d words=%d\n", offset, words); else ret_code = i40e_aq_read_nvm(hw, module_pointer, diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index f80f2735e688..8cbdebc5b698 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -255,8 +255,10 @@ struct iavf_adapter { struct workqueue_struct *wq; struct work_struct reset_task; struct work_struct adminq_task; + struct work_struct finish_config; struct delayed_work client_task; wait_queue_head_t down_waitqueue; + wait_queue_head_t reset_waitqueue; wait_queue_head_t vc_waitqueue; struct iavf_q_vector *q_vectors; struct list_head vlan_filter_list; @@ -518,8 +520,9 @@ int iavf_up(struct iavf_adapter *adapter); void iavf_down(struct iavf_adapter *adapter); int iavf_process_config(struct iavf_adapter *adapter); int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter); -void iavf_schedule_reset(struct iavf_adapter *adapter); +void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags); void iavf_schedule_request_stats(struct iavf_adapter *adapter); +void iavf_schedule_finish_config(struct iavf_adapter *adapter); void iavf_reset(struct iavf_adapter *adapter); void iavf_set_ethtool_ops(struct net_device *netdev); void iavf_update_stats(struct iavf_adapter *adapter); @@ -582,4 +585,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter); void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter); struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, const u8 *macaddr); +int iavf_wait_for_reset(struct iavf_adapter *adapter); #endif /* _IAVF_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 6f171d1d85b7..a34303ad057d 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -484,6 +484,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) { struct iavf_adapter *adapter = netdev_priv(netdev); u32 orig_flags, new_flags, changed_flags; + int ret = 0; u32 i; orig_flags = READ_ONCE(adapter->flags); @@ -531,12 +532,14 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) /* issue a reset to force legacy-rx change to take effect */ if (changed_flags & IAVF_FLAG_LEGACY_RX) { if (netif_running(netdev)) { - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(adapter->wq, &adapter->reset_task); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + ret = iavf_wait_for_reset(adapter); + if (ret) + netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset"); } } - return 0; + return ret; } /** @@ -627,6 +630,7 @@ static int iavf_set_ringparam(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); u32 new_rx_count, new_tx_count; + int ret = 0; if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; @@ -671,11 +675,13 @@ static int iavf_set_ringparam(struct net_device *netdev, } if (netif_running(netdev)) { - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(adapter->wq, &adapter->reset_task); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + ret = iavf_wait_for_reset(adapter); + if (ret) + netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset"); } - return 0; + return ret; } /** @@ -1283,6 +1289,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.src_port = fsp->m_u.tcp_ip4_spec.psrc; fltr->ip_mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst; fltr->ip_mask.tos = fsp->m_u.tcp_ip4_spec.tos; + fltr->ip_ver = 4; break; case AH_V4_FLOW: case ESP_V4_FLOW: @@ -1294,6 +1301,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.ah_ip4_spec.ip4dst; fltr->ip_mask.spi = fsp->m_u.ah_ip4_spec.spi; fltr->ip_mask.tos = fsp->m_u.ah_ip4_spec.tos; + fltr->ip_ver = 4; break; case IPV4_USER_FLOW: fltr->ip_data.v4_addrs.src_ip = fsp->h_u.usr_ip4_spec.ip4src; @@ -1306,6 +1314,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes; fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos; fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto; + fltr->ip_ver = 4; break; case TCP_V6_FLOW: case UDP_V6_FLOW: @@ -1324,6 +1333,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.src_port = fsp->m_u.tcp_ip6_spec.psrc; fltr->ip_mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst; fltr->ip_mask.tclass = fsp->m_u.tcp_ip6_spec.tclass; + fltr->ip_ver = 6; break; case AH_V6_FLOW: case ESP_V6_FLOW: @@ -1339,6 +1349,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe sizeof(struct in6_addr)); fltr->ip_mask.spi = fsp->m_u.ah_ip6_spec.spi; fltr->ip_mask.tclass = fsp->m_u.ah_ip6_spec.tclass; + fltr->ip_ver = 6; break; case IPV6_USER_FLOW: memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src, @@ -1355,6 +1366,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe fltr->ip_mask.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes; fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass; fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto; + fltr->ip_ver = 6; break; case ETHER_FLOW: fltr->eth_data.etype = fsp->h_u.ether_spec.h_proto; @@ -1365,6 +1377,10 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe return -EINVAL; } + err = iavf_validate_fdir_fltr_masks(adapter, fltr); + if (err) + return err; + if (iavf_fdir_is_dup_fltr(adapter, fltr)) return -EEXIST; @@ -1395,14 +1411,15 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx if (fsp->flow_type & FLOW_MAC_EXT) return -EINVAL; + spin_lock_bh(&adapter->fdir_fltr_lock); if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) { + spin_unlock_bh(&adapter->fdir_fltr_lock); dev_err(&adapter->pdev->dev, "Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n", IAVF_MAX_FDIR_FILTERS); return -ENOSPC; } - spin_lock_bh(&adapter->fdir_fltr_lock); if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) { dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n"); spin_unlock_bh(&adapter->fdir_fltr_lock); @@ -1775,7 +1792,9 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, case ETHTOOL_GRXCLSRLCNT: if (!FDIR_FLTR_SUPPORT(adapter)) break; + spin_lock_bh(&adapter->fdir_fltr_lock); cmd->rule_cnt = adapter->fdir_active_fltr; + spin_unlock_bh(&adapter->fdir_fltr_lock); cmd->data = IAVF_MAX_FDIR_FILTERS; ret = 0; break; @@ -1830,7 +1849,7 @@ static int iavf_set_channels(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); u32 num_req = ch->combined_count; - int i; + int ret = 0; if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && adapter->num_tc) { @@ -1852,22 +1871,13 @@ static int iavf_set_channels(struct net_device *netdev, adapter->num_req_queues = num_req; adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; - iavf_schedule_reset(adapter); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); - /* wait for the reset is done */ - for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { - msleep(IAVF_RESET_WAIT_MS); - if (adapter->flags & IAVF_FLAG_RESET_PENDING) - continue; - break; - } - if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { - adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; - adapter->num_active_queues = num_req; - return -EOPNOTSUPP; - } + ret = iavf_wait_for_reset(adapter); + if (ret) + netdev_warn(netdev, "Changing channel count timeout or interrupted waiting for reset"); - return 0; + return ret; } /** diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c index 6146203efd84..03e774bd2a5b 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c @@ -18,6 +18,79 @@ static const struct in6_addr ipv6_addr_full_mask = { } }; +static const struct in6_addr ipv6_addr_zero_mask = { + .in6_u = { + .u6_addr8 = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + } + } +}; + +/** + * iavf_validate_fdir_fltr_masks - validate Flow Director filter fields masks + * @adapter: pointer to the VF adapter structure + * @fltr: Flow Director filter data structure + * + * Returns 0 if all masks of packet fields are either full or empty. Returns + * error on at least one partial mask. + */ +int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter, + struct iavf_fdir_fltr *fltr) +{ + if (fltr->eth_mask.etype && fltr->eth_mask.etype != htons(U16_MAX)) + goto partial_mask; + + if (fltr->ip_ver == 4) { + if (fltr->ip_mask.v4_addrs.src_ip && + fltr->ip_mask.v4_addrs.src_ip != htonl(U32_MAX)) + goto partial_mask; + + if (fltr->ip_mask.v4_addrs.dst_ip && + fltr->ip_mask.v4_addrs.dst_ip != htonl(U32_MAX)) + goto partial_mask; + + if (fltr->ip_mask.tos && fltr->ip_mask.tos != U8_MAX) + goto partial_mask; + } else if (fltr->ip_ver == 6) { + if (memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_zero_mask, + sizeof(struct in6_addr)) && + memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask, + sizeof(struct in6_addr))) + goto partial_mask; + + if (memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_zero_mask, + sizeof(struct in6_addr)) && + memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask, + sizeof(struct in6_addr))) + goto partial_mask; + + if (fltr->ip_mask.tclass && fltr->ip_mask.tclass != U8_MAX) + goto partial_mask; + } + + if (fltr->ip_mask.proto && fltr->ip_mask.proto != U8_MAX) + goto partial_mask; + + if (fltr->ip_mask.src_port && fltr->ip_mask.src_port != htons(U16_MAX)) + goto partial_mask; + + if (fltr->ip_mask.dst_port && fltr->ip_mask.dst_port != htons(U16_MAX)) + goto partial_mask; + + if (fltr->ip_mask.spi && fltr->ip_mask.spi != htonl(U32_MAX)) + goto partial_mask; + + if (fltr->ip_mask.l4_header && + fltr->ip_mask.l4_header != htonl(U32_MAX)) + goto partial_mask; + + return 0; + +partial_mask: + dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, partial masks are not supported\n"); + return -EOPNOTSUPP; +} + /** * iavf_pkt_udp_no_pay_len - the length of UDP packet without payload * @fltr: Flow Director filter data structure @@ -263,8 +336,6 @@ iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr, VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST); } - fltr->ip_ver = 4; - return 0; } @@ -309,8 +380,6 @@ iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr, VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST); } - fltr->ip_ver = 6; - return 0; } @@ -722,7 +791,9 @@ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *f bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr) { struct iavf_fdir_fltr *tmp; + bool ret = false; + spin_lock_bh(&adapter->fdir_fltr_lock); list_for_each_entry(tmp, &adapter->fdir_list_head, list) { if (tmp->flow_type != fltr->flow_type) continue; @@ -732,11 +803,14 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr * !memcmp(&tmp->ip_data, &fltr->ip_data, sizeof(fltr->ip_data)) && !memcmp(&tmp->ext_data, &fltr->ext_data, - sizeof(fltr->ext_data))) - return true; + sizeof(fltr->ext_data))) { + ret = true; + break; + } } + spin_unlock_bh(&adapter->fdir_fltr_lock); - return false; + return ret; } /** diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h index 33c55c366315..9eb9f73f6adf 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h +++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h @@ -110,6 +110,8 @@ struct iavf_fdir_fltr { struct virtchnl_fdir_add vc_add_msg; }; +int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter, + struct iavf_fdir_fltr *fltr); int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index a483eb185c99..9610ca770349 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -167,6 +167,45 @@ static struct iavf_adapter *iavf_pdev_to_adapter(struct pci_dev *pdev) } /** + * iavf_is_reset_in_progress - Check if a reset is in progress + * @adapter: board private structure + */ +static bool iavf_is_reset_in_progress(struct iavf_adapter *adapter) +{ + if (adapter->state == __IAVF_RESETTING || + adapter->flags & (IAVF_FLAG_RESET_PENDING | + IAVF_FLAG_RESET_NEEDED)) + return true; + + return false; +} + +/** + * iavf_wait_for_reset - Wait for reset to finish. + * @adapter: board private structure + * + * Returns 0 if reset finished successfully, negative on timeout or interrupt. + */ +int iavf_wait_for_reset(struct iavf_adapter *adapter) +{ + int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue, + !iavf_is_reset_in_progress(adapter), + msecs_to_jiffies(5000)); + + /* If ret < 0 then it means wait was interrupted. + * If ret == 0 then it means we got a timeout while waiting + * for reset to finish. + * If ret > 0 it means reset has finished. + */ + if (ret > 0) + return 0; + else if (ret < 0) + return -EINTR; + else + return -EBUSY; +} + +/** * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out @@ -262,12 +301,14 @@ static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs) /** * iavf_schedule_reset - Set the flags and schedule a reset event * @adapter: board private structure + * @flags: IAVF_FLAG_RESET_PENDING or IAVF_FLAG_RESET_NEEDED **/ -void iavf_schedule_reset(struct iavf_adapter *adapter) +void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags) { - if (!(adapter->flags & - (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) { - adapter->flags |= IAVF_FLAG_RESET_NEEDED; + if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) && + !(adapter->flags & + (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) { + adapter->flags |= flags; queue_work(adapter->wq, &adapter->reset_task); } } @@ -295,7 +336,7 @@ static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue) struct iavf_adapter *adapter = netdev_priv(netdev); adapter->tx_timeout_count++; - iavf_schedule_reset(adapter); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); } /** @@ -1651,10 +1692,10 @@ static int iavf_set_interrupt_capability(struct iavf_adapter *adapter) adapter->msix_entries[vector].entry = vector; err = iavf_acquire_msix_vectors(adapter, v_budget); + if (!err) + iavf_schedule_finish_config(adapter); out: - netif_set_real_num_rx_queues(adapter->netdev, pairs); - netif_set_real_num_tx_queues(adapter->netdev, pairs); return err; } @@ -1828,19 +1869,16 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter) static void iavf_free_q_vectors(struct iavf_adapter *adapter) { int q_idx, num_q_vectors; - int napi_vectors; if (!adapter->q_vectors) return; num_q_vectors = adapter->num_msix_vectors - NONQ_VECS; - napi_vectors = adapter->num_active_queues; for (q_idx = 0; q_idx < num_q_vectors; q_idx++) { struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx]; - if (q_idx < napi_vectors) - netif_napi_del(&q_vector->napi); + netif_napi_del(&q_vector->napi); } kfree(adapter->q_vectors); adapter->q_vectors = NULL; @@ -1877,9 +1915,7 @@ static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter) goto err_alloc_queues; } - rtnl_lock(); err = iavf_set_interrupt_capability(adapter); - rtnl_unlock(); if (err) { dev_err(&adapter->pdev->dev, "Unable to setup interrupt capabilities\n"); @@ -1932,15 +1968,16 @@ static void iavf_free_rss(struct iavf_adapter *adapter) /** * iavf_reinit_interrupt_scheme - Reallocate queues and vectors * @adapter: board private structure + * @running: true if adapter->state == __IAVF_RUNNING * * Returns 0 on success, negative on failure **/ -static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter) +static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool running) { struct net_device *netdev = adapter->netdev; int err; - if (netif_running(netdev)) + if (running) iavf_free_traffic_irqs(adapter); iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); @@ -1965,6 +2002,78 @@ err: } /** + * iavf_finish_config - do all netdev work that needs RTNL + * @work: our work_struct + * + * Do work that needs both RTNL and crit_lock. + **/ +static void iavf_finish_config(struct work_struct *work) +{ + struct iavf_adapter *adapter; + int pairs, err; + + adapter = container_of(work, struct iavf_adapter, finish_config); + + /* Always take RTNL first to prevent circular lock dependency */ + rtnl_lock(); + mutex_lock(&adapter->crit_lock); + + if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) && + adapter->netdev_registered && + !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { + netdev_update_features(adapter->netdev); + adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES; + } + + switch (adapter->state) { + case __IAVF_DOWN: + if (!adapter->netdev_registered) { + err = register_netdevice(adapter->netdev); + if (err) { + dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n", + err); + + /* go back and try again.*/ + iavf_free_rss(adapter); + iavf_free_misc_irq(adapter); + iavf_reset_interrupt_capability(adapter); + iavf_change_state(adapter, + __IAVF_INIT_CONFIG_ADAPTER); + goto out; + } + adapter->netdev_registered = true; + } + + /* Set the real number of queues when reset occurs while + * state == __IAVF_DOWN + */ + fallthrough; + case __IAVF_RUNNING: + pairs = adapter->num_active_queues; + netif_set_real_num_rx_queues(adapter->netdev, pairs); + netif_set_real_num_tx_queues(adapter->netdev, pairs); + break; + + default: + break; + } + +out: + mutex_unlock(&adapter->crit_lock); + rtnl_unlock(); +} + +/** + * iavf_schedule_finish_config - Set the flags and schedule a reset event + * @adapter: board private structure + **/ +void iavf_schedule_finish_config(struct iavf_adapter *adapter) +{ + if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) + queue_work(adapter->wq, &adapter->finish_config); +} + +/** * iavf_process_aq_command - process aq_required flags * and sends aq command * @adapter: pointer to iavf adapter structure @@ -2371,7 +2480,7 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter) adapter->vsi_res->num_queue_pairs); adapter->flags |= IAVF_FLAG_REINIT_MSIX_NEEDED; adapter->num_req_queues = adapter->vsi_res->num_queue_pairs; - iavf_schedule_reset(adapter); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); return -EAGAIN; } @@ -2601,22 +2710,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) netif_carrier_off(netdev); adapter->link_up = false; - - /* set the semaphore to prevent any callbacks after device registration - * up to time when state of driver will be set to __IAVF_DOWN - */ - rtnl_lock(); - if (!adapter->netdev_registered) { - err = register_netdevice(netdev); - if (err) { - rtnl_unlock(); - goto err_register; - } - } - - adapter->netdev_registered = true; - netif_tx_stop_all_queues(netdev); + if (CLIENT_ALLOWED(adapter)) { err = iavf_lan_add_device(adapter); if (err) @@ -2629,7 +2724,6 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) iavf_change_state(adapter, __IAVF_DOWN); set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); - rtnl_unlock(); iavf_misc_irq_enable(adapter); wake_up(&adapter->down_waitqueue); @@ -2649,10 +2743,11 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) /* request initial VLAN offload settings */ iavf_set_vlan_offload_features(adapter, 0, netdev->features); + iavf_schedule_finish_config(adapter); return; + err_mem: iavf_free_rss(adapter); -err_register: iavf_free_misc_irq(adapter); err_sw_init: iavf_reset_interrupt_capability(adapter); @@ -2679,26 +2774,9 @@ static void iavf_watchdog_task(struct work_struct *work) goto restart_watchdog; } - if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) && - adapter->netdev_registered && - !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) && - rtnl_trylock()) { - netdev_update_features(adapter->netdev); - rtnl_unlock(); - adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES; - } - if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) iavf_change_state(adapter, __IAVF_COMM_FAILED); - if (adapter->flags & IAVF_FLAG_RESET_NEEDED) { - adapter->aq_required = 0; - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - mutex_unlock(&adapter->crit_lock); - queue_work(adapter->wq, &adapter->reset_task); - return; - } - switch (adapter->state) { case __IAVF_STARTUP: iavf_startup(adapter); @@ -2826,11 +2904,10 @@ static void iavf_watchdog_task(struct work_struct *work) /* check for hw reset */ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { - adapter->flags |= IAVF_FLAG_RESET_PENDING; adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); - queue_work(adapter->wq, &adapter->reset_task); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING); mutex_unlock(&adapter->crit_lock); queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ * 2); @@ -2940,11 +3017,6 @@ static void iavf_reset_task(struct work_struct *work) int i = 0, err; bool running; - /* Detach interface to avoid subsequent NDO callbacks */ - rtnl_lock(); - netif_device_detach(netdev); - rtnl_unlock(); - /* When device is being removed it doesn't make sense to run the reset * task, just return in such a case. */ @@ -2952,7 +3024,7 @@ static void iavf_reset_task(struct work_struct *work) if (adapter->state != __IAVF_REMOVE) queue_work(adapter->wq, &adapter->reset_task); - goto reset_finish; + return; } while (!mutex_trylock(&adapter->client_lock)) @@ -3010,11 +3082,6 @@ static void iavf_reset_task(struct work_struct *work) iavf_disable_vf(adapter); mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - if (netif_running(netdev)) { - rtnl_lock(); - dev_close(netdev); - rtnl_unlock(); - } return; /* Do not attempt to reinit. It's dead, Jim. */ } @@ -3056,7 +3123,7 @@ continue_reset: if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) || (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) { - err = iavf_reinit_interrupt_scheme(adapter); + err = iavf_reinit_interrupt_scheme(adapter, running); if (err) goto reset_err; } @@ -3151,10 +3218,11 @@ continue_reset: adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; + wake_up(&adapter->reset_waitqueue); mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - goto reset_finish; + return; reset_err: if (running) { set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); @@ -3164,21 +3232,7 @@ reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); - - if (netif_running(netdev)) { - /* Close device to ensure that Tx queues will not be started - * during netif_device_attach() at the end of the reset task. - */ - rtnl_lock(); - dev_close(netdev); - rtnl_unlock(); - } - dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); -reset_finish: - rtnl_lock(); - netif_device_attach(netdev); - rtnl_unlock(); } /** @@ -3196,9 +3250,6 @@ static void iavf_adminq_task(struct work_struct *work) u32 val, oldval; u16 pending; - if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) - goto out; - if (!mutex_trylock(&adapter->crit_lock)) { if (adapter->state == __IAVF_REMOVE) return; @@ -3207,10 +3258,13 @@ static void iavf_adminq_task(struct work_struct *work) goto out; } + if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) + goto unlock; + event.buf_len = IAVF_MAX_AQ_BUF_SIZE; event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); if (!event.msg_buf) - goto out; + goto unlock; do { ret = iavf_clean_arq_element(hw, &event, &pending); @@ -3225,11 +3279,8 @@ static void iavf_adminq_task(struct work_struct *work) if (pending != 0) memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE); } while (pending); - mutex_unlock(&adapter->crit_lock); - if ((adapter->flags & - (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || - adapter->state == __IAVF_RESETTING) + if (iavf_is_reset_in_progress(adapter)) goto freedom; /* check for error indications */ @@ -3271,6 +3322,8 @@ static void iavf_adminq_task(struct work_struct *work) freedom: kfree(event.msg_buf); +unlock: + mutex_unlock(&adapter->crit_lock); out: /* re-enable Admin queue interrupt cause */ iavf_misc_irq_enable(adapter); @@ -4315,6 +4368,7 @@ static int iavf_close(struct net_device *netdev) static int iavf_change_mtu(struct net_device *netdev, int new_mtu) { struct iavf_adapter *adapter = netdev_priv(netdev); + int ret = 0; netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu); @@ -4325,11 +4379,15 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) } if (netif_running(netdev)) { - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(adapter->wq, &adapter->reset_task); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED); + ret = iavf_wait_for_reset(adapter); + if (ret < 0) + netdev_warn(netdev, "MTU change interrupted waiting for reset"); + else if (ret) + netdev_warn(netdev, "MTU change timed out waiting for reset"); } - return 0; + return ret; } #define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \ @@ -4922,6 +4980,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->reset_task, iavf_reset_task); INIT_WORK(&adapter->adminq_task, iavf_adminq_task); + INIT_WORK(&adapter->finish_config, iavf_finish_config); INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task); INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); queue_delayed_work(adapter->wq, &adapter->watchdog_task, @@ -4930,6 +4989,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup the wait queue for indicating transition to down status */ init_waitqueue_head(&adapter->down_waitqueue); + /* Setup the wait queue for indicating transition to running state */ + init_waitqueue_head(&adapter->reset_waitqueue); + /* Setup the wait queue for indicating virtchannel events */ init_waitqueue_head(&adapter->vc_waitqueue); @@ -5061,13 +5123,15 @@ static void iavf_remove(struct pci_dev *pdev) usleep_range(500, 1000); } cancel_delayed_work_sync(&adapter->watchdog_task); + cancel_work_sync(&adapter->finish_config); + rtnl_lock(); if (adapter->netdev_registered) { - rtnl_lock(); unregister_netdevice(netdev); adapter->netdev_registered = false; - rtnl_unlock(); } + rtnl_unlock(); + if (CLIENT_ALLOWED(adapter)) { err = iavf_lan_del_device(adapter); if (err) diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 7c0578b5457b..be3c007ce90a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1961,9 +1961,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, case VIRTCHNL_EVENT_RESET_IMPENDING: dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n"); if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) { - adapter->flags |= IAVF_FLAG_RESET_PENDING; dev_info(&adapter->pdev->dev, "Scheduling reset task\n"); - queue_work(adapter->wq, &adapter->reset_task); + iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING); } break; default: @@ -2237,6 +2236,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_process_config(adapter); adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES; + iavf_schedule_finish_config(adapter); iavf_set_queue_vlan_tag_loc(adapter); @@ -2285,6 +2285,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, case VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ iavf_irq_enable(adapter, true); + wake_up(&adapter->reset_waitqueue); adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED; break; case VIRTCHNL_OP_DISABLE_QUEUES: diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 4a12316f7b46..074bf9403cd1 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -435,7 +435,8 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Receive Packet Data Buffer Size. * The Packet Data Buffer Size is defined in 128 byte units. */ - rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S; + rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len, + BIT_ULL(ICE_RLAN_CTX_DBUF_S)); /* use 32 byte descriptors */ rlan_ctx.dsize = 1; @@ -800,6 +801,8 @@ void ice_vsi_free_q_vectors(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, v_idx) ice_free_q_vector(vsi, v_idx); + + vsi->num_q_vectors = 0; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index ad0a007b7398..8f232c41a89e 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -538,6 +538,12 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode, break; case DEVLINK_ESWITCH_MODE_SWITCHDEV: { + if (ice_is_adq_active(pf)) { + dev_err(ice_pf_to_dev(pf), "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root"); + NL_SET_ERR_MSG_MOD(extack, "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root"); + return -EOPNOTSUPP; + } + dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev", pf->hw.pf_id); NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev"); diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 8d5cbbd0b3d5..ad4d4702129f 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -2681,8 +2681,13 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, ring->rx_max_pending = ICE_MAX_NUM_DESC; ring->tx_max_pending = ICE_MAX_NUM_DESC; - ring->rx_pending = vsi->rx_rings[0]->count; - ring->tx_pending = vsi->tx_rings[0]->count; + if (vsi->tx_rings && vsi->rx_rings) { + ring->rx_pending = vsi->rx_rings[0]->count; + ring->tx_pending = vsi->tx_rings[0]->count; + } else { + ring->rx_pending = 0; + ring->tx_pending = 0; + } /* Rx mini and jumbo rings are not supported */ ring->rx_mini_max_pending = 0; @@ -2716,6 +2721,10 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, return -EINVAL; } + /* Return if there is no rings (device is reloading) */ + if (!vsi->tx_rings || !vsi->rx_rings) + return -EBUSY; + new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE); if (new_tx_cnt != ring->tx_pending) netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n", diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index ead6d50fc0ad..8c6e13f87b7d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -1281,16 +1281,21 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, ICE_FLOW_FLD_OFF_INVAL); } - /* add filter for outer headers */ fltr_idx = ice_ethtool_flow_to_fltr(fsp->flow_type & ~FLOW_EXT); + + assign_bit(fltr_idx, hw->fdir_perfect_fltr, perfect_filter); + + /* add filter for outer headers */ ret = ice_fdir_set_hw_fltr_rule(pf, seg, fltr_idx, ICE_FD_HW_SEG_NON_TUN); - if (ret == -EEXIST) - /* Rule already exists, free memory and continue */ - devm_kfree(dev, seg); - else if (ret) + if (ret == -EEXIST) { + /* Rule already exists, free memory and count as success */ + ret = 0; + goto err_exit; + } else if (ret) { /* could not write filter, free memory */ goto err_exit; + } /* make tunneled filter HW entries if possible */ memcpy(&tun_seg[1], seg, sizeof(*seg)); @@ -1305,18 +1310,13 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, devm_kfree(dev, tun_seg); } - if (perfect_filter) - set_bit(fltr_idx, hw->fdir_perfect_fltr); - else - clear_bit(fltr_idx, hw->fdir_perfect_fltr); - return ret; err_exit: devm_kfree(dev, tun_seg); devm_kfree(dev, seg); - return -EOPNOTSUPP; + return ret; } /** @@ -1914,7 +1914,9 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd) input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL; /* input struct is added to the HW filter list */ - ice_fdir_update_list_entry(pf, input, fsp->location); + ret = ice_fdir_update_list_entry(pf, input, fsp->location); + if (ret) + goto release_lock; ret = ice_fdir_write_all_fltr(pf, input, true); if (ret) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 00e3afd507a4..0054d7e64ec3 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2972,39 +2972,12 @@ int ice_vsi_release(struct ice_vsi *vsi) return -ENODEV; pf = vsi->back; - /* do not unregister while driver is in the reset recovery pending - * state. Since reset/rebuild happens through PF service task workqueue, - * it's not a good idea to unregister netdev that is associated to the - * PF that is running the work queue items currently. This is done to - * avoid check_flush_dependency() warning on this wq - */ - if (vsi->netdev && !ice_is_reset_in_progress(pf->state) && - (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state))) { - unregister_netdev(vsi->netdev); - clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); - } - - if (vsi->type == ICE_VSI_PF) - ice_devlink_destroy_pf_port(pf); - if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_rss_clean(vsi); ice_vsi_close(vsi); ice_vsi_decfg(vsi); - if (vsi->netdev) { - if (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state)) { - unregister_netdev(vsi->netdev); - clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); - } - if (test_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state)) { - free_netdev(vsi->netdev); - vsi->netdev = NULL; - clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); - } - } - /* retain SW VSI data structure since it is needed to unregister and * free VSI netdev when PF is not in reset recovery pending state,\ * for ex: during rmmod. diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 93979ab18bc1..b40dfe6ae321 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4430,9 +4430,9 @@ static int ice_start_eth(struct ice_vsi *vsi) if (err) return err; - rtnl_lock(); err = ice_vsi_open(vsi); - rtnl_unlock(); + if (err) + ice_fltr_remove_all(vsi); return err; } @@ -4895,6 +4895,7 @@ int ice_load(struct ice_pf *pf) params = ice_vsi_to_params(vsi); params.flags = ICE_VSI_FLAG_INIT; + rtnl_lock(); err = ice_vsi_cfg(vsi, ¶ms); if (err) goto err_vsi_cfg; @@ -4902,6 +4903,7 @@ int ice_load(struct ice_pf *pf) err = ice_start_eth(ice_get_main_vsi(pf)); if (err) goto err_start_eth; + rtnl_unlock(); err = ice_init_rdma(pf); if (err) @@ -4916,9 +4918,11 @@ int ice_load(struct ice_pf *pf) err_init_rdma: ice_vsi_close(ice_get_main_vsi(pf)); + rtnl_lock(); err_start_eth: ice_vsi_decfg(ice_get_main_vsi(pf)); err_vsi_cfg: + rtnl_unlock(); ice_deinit_dev(pf); return err; } @@ -4931,8 +4935,10 @@ void ice_unload(struct ice_pf *pf) { ice_deinit_features(pf); ice_deinit_rdma(pf); + rtnl_lock(); ice_stop_eth(ice_get_main_vsi(pf)); ice_vsi_decfg(ice_get_main_vsi(pf)); + rtnl_unlock(); ice_deinit_dev(pf); } @@ -5739,6 +5745,13 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate) q_handle = vsi->tx_rings[queue_index]->q_handle; tc = ice_dcb_get_tc(vsi, queue_index); + vsi = ice_locate_vsi_using_queue(vsi, queue_index); + if (!vsi) { + netdev_err(netdev, "Invalid VSI for given queue %d\n", + queue_index); + return -EINVAL; + } + /* Set BW back to default, when user set maxrate to 0 */ if (!maxrate) status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc, @@ -7872,10 +7885,10 @@ static int ice_validate_mqprio_qopt(struct ice_vsi *vsi, struct tc_mqprio_qopt_offload *mqprio_qopt) { - u64 sum_max_rate = 0, sum_min_rate = 0; int non_power_of_2_qcount = 0; struct ice_pf *pf = vsi->back; int max_rss_q_cnt = 0; + u64 sum_min_rate = 0; struct device *dev; int i, speed; u8 num_tc; @@ -7891,6 +7904,7 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, dev = ice_pf_to_dev(pf); vsi->ch_rss_size = 0; num_tc = mqprio_qopt->qopt.num_tc; + speed = ice_get_link_speed_kbps(vsi); for (i = 0; num_tc; i++) { int qcount = mqprio_qopt->qopt.count[i]; @@ -7931,7 +7945,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, */ max_rate = mqprio_qopt->max_rate[i]; max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR); - sum_max_rate += max_rate; /* min_rate is minimum guaranteed rate and it can't be zero */ min_rate = mqprio_qopt->min_rate[i]; @@ -7944,6 +7957,12 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, return -EINVAL; } + if (max_rate && max_rate > speed) { + dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n", + i, max_rate, speed); + return -EINVAL; + } + iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem); if (rem) { dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps", @@ -7981,12 +8000,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) return -EINVAL; - speed = ice_get_link_speed_kbps(vsi); - if (sum_max_rate && sum_max_rate > (u64)speed) { - dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n", - sum_max_rate, speed); - return -EINVAL; - } if (sum_min_rate && sum_min_rate > (u64)speed) { dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n", sum_min_rate, speed); @@ -8800,6 +8813,7 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_pf *pf = np->vsi->back; + bool locked = false; int err; switch (type) { @@ -8809,10 +8823,32 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, ice_setup_tc_block_cb, np, np, true); case TC_SETUP_QDISC_MQPRIO: + if (ice_is_eswitch_mode_switchdev(pf)) { + netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n"); + return -EOPNOTSUPP; + } + + if (pf->adev) { + mutex_lock(&pf->adev_mutex); + device_lock(&pf->adev->dev); + locked = true; + if (pf->adev->dev.driver) { + netdev_err(netdev, "Cannot change qdisc when RDMA is active\n"); + err = -EBUSY; + goto adev_unlock; + } + } + /* setup traffic classifier for receive side */ mutex_lock(&pf->tc_mutex); err = ice_setup_tc_mqprio_qdisc(netdev, type_data); mutex_unlock(&pf->tc_mutex); + +adev_unlock: + if (locked) { + device_unlock(&pf->adev->dev); + mutex_unlock(&pf->adev_mutex); + } return err; default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 1f66914c7a20..31314e7540f8 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1131,7 +1131,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena) if (!vf) return -EINVAL; - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; @@ -1246,7 +1246,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) goto out_put_vf; } - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; @@ -1300,7 +1300,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) return -EOPNOTSUPP; } - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; @@ -1613,7 +1613,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, if (!vf) return -EINVAL; - ret = ice_check_vf_ready_for_reset(vf); + ret = ice_check_vf_ready_for_cfg(vf); if (ret) goto out_put_vf; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index b54052ef6050..4a34ef5f58d3 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -750,17 +750,16 @@ exit: /** * ice_locate_vsi_using_queue - locate VSI using queue (forward to queue action) * @vsi: Pointer to VSI - * @tc_fltr: Pointer to tc_flower_filter + * @queue: Queue index * - * Locate the VSI using specified queue. When ADQ is not enabled, always - * return input VSI, otherwise locate corresponding VSI based on per channel - * offset and qcount + * Locate the VSI using specified "queue". When ADQ is not enabled, + * always return input VSI, otherwise locate corresponding + * VSI based on per channel "offset" and "qcount" */ -static struct ice_vsi * -ice_locate_vsi_using_queue(struct ice_vsi *vsi, - struct ice_tc_flower_fltr *tc_fltr) +struct ice_vsi * +ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue) { - int num_tc, tc, queue; + int num_tc, tc; /* if ADQ is not active, passed VSI is the candidate VSI */ if (!ice_is_adq_active(vsi->back)) @@ -770,7 +769,6 @@ ice_locate_vsi_using_queue(struct ice_vsi *vsi, * upon queue number) */ num_tc = vsi->mqprio_qopt.qopt.num_tc; - queue = tc_fltr->action.fwd.q.queue; for (tc = 0; tc < num_tc; tc++) { int qcount = vsi->mqprio_qopt.qopt.count[tc]; @@ -812,6 +810,7 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr) struct ice_pf *pf = vsi->back; struct device *dev; u32 tc_class; + int q; dev = ice_pf_to_dev(pf); @@ -840,7 +839,8 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr) /* Determine destination VSI even though the action is * FWD_TO_QUEUE, because QUEUE is associated with VSI */ - dest_vsi = tc_fltr->dest_vsi; + q = tc_fltr->action.fwd.q.queue; + dest_vsi = ice_locate_vsi_using_queue(vsi, q); break; default: dev_err(dev, @@ -1716,7 +1716,7 @@ ice_tc_forward_to_queue(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr, /* If ADQ is configured, and the queue belongs to ADQ VSI, then prepare * ADQ switch filter */ - ch_vsi = ice_locate_vsi_using_queue(vsi, fltr); + ch_vsi = ice_locate_vsi_using_queue(vsi, fltr->action.fwd.q.queue); if (!ch_vsi) return -EINVAL; fltr->dest_vsi = ch_vsi; diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 8bbc1a62bdb1..65d387163a46 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -204,6 +204,7 @@ static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf) return pf->num_dmac_chnl_fltrs; } +struct ice_vsi *ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue); int ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, struct flow_cls_offload *cls_flower); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index b26ce4425f45..ea3310be8354 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -186,25 +186,6 @@ int ice_check_vf_ready_for_cfg(struct ice_vf *vf) } /** - * ice_check_vf_ready_for_reset - check if VF is ready to be reset - * @vf: VF to check if it's ready to be reset - * - * The purpose of this function is to ensure that the VF is not in reset, - * disabled, and is both initialized and active, thus enabling us to safely - * initialize another reset. - */ -int ice_check_vf_ready_for_reset(struct ice_vf *vf) -{ - int ret; - - ret = ice_check_vf_ready_for_cfg(vf); - if (!ret && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) - ret = -EAGAIN; - - return ret; -} - -/** * ice_trigger_vf_reset - Reset a VF on HW * @vf: pointer to the VF structure * @is_vflr: true if VFLR was issued, false if not @@ -631,11 +612,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) return 0; } + if (flags & ICE_VF_RESET_LOCK) + mutex_lock(&vf->cfg_lock); + else + lockdep_assert_held(&vf->cfg_lock); + if (ice_is_vf_disabled(vf)) { vsi = ice_get_vf_vsi(vf); if (!vsi) { dev_dbg(dev, "VF is already removed\n"); - return -EINVAL; + err = -EINVAL; + goto out_unlock; } ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id); @@ -644,14 +631,9 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags) dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n", vf->vf_id); - return 0; + goto out_unlock; } - if (flags & ICE_VF_RESET_LOCK) - mutex_lock(&vf->cfg_lock); - else - lockdep_assert_held(&vf->cfg_lock); - /* Set VF disable bit state here, before triggering reset */ set_bit(ICE_VF_STATE_DIS, vf->vf_states); ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index 67172fdd9bc2..48fea6fa0362 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -215,7 +215,6 @@ u16 ice_get_num_vfs(struct ice_pf *pf); struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf); bool ice_is_vf_disabled(struct ice_vf *vf); int ice_check_vf_ready_for_cfg(struct ice_vf *vf); -int ice_check_vf_ready_for_reset(struct ice_vf *vf); void ice_set_vf_state_dis(struct ice_vf *vf); bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf); void diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index efbc2968a7bf..dcf628b1fccd 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -3947,7 +3947,6 @@ error_handler: ice_vc_notify_vf_link_state(vf); break; case VIRTCHNL_OP_RESET_VF: - clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states); ops->reset_vf(vf); break; case VIRTCHNL_OP_ADD_ETH_ADDR: diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 405886ee5261..319c544b9f04 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -1385,18 +1385,6 @@ void igb_ptp_init(struct igb_adapter *adapter) return; } - spin_lock_init(&adapter->tmreg_lock); - INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); - - if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) - INIT_DELAYED_WORK(&adapter->ptp_overflow_work, - igb_ptp_overflow_check); - - adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; - adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; - - igb_ptp_reset(adapter); - adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, &adapter->pdev->dev); if (IS_ERR(adapter->ptp_clock)) { @@ -1406,6 +1394,18 @@ void igb_ptp_init(struct igb_adapter *adapter) dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); adapter->ptp_flags |= IGB_PTP_ENABLED; + + spin_lock_init(&adapter->tmreg_lock); + INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); + + if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK) + INIT_DELAYED_WORK(&adapter->ptp_overflow_work, + igb_ptp_overflow_check); + + adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE; + adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF; + + igb_ptp_reset(adapter); } } diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 00a5ee487812..38901d2a4680 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -14,6 +14,7 @@ #include <linux/timecounter.h> #include <linux/net_tstamp.h> #include <linux/bitfield.h> +#include <linux/hrtimer.h> #include "igc_hw.h" @@ -101,6 +102,8 @@ struct igc_ring { u32 start_time; u32 end_time; u32 max_sdu; + bool oper_gate_closed; /* Operating gate. True if the TX Queue is closed */ + bool admin_gate_closed; /* Future gate. True if the TX Queue will be closed */ /* CBS parameters */ bool cbs_enable; /* indicates if CBS is enabled */ @@ -160,6 +163,7 @@ struct igc_adapter { struct timer_list watchdog_timer; struct timer_list dma_err_timer; struct timer_list phy_info_timer; + struct hrtimer hrtimer; u32 wol; u32 en_mng_pt; @@ -184,10 +188,17 @@ struct igc_adapter { u32 max_frame_size; u32 min_frame_size; + int tc_setup_type; ktime_t base_time; ktime_t cycle_time; - bool qbv_enable; + bool taprio_offload_enable; u32 qbv_config_change_errors; + bool qbv_transition; + unsigned int qbv_count; + /* Access to oper_gate_closed, admin_gate_closed and qbv_transition + * are protected by the qbv_tx_lock. + */ + spinlock_t qbv_tx_lock; /* OS defined structs */ struct pci_dev *pdev; diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 44a507029946..2f780cc90883 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -546,7 +546,7 @@ #define IGC_PTM_CTRL_START_NOW BIT(29) /* Start PTM Now */ #define IGC_PTM_CTRL_EN BIT(30) /* Enable PTM */ #define IGC_PTM_CTRL_TRIG BIT(31) /* PTM Cycle trigger */ -#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x2f) << 2) +#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2) #define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8) #define IGC_PTM_SHORT_CYC_DEFAULT 10 /* Default Short/interrupted cycle interval */ diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 0e2cb00622d1..93bce729be76 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1708,6 +1708,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, /* twisted pair */ cmd->base.port = PORT_TP; cmd->base.phy_address = hw->phy.addr; + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); /* advertising link modes */ if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 019ce91c45aa..6f557e843e49 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -316,6 +316,33 @@ static void igc_clean_all_tx_rings(struct igc_adapter *adapter) igc_clean_tx_ring(adapter->tx_ring[i]); } +static void igc_disable_tx_ring_hw(struct igc_ring *ring) +{ + struct igc_hw *hw = &ring->q_vector->adapter->hw; + u8 idx = ring->reg_idx; + u32 txdctl; + + txdctl = rd32(IGC_TXDCTL(idx)); + txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; + txdctl |= IGC_TXDCTL_SWFLUSH; + wr32(IGC_TXDCTL(idx), txdctl); +} + +/** + * igc_disable_all_tx_rings_hw - Disable all transmit queue operation + * @adapter: board private structure + */ +static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + igc_disable_tx_ring_hw(tx_ring); + } +} + /** * igc_setup_tx_resources - allocate Tx resources (Descriptors) * @tx_ring: tx descriptor ring (for a specific queue) to setup @@ -711,7 +738,6 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, /* disable the queue */ wr32(IGC_TXDCTL(reg_idx), 0); wrfl(); - mdelay(10); wr32(IGC_TDLEN(reg_idx), ring->count * sizeof(union igc_adv_tx_desc)); @@ -1017,7 +1043,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, ktime_t base_time = adapter->base_time; ktime_t now = ktime_get_clocktai(); ktime_t baset_est, end_of_cycle; - u32 launchtime; + s32 launchtime; s64 n; n = div64_s64(ktime_sub_ns(now, base_time), cycle_time); @@ -1030,7 +1056,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, *first_flag = true; ring->last_ff_cycle = baset_est; - if (ktime_compare(txtime, ring->last_tx_cycle) > 0) + if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0) *insert_empty = true; } } @@ -1573,16 +1599,12 @@ done: first->bytecount = skb->len; first->gso_segs = 1; - if (tx_ring->max_sdu > 0) { - u32 max_sdu = 0; - - max_sdu = tx_ring->max_sdu + - (skb_vlan_tagged(first->skb) ? VLAN_HLEN : 0); + if (adapter->qbv_transition || tx_ring->oper_gate_closed) + goto out_drop; - if (first->bytecount > max_sdu) { - adapter->stats.txdrop++; - goto out_drop; - } + if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) { + adapter->stats.txdrop++; + goto out_drop; } if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) && @@ -2833,9 +2855,8 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) struct netdev_queue *nq = txring_txq(ring); union igc_adv_tx_desc *tx_desc = NULL; int cpu = smp_processor_id(); - u16 ntu = ring->next_to_use; struct xdp_desc xdp_desc; - u16 budget; + u16 budget, ntu; if (!netif_carrier_ok(ring->netdev)) return; @@ -2845,6 +2866,7 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring) /* Avoid transmit queue timeout since we share it with the slow path */ txq_trans_cond_update(nq); + ntu = ring->next_to_use; budget = igc_desc_unused(ring); while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) { @@ -3012,8 +3034,8 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) time_after(jiffies, tx_buffer->time_stamp + (adapter->tx_timeout_factor * HZ)) && !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) && - (rd32(IGC_TDH(tx_ring->reg_idx)) != - readl(tx_ring->tail))) { + (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) && + !tx_ring->oper_gate_closed) { /* detected Tx unit hang */ netdev_err(tx_ring->netdev, "Detected Tx Unit Hang\n" @@ -4779,6 +4801,7 @@ static int igc_sw_init(struct igc_adapter *adapter) adapter->nfc_rule_count = 0; spin_lock_init(&adapter->stats64_lock); + spin_lock_init(&adapter->qbv_tx_lock); /* Assume MSI-X interrupts, will be checked during IRQ allocation */ adapter->flags |= IGC_FLAG_HAS_MSIX; @@ -5063,6 +5086,7 @@ void igc_down(struct igc_adapter *adapter) /* clear VLAN promisc flag so VFTA will be updated if necessary */ adapter->flags &= ~IGC_FLAG_VLAN_PROMISC; + igc_disable_all_tx_rings_hw(adapter); igc_clean_all_tx_rings(adapter); igc_clean_all_rx_rings(adapter); } @@ -6096,13 +6120,16 @@ static int igc_tsn_enable_launchtime(struct igc_adapter *adapter, return igc_tsn_offload_apply(adapter); } -static int igc_tsn_clear_schedule(struct igc_adapter *adapter) +static int igc_qbv_clear_schedule(struct igc_adapter *adapter) { + unsigned long flags; int i; adapter->base_time = 0; adapter->cycle_time = NSEC_PER_SEC; + adapter->taprio_offload_enable = false; adapter->qbv_config_change_errors = 0; + adapter->qbv_count = 0; for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; @@ -6112,6 +6139,26 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter) ring->max_sdu = 0; } + spin_lock_irqsave(&adapter->qbv_tx_lock, flags); + + adapter->qbv_transition = false; + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + ring->oper_gate_closed = false; + ring->admin_gate_closed = false; + } + + spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); + + return 0; +} + +static int igc_tsn_clear_schedule(struct igc_adapter *adapter) +{ + igc_qbv_clear_schedule(adapter); + return 0; } @@ -6121,27 +6168,21 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, bool queue_configured[IGC_MAX_TX_QUEUES] = { }; struct igc_hw *hw = &adapter->hw; u32 start_time = 0, end_time = 0; + struct timespec64 now; + unsigned long flags; size_t n; int i; - switch (qopt->cmd) { - case TAPRIO_CMD_REPLACE: - adapter->qbv_enable = true; - break; - case TAPRIO_CMD_DESTROY: - adapter->qbv_enable = false; - break; - default: - return -EOPNOTSUPP; - } - - if (!adapter->qbv_enable) + if (qopt->cmd == TAPRIO_CMD_DESTROY) return igc_tsn_clear_schedule(adapter); + if (qopt->cmd != TAPRIO_CMD_REPLACE) + return -EOPNOTSUPP; + if (qopt->base_time < 0) return -ERANGE; - if (igc_is_device_id_i225(hw) && adapter->base_time) + if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable) return -EALREADY; if (!validate_schedule(adapter, qopt)) @@ -6149,6 +6190,9 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; + adapter->taprio_offload_enable = true; + + igc_ptp_read(adapter, &now); for (n = 0; n < qopt->num_entries; n++) { struct tc_taprio_sched_entry *e = &qopt->entries[n]; @@ -6184,30 +6228,49 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, ring->start_time = start_time; ring->end_time = end_time; - queue_configured[i] = true; + if (ring->start_time >= adapter->cycle_time) + queue_configured[i] = false; + else + queue_configured[i] = true; } start_time += e->interval; } + spin_lock_irqsave(&adapter->qbv_tx_lock, flags); + /* Check whether a queue gets configured. * If not, set the start and end time to be end time. */ for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *ring = adapter->tx_ring[i]; + + if (!is_base_time_past(qopt->base_time, &now)) { + ring->admin_gate_closed = false; + } else { + ring->oper_gate_closed = false; + ring->admin_gate_closed = false; + } + if (!queue_configured[i]) { - struct igc_ring *ring = adapter->tx_ring[i]; + if (!is_base_time_past(qopt->base_time, &now)) + ring->admin_gate_closed = true; + else + ring->oper_gate_closed = true; ring->start_time = end_time; ring->end_time = end_time; } } + spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); + for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; struct net_device *dev = adapter->netdev; if (qopt->max_sdu[i]) - ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len; + ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN; else ring->max_sdu = 0; } @@ -6327,6 +6390,8 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type, { struct igc_adapter *adapter = netdev_priv(dev); + adapter->tc_setup_type = type; + switch (type) { case TC_QUERY_CAPS: return igc_tc_query_caps(adapter, type_data); @@ -6574,6 +6639,33 @@ static const struct xdp_metadata_ops igc_xdp_metadata_ops = { .xmo_rx_timestamp = igc_xdp_rx_timestamp, }; +static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer) +{ + struct igc_adapter *adapter = container_of(timer, struct igc_adapter, + hrtimer); + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&adapter->qbv_tx_lock, flags); + + adapter->qbv_transition = true; + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + if (tx_ring->admin_gate_closed) { + tx_ring->admin_gate_closed = false; + tx_ring->oper_gate_closed = true; + } else { + tx_ring->oper_gate_closed = false; + } + } + adapter->qbv_transition = false; + + spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags); + + return HRTIMER_NORESTART; +} + /** * igc_probe - Device Initialization Routine * @pdev: PCI device information struct @@ -6752,6 +6844,9 @@ static int igc_probe(struct pci_dev *pdev, INIT_WORK(&adapter->reset_task, igc_reset_task); INIT_WORK(&adapter->watchdog_task, igc_watchdog_task); + hrtimer_init(&adapter->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + adapter->hrtimer.function = &igc_qbv_scheduling_timer; + /* Initialize link properties that are user-changeable */ adapter->fc_autoneg = true; hw->mac.autoneg = true; @@ -6855,6 +6950,7 @@ static void igc_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->reset_task); cancel_work_sync(&adapter->watchdog_task); + hrtimer_cancel(&adapter->hrtimer); /* Release control of h/w to f/w. If f/w is AMT enabled, this * would have already happened in close and is redundant. @@ -7252,18 +7348,6 @@ void igc_enable_rx_ring(struct igc_ring *ring) igc_alloc_rx_buffers(ring, igc_desc_unused(ring)); } -static void igc_disable_tx_ring_hw(struct igc_ring *ring) -{ - struct igc_hw *hw = &ring->q_vector->adapter->hw; - u8 idx = ring->reg_idx; - u32 txdctl; - - txdctl = rd32(IGC_TXDCTL(idx)); - txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE; - txdctl |= IGC_TXDCTL_SWFLUSH; - wr32(IGC_TXDCTL(idx), txdctl); -} - void igc_disable_tx_ring(struct igc_ring *ring) { igc_disable_tx_ring_hw(ring); diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 32ef112f8291..f0b979a70655 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -356,16 +356,35 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, tsim &= ~IGC_TSICR_TT0; } if (on) { + struct timespec64 safe_start; int i = rq->perout.index; igc_pin_perout(igc, i, pin, use_freq); - igc->perout[i].start.tv_sec = rq->perout.start.sec; + igc_ptp_read(igc, &safe_start); + + /* PPS output start time is triggered by Target time(TT) + * register. Programming any past time value into TT + * register will cause PPS to never start. Need to make + * sure we program the TT register a time ahead in + * future. There isn't a stringent need to fire PPS out + * right away. Adding +2 seconds should take care of + * corner cases. Let's say if the SYSTIML is close to + * wrap up and the timer keeps ticking as we program the + * register, adding +2seconds is safe bet. + */ + safe_start.tv_sec += 2; + + if (rq->perout.start.sec < safe_start.tv_sec) + igc->perout[i].start.tv_sec = safe_start.tv_sec; + else + igc->perout[i].start.tv_sec = rq->perout.start.sec; igc->perout[i].start.tv_nsec = rq->perout.start.nsec; igc->perout[i].period.tv_sec = ts.tv_sec; igc->perout[i].period.tv_nsec = ts.tv_nsec; - wr32(trgttimh, rq->perout.start.sec); + wr32(trgttimh, (u32)igc->perout[i].start.tv_sec); /* For now, always select timer 0 as source. */ - wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); + wr32(trgttiml, (u32)(igc->perout[i].start.tv_nsec | + IGC_TT_IO_TIMER_SEL_SYSTIM0)); if (use_freq) wr32(freqout, ns); tsauxc |= tsauxc_mask; diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 94a2b0dfb54d..a9c08321aca9 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -37,7 +37,7 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) { unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED; - if (adapter->qbv_enable) + if (adapter->taprio_offload_enable) new_flags |= IGC_FLAG_TSN_QBV_ENABLED; if (is_any_launchtime(adapter)) @@ -114,7 +114,6 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - bool tsn_mode_reconfig = false; u32 tqavctrl, baset_l, baset_h; u32 sec, nsec, cycle; ktime_t base_time, systim; @@ -133,8 +132,28 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) wr32(IGC_STQT(i), ring->start_time); wr32(IGC_ENDQT(i), ring->end_time); - txqctl |= IGC_TXQCTL_STRICT_CYCLE | - IGC_TXQCTL_STRICT_END; + if (adapter->taprio_offload_enable) { + /* If taprio_offload_enable is set we are in "taprio" + * mode and we need to be strict about the + * cycles: only transmit a packet if it can be + * completed during that cycle. + * + * If taprio_offload_enable is NOT true when + * enabling TSN offload, the cycle should have + * no external effects, but is only used internally + * to adapt the base time register after a second + * has passed. + * + * Enabling strict mode in this case would + * unnecessarily prevent the transmission of + * certain packets (i.e. at the boundary of a + * second) and thus interfere with the launchtime + * feature that promises transmission at a + * certain point in time. + */ + txqctl |= IGC_TXQCTL_STRICT_CYCLE | + IGC_TXQCTL_STRICT_END; + } if (ring->launchtime_enable) txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT; @@ -228,11 +247,10 @@ skip_cbs: tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS; - if (tqavctrl & IGC_TQAVCTRL_TRANSMIT_MODE_TSN) - tsn_mode_reconfig = true; - tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV; + adapter->qbv_count++; + cycle = adapter->cycle_time; base_time = adapter->base_time; @@ -249,17 +267,29 @@ skip_cbs: * Gate Control List (GCL) is running. */ if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && - tsn_mode_reconfig) + (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && + (adapter->qbv_count > 1)) adapter->qbv_config_change_errors++; } else { - /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit - * has to be configured before the cycle time and base time. - * Tx won't hang if there is a GCL is already running, - * so in this case we don't need to set FutScdDis. - */ - if (igc_is_device_id_i226(hw) && - !(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) - tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; + if (igc_is_device_id_i226(hw)) { + ktime_t adjust_time, expires_time; + + /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit + * has to be configured before the cycle time and base time. + * Tx won't hang if a GCL is already running, + * so in this case we don't need to set FutScdDis. + */ + if (!(rd32(IGC_BASET_H) || rd32(IGC_BASET_L))) + tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS; + + nsec = rd32(IGC_SYSTIML); + sec = rd32(IGC_SYSTIMH); + systim = ktime_set(sec, nsec); + + adjust_time = adapter->base_time; + expires_time = ktime_sub_ns(adjust_time, systim); + hrtimer_start(&adapter->hrtimer, expires_time, HRTIMER_MODE_REL); + } } wr32(IGC_TQAVCTRL, tqavctrl); @@ -305,7 +335,11 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; - if (netif_running(adapter->netdev) && igc_is_device_id_i225(hw)) { + /* Per I225/6 HW Design Section 7.5.2.1, transmit mode + * cannot be changed dynamically. Require reset the adapter. + */ + if (netif_running(adapter->netdev) && + (igc_is_device_id_i225(hw) || !adapter->qbv_count)) { schedule_work(&adapter->reset_task); return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 1726297f2e0d..8eb9839a3ca6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8479,7 +8479,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring, struct ixgbe_adapter *adapter = q_vector->adapter; if (unlikely(skb_tail_pointer(skb) < hdr.network + - VXLAN_HEADROOM)) + vxlan_headroom(0))) return; /* verify the port is recognized as VXLAN */ |