summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/net/stmmac.txt14
-rw-r--r--Documentation/networking/phy.txt9
-rw-r--r--Documentation/networking/stmmac.txt24
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c4
-rw-r--r--arch/s390/net/bpf_jit_comp.c2
-rw-r--r--arch/x86/net/bpf_jit_comp.c2
-rw-r--r--drivers/net/ethernet/alacritech/slicoss.c13
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_main.c1
-rw-r--r--drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c21
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_vf_main.c2070
-rw-r--r--drivers/net/ethernet/cavium/liquidio/liquidio_common.h1
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.c5
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_droq.c10
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_network.h1
-rw-r--r--drivers/net/ethernet/cavium/liquidio/request_manager.c6
-rw-r--r--drivers/net/ethernet/cavium/liquidio/response_manager.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c41
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c9
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c70
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c12
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c4
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c26
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c7
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c25
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c14
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c33
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c11
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c13
-rw-r--r--drivers/net/macsec.c25
-rw-r--r--drivers/net/phy/phy_device.c47
-rw-r--r--drivers/net/usb/Kconfig5
-rw-r--r--drivers/net/usb/lan78xx.c109
-rw-r--r--drivers/net/usb/lan78xx.h14
-rw-r--r--drivers/vhost/vhost.c2
-rw-r--r--drivers/vhost/vsock.c3
-rw-r--r--include/linux/bpf_verifier.h14
-rw-r--r--include/linux/filter.h6
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/linux/phy.h4
-rw-r--r--include/linux/skbuff.h2
-rw-r--r--include/linux/stmmac.h3
-rw-r--r--include/net/flow_dissector.h17
-rw-r--r--include/net/sock.h28
-rw-r--r--include/uapi/linux/bpf.h11
-rw-r--r--include/uapi/linux/pkt_cls.h17
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/bpf/syscall.c2
-rw-r--r--kernel/bpf/verifier.c4
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/filter.c28
-rw-r--r--net/core/flow_dissector.c31
-rw-r--r--net/core/skbuff.c28
-rw-r--r--net/core/sysctl_net_core.c5
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/udp.c12
-rw-r--r--net/sched/cls_flower.c129
-rw-r--r--samples/bpf/Makefile4
-rw-r--r--samples/bpf/bpf_helpers.h2
-rw-r--r--samples/bpf/bpf_load.c94
-rw-r--r--samples/bpf/bpf_load.h1
-rw-r--r--samples/bpf/xdp1_user.c93
-rw-r--r--samples/bpf/xdp_tx_iptunnel_common.h37
-rw-r--r--samples/bpf/xdp_tx_iptunnel_kern.c236
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c256
75 files changed, 3471 insertions, 331 deletions
diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt
index 41b49e6075f5..128da752fec9 100644
--- a/Documentation/devicetree/bindings/net/stmmac.txt
+++ b/Documentation/devicetree/bindings/net/stmmac.txt
@@ -1,7 +1,7 @@
* STMicroelectronics 10/100/1000 Ethernet driver (GMAC)
Required properties:
-- compatible: Should be "snps,dwmac-<ip_version>" "snps,dwmac"
+- compatible: Should be "snps,dwmac-<ip_version>", "snps,dwmac"
For backwards compatibility: "st,spear600-gmac" is also supported.
- reg: Address and length of the register set for the device
- interrupt-parent: Should be the phandle for the interrupt controller
@@ -34,7 +34,13 @@ Optional properties:
platforms.
- tx-fifo-depth: See ethernet.txt file in the same directory
- rx-fifo-depth: See ethernet.txt file in the same directory
-- snps,pbl Programmable Burst Length
+- snps,pbl Programmable Burst Length (tx and rx)
+- snps,txpbl Tx Programmable Burst Length. Only for GMAC and newer.
+ If set, DMA tx will use this value rather than snps,pbl.
+- snps,rxpbl Rx Programmable Burst Length. Only for GMAC and newer.
+ If set, DMA rx will use this value rather than snps,pbl.
+- snps,no-pbl-x8 Don't multiply the pbl/txpbl/rxpbl values by 8.
+ For core rev < 3.50, don't multiply the values by 4.
- snps,aal Address-Aligned Beats
- snps,fixed-burst Program the DMA to use the fixed burst mode
- snps,mixed-burst Program the DMA to use the mixed burst mode
@@ -50,6 +56,8 @@ Optional properties:
- snps,ps-speed: port selection speed that can be passed to the core when
PCS is supported. For example, this is used in case of SGMII
and MAC2MAC connection.
+- snps,tso: this enables the TSO feature otherwise it will be managed by
+ MAC HW capability register. Only for GMAC4 and newer.
- AXI BUS Mode parameters: below the list of all the parameters to program the
AXI register inside the DMA module:
- snps,lpi_en: enable Low Power Interface
@@ -62,8 +70,6 @@ Optional properties:
- snps,fb: fixed-burst
- snps,mb: mixed-burst
- snps,rb: rebuild INCRx Burst
- - snps,tso: this enables the TSO feature otherwise it will be managed by
- MAC HW capability register.
- mdio: with compatible = "snps,dwmac-mdio", create and register mdio bus.
Examples:
diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt
index e017d933d530..16f90d817224 100644
--- a/Documentation/networking/phy.txt
+++ b/Documentation/networking/phy.txt
@@ -407,6 +407,15 @@ Board Fixups
The stubs set one of the two matching criteria, and set the other one to
match anything.
+ When phy_register_fixup() or *_for_uid()/*_for_id() is called at module,
+ unregister fixup and free allocate memory are required.
+
+ Call one of following function before unloading module.
+
+ int phy_unregister_fixup(const char *phy_id, u32 phy_uid, u32 phy_uid_mask);
+ int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
+ int phy_register_fixup_for_id(const char *phy_id);
+
Standards
IEEE Standard 802.3: CSMA/CD Access Method and Physical Layer Specifications, Section Two:
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index 014f4f756cb7..2bb07078f535 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -152,8 +152,10 @@ Where:
o dma_cfg: internal DMA parameters
o pbl: the Programmable Burst Length is maximum number of beats to
be transferred in one DMA transaction.
- GMAC also enables the 4xPBL by default.
- o fixed_burst/mixed_burst/burst_len
+ GMAC also enables the 4xPBL by default. (8xPBL for GMAC 3.50 and newer)
+ o txpbl/rxpbl: GMAC and newer supports independent DMA pbl for tx/rx.
+ o pblx8: Enable 8xPBL (4xPBL for core rev < 3.50). Enabled by default.
+ o fixed_burst/mixed_burst/aal
o clk_csr: fixed CSR Clock range selection.
o has_gmac: uses the GMAC core.
o enh_desc: if sets the MAC will use the enhanced descriptor structure.
@@ -205,16 +207,24 @@ tuned according to the HW capabilities.
struct stmmac_dma_cfg {
int pbl;
+ int txpbl;
+ int rxpbl;
+ bool pblx8;
int fixed_burst;
- int burst_len_supported;
+ int mixed_burst;
+ bool aal;
};
Where:
- o pbl: Programmable Burst Length
+ o pbl: Programmable Burst Length (tx and rx)
+ o txpbl: Transmit Programmable Burst Length. Only for GMAC and newer.
+ If set, DMA tx will use this value rather than pbl.
+ o rxpbl: Receive Programmable Burst Length. Only for GMAC and newer.
+ If set, DMA rx will use this value rather than pbl.
+ o pblx8: Enable 8xPBL (4xPBL for core rev < 3.50). Enabled by default.
o fixed_burst: program the DMA to use the fixed burst mode
- o burst_len: this is the value we put in the register
- supported values are provided as macros in
- linux/stmmac.h header file.
+ o mixed_burst: program the DMA to use the mixed burst mode
+ o aal: Address-Aligned Beats
---
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0fe98a567125..73a5cf18fd84 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -766,7 +766,7 @@ emit_clear:
func = (u8 *) __bpf_call_base + imm;
/* Save skb pointer if we need to re-cache skb data */
- if (bpf_helper_changes_skb_data(func))
+ if (bpf_helper_changes_pkt_data(func))
PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -775,7 +775,7 @@ emit_clear:
PPC_MR(b2p[BPF_REG_0], 3);
/* refresh skb cache */
- if (bpf_helper_changes_skb_data(func)) {
+ if (bpf_helper_changes_pkt_data(func)) {
/* reload skb pointer to r3 */
PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_skb_loads(image, ctx);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index bee281f3163d..167b31b186c1 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -981,7 +981,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT2(0x0d00, REG_14, REG_W1);
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
- if (bpf_helper_changes_skb_data((void *)func)) {
+ if (bpf_helper_changes_pkt_data((void *)func)) {
jit->seen |= SEEN_SKB_CHANGE;
/* lg %b1,ST_OFF_SKBP(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index fe04a04dab8e..e76d1af60f7a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -853,7 +853,7 @@ xadd: if (is_imm8(insn->off))
func = (u8 *) __bpf_call_base + imm32;
jmp_offset = func - (image + addrs[i]);
if (seen_ld_abs) {
- reload_skb_data = bpf_helper_changes_skb_data(func);
+ reload_skb_data = bpf_helper_changes_pkt_data(func);
if (reload_skb_data) {
EMIT1(0x57); /* push %rdi */
jmp_offset += 22; /* pop, mov, sub, mov */
diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c
index e77ecd5b307c..b9fbd0107008 100644
--- a/drivers/net/ethernet/alacritech/slicoss.c
+++ b/drivers/net/ethernet/alacritech/slicoss.c
@@ -1863,18 +1863,7 @@ static struct pci_driver slic_driver = {
.remove = slic_remove,
};
-static int __init slic_init_module(void)
-{
- return pci_register_driver(&slic_driver);
-}
-
-static void __exit slic_cleanup_module(void)
-{
- pci_unregister_driver(&slic_driver);
-}
-
-module_init(slic_init_module);
-module_exit(slic_cleanup_module);
+module_pci_driver(slic_driver);
MODULE_DESCRIPTION("Alacritech non-accelerated SLIC driver");
MODULE_AUTHOR("Lino Sanfilippo <LinoSanfilippo@gmx.de>");
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 6c7eea8b36af..884a334e82d0 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -635,6 +635,7 @@ static void xgene_enet_free_pagepool(struct xgene_enet_desc_ring *buf_pool,
return;
dev = ndev_to_dev(buf_pool->ndev);
+ slots = buf_pool->slots - 1;
head = buf_pool->head;
for (i = 0; i < 4; i++) {
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
index 108e4878e608..b6117b6a1de2 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
@@ -529,6 +529,26 @@ static u64 cn23xx_vf_msix_interrupt_handler(void *dev)
return ret;
}
+static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq)
+{
+ u32 pkt_in_done = readl(iq->inst_cnt_reg);
+ u32 last_done;
+ u32 new_idx;
+
+ last_done = pkt_in_done - iq->pkt_in_done;
+ iq->pkt_in_done = pkt_in_done;
+
+ /* Modulo of the new index with the IQ size will give us
+ * the new index. The iq->reset_instr_cnt is always zero for
+ * cn23xx, so no extra adjustments are needed.
+ */
+ new_idx = (iq->octeon_read_index +
+ (u32)(last_done & CN23XX_PKT_IN_DONE_CNT_MASK)) %
+ iq->max_count;
+
+ return new_idx;
+}
+
static void cn23xx_enable_vf_interrupt(struct octeon_device *oct, u8 intr_flag)
{
struct octeon_cn23xx_vf *cn23xx = (struct octeon_cn23xx_vf *)oct->chip;
@@ -660,6 +680,7 @@ int cn23xx_setup_octeon_vf_device(struct octeon_device *oct)
oct->fn_list.msix_interrupt_handler = cn23xx_vf_msix_interrupt_handler;
oct->fn_list.setup_device_regs = cn23xx_setup_vf_device_regs;
+ oct->fn_list.update_iq_read_idx = cn23xx_update_read_index;
oct->fn_list.enable_interrupt = cn23xx_enable_vf_interrupt;
oct->fn_list.disable_interrupt = cn23xx_disable_vf_interrupt;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index e6321f35399c..9989ac393e94 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -22,7 +22,9 @@
#include "octeon_iq.h"
#include "response_manager.h"
#include "octeon_device.h"
+#include "octeon_nic.h"
#include "octeon_main.h"
+#include "octeon_network.h"
#include "cn23xx_vf_device.h"
MODULE_AUTHOR("Cavium Networks, <support@cavium.com>");
@@ -30,6 +32,76 @@ MODULE_DESCRIPTION("Cavium LiquidIO Intelligent Server Adapter Virtual Function
MODULE_LICENSE("GPL");
MODULE_VERSION(LIQUIDIO_VERSION);
+static int debug = -1;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
+/* Bit mask values for lio->ifstate */
+#define LIO_IFSTATE_DROQ_OPS 0x01
+#define LIO_IFSTATE_REGISTERED 0x02
+#define LIO_IFSTATE_RUNNING 0x04
+
+struct liquidio_if_cfg_context {
+ int octeon_id;
+
+ wait_queue_head_t wc;
+
+ int cond;
+};
+
+struct liquidio_if_cfg_resp {
+ u64 rh;
+ struct liquidio_if_cfg_info cfg_info;
+ u64 status;
+};
+
+struct liquidio_rx_ctl_context {
+ int octeon_id;
+
+ wait_queue_head_t wc;
+
+ int cond;
+};
+
+union tx_info {
+ u64 u64;
+ struct {
+#ifdef __BIG_ENDIAN_BITFIELD
+ u16 gso_size;
+ u16 gso_segs;
+ u32 reserved;
+#else
+ u32 reserved;
+ u16 gso_segs;
+ u16 gso_size;
+#endif
+ } s;
+};
+
+#define OCTNIC_MAX_SG (MAX_SKB_FRAGS)
+
+#define OCTNIC_GSO_MAX_HEADER_SIZE 128
+#define OCTNIC_GSO_MAX_SIZE \
+ (CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
+
+struct octnic_gather {
+ /* List manipulation. Next and prev pointers. */
+ struct list_head list;
+
+ /* Size of the gather component at sg in bytes. */
+ int sg_size;
+
+ /* Number of bytes that sg was adjusted to make it 8B-aligned. */
+ int adjust;
+
+ /* Gather component that can accommodate max sized fragment list
+ * received from the IP layer.
+ */
+ struct octeon_sg_entry *sg;
+};
+
struct octeon_device_priv {
/* Tasklet structures for this device. */
struct tasklet_struct droq_tasklet;
@@ -40,6 +112,7 @@ static int
liquidio_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
static void liquidio_vf_remove(struct pci_dev *pdev);
static int octeon_device_init(struct octeon_device *oct);
+static int liquidio_stop(struct net_device *netdev);
static int lio_wait_for_oq_pkts(struct octeon_device *oct)
{
@@ -113,6 +186,375 @@ static struct pci_driver liquidio_vf_pci_driver = {
.remove = liquidio_vf_remove,
};
+/**
+ * \brief check interface state
+ * @param lio per-network private data
+ * @param state_flag flag state to check
+ */
+static int ifstate_check(struct lio *lio, int state_flag)
+{
+ return atomic_read(&lio->ifstate) & state_flag;
+}
+
+/**
+ * \brief set interface state
+ * @param lio per-network private data
+ * @param state_flag flag state to set
+ */
+static void ifstate_set(struct lio *lio, int state_flag)
+{
+ atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) | state_flag));
+}
+
+/**
+ * \brief clear interface state
+ * @param lio per-network private data
+ * @param state_flag flag state to clear
+ */
+static void ifstate_reset(struct lio *lio, int state_flag)
+{
+ atomic_set(&lio->ifstate, (atomic_read(&lio->ifstate) & ~(state_flag)));
+}
+
+/**
+ * \brief Stop Tx queues
+ * @param netdev network device
+ */
+static void txqs_stop(struct net_device *netdev)
+{
+ if (netif_is_multiqueue(netdev)) {
+ int i;
+
+ for (i = 0; i < netdev->num_tx_queues; i++)
+ netif_stop_subqueue(netdev, i);
+ } else {
+ netif_stop_queue(netdev);
+ }
+}
+
+/**
+ * \brief Start Tx queues
+ * @param netdev network device
+ */
+static void txqs_start(struct net_device *netdev)
+{
+ if (netif_is_multiqueue(netdev)) {
+ int i;
+
+ for (i = 0; i < netdev->num_tx_queues; i++)
+ netif_start_subqueue(netdev, i);
+ } else {
+ netif_start_queue(netdev);
+ }
+}
+
+/**
+ * \brief Wake Tx queues
+ * @param netdev network device
+ */
+static void txqs_wake(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+
+ if (netif_is_multiqueue(netdev)) {
+ int i;
+
+ for (i = 0; i < netdev->num_tx_queues; i++) {
+ int qno = lio->linfo.txpciq[i % (lio->linfo.num_txpciq)]
+ .s.q_no;
+ if (__netif_subqueue_stopped(netdev, i)) {
+ INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno,
+ tx_restart, 1);
+ netif_wake_subqueue(netdev, i);
+ }
+ }
+ } else {
+ INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
+ tx_restart, 1);
+ netif_wake_queue(netdev);
+ }
+}
+
+/**
+ * \brief Start Tx queue
+ * @param netdev network device
+ */
+static void start_txq(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+
+ if (lio->linfo.link.s.link_up) {
+ txqs_start(netdev);
+ return;
+ }
+}
+
+/**
+ * \brief Wake a queue
+ * @param netdev network device
+ * @param q which queue to wake
+ */
+static void wake_q(struct net_device *netdev, int q)
+{
+ if (netif_is_multiqueue(netdev))
+ netif_wake_subqueue(netdev, q);
+ else
+ netif_wake_queue(netdev);
+}
+
+/**
+ * \brief Stop a queue
+ * @param netdev network device
+ * @param q which queue to stop
+ */
+static void stop_q(struct net_device *netdev, int q)
+{
+ if (netif_is_multiqueue(netdev))
+ netif_stop_subqueue(netdev, q);
+ else
+ netif_stop_queue(netdev);
+}
+
+/**
+ * Remove the node at the head of the list. The list would be empty at
+ * the end of this call if there are no more nodes in the list.
+ */
+static struct list_head *list_delete_head(struct list_head *root)
+{
+ struct list_head *node;
+
+ if ((root->prev == root) && (root->next == root))
+ node = NULL;
+ else
+ node = root->next;
+
+ if (node)
+ list_del(node);
+
+ return node;
+}
+
+/**
+ * \brief Delete gather lists
+ * @param lio per-network private data
+ */
+static void delete_glists(struct lio *lio)
+{
+ struct octnic_gather *g;
+ int i;
+
+ if (!lio->glist)
+ return;
+
+ for (i = 0; i < lio->linfo.num_txpciq; i++) {
+ do {
+ g = (struct octnic_gather *)
+ list_delete_head(&lio->glist[i]);
+ if (g) {
+ if (g->sg)
+ kfree((void *)((unsigned long)g->sg -
+ g->adjust));
+ kfree(g);
+ }
+ } while (g);
+ }
+
+ kfree(lio->glist);
+ kfree(lio->glist_lock);
+}
+
+/**
+ * \brief Setup gather lists
+ * @param lio per-network private data
+ */
+static int setup_glists(struct lio *lio, int num_iqs)
+{
+ struct octnic_gather *g;
+ int i, j;
+
+ lio->glist_lock =
+ kzalloc(sizeof(*lio->glist_lock) * num_iqs, GFP_KERNEL);
+ if (!lio->glist_lock)
+ return 1;
+
+ lio->glist =
+ kzalloc(sizeof(*lio->glist) * num_iqs, GFP_KERNEL);
+ if (!lio->glist) {
+ kfree(lio->glist_lock);
+ return 1;
+ }
+
+ for (i = 0; i < num_iqs; i++) {
+ spin_lock_init(&lio->glist_lock[i]);
+
+ INIT_LIST_HEAD(&lio->glist[i]);
+
+ for (j = 0; j < lio->tx_qsize; j++) {
+ g = kzalloc(sizeof(*g), GFP_KERNEL);
+ if (!g)
+ break;
+
+ g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
+ OCT_SG_ENTRY_SIZE);
+
+ g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
+ if (!g->sg) {
+ kfree(g);
+ break;
+ }
+
+ /* The gather component should be aligned on 64-bit
+ * boundary
+ */
+ if (((unsigned long)g->sg) & 7) {
+ g->adjust = 8 - (((unsigned long)g->sg) & 7);
+ g->sg = (struct octeon_sg_entry *)
+ ((unsigned long)g->sg + g->adjust);
+ }
+ list_add_tail(&g->list, &lio->glist[i]);
+ }
+
+ if (j != lio->tx_qsize) {
+ delete_glists(lio);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * \brief Print link information
+ * @param netdev network device
+ */
+static void print_link_info(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+
+ if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) {
+ struct oct_link_info *linfo = &lio->linfo;
+
+ if (linfo->link.s.link_up) {
+ netif_info(lio, link, lio->netdev, "%d Mbps %s Duplex UP\n",
+ linfo->link.s.speed,
+ (linfo->link.s.duplex) ? "Full" : "Half");
+ } else {
+ netif_info(lio, link, lio->netdev, "Link Down\n");
+ }
+ }
+}
+
+/**
+ * \brief Routine to notify MTU change
+ * @param work work_struct data structure
+ */
+static void octnet_link_status_change(struct work_struct *work)
+{
+ struct cavium_wk *wk = (struct cavium_wk *)work;
+ struct lio *lio = (struct lio *)wk->ctxptr;
+
+ rtnl_lock();
+ call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+ rtnl_unlock();
+}
+
+/**
+ * \brief Sets up the mtu status change work
+ * @param netdev network device
+ */
+static int setup_link_status_change_wq(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+
+ lio->link_status_wq.wq = alloc_workqueue("link-status",
+ WQ_MEM_RECLAIM, 0);
+ if (!lio->link_status_wq.wq) {
+ dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n");
+ return -1;
+ }
+ INIT_DELAYED_WORK(&lio->link_status_wq.wk.work,
+ octnet_link_status_change);
+ lio->link_status_wq.wk.ctxptr = lio;
+
+ return 0;
+}
+
+static void cleanup_link_status_change_wq(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+
+ if (lio->link_status_wq.wq) {
+ cancel_delayed_work_sync(&lio->link_status_wq.wk.work);
+ destroy_workqueue(lio->link_status_wq.wq);
+ }
+}
+
+/**
+ * \brief Update link status
+ * @param netdev network device
+ * @param ls link status structure
+ *
+ * Called on receipt of a link status response from the core application to
+ * update each interface's link status.
+ */
+static void update_link_status(struct net_device *netdev,
+ union oct_link_status *ls)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+
+ if ((lio->intf_open) && (lio->linfo.link.u64 != ls->u64)) {
+ lio->linfo.link.u64 = ls->u64;
+
+ print_link_info(netdev);
+ lio->link_changes++;
+
+ if (lio->linfo.link.s.link_up) {
+ netif_carrier_on(netdev);
+ txqs_wake(netdev);
+ } else {
+ netif_carrier_off(netdev);
+ txqs_stop(netdev);
+ }
+
+ if (lio->linfo.link.s.mtu < netdev->mtu) {
+ dev_warn(&oct->pci_dev->dev,
+ "PF has changed the MTU for gmx port. Reducing the mtu from %d to %d\n",
+ netdev->mtu, lio->linfo.link.s.mtu);
+ lio->mtu = lio->linfo.link.s.mtu;
+ netdev->mtu = lio->linfo.link.s.mtu;
+ queue_delayed_work(lio->link_status_wq.wq,
+ &lio->link_status_wq.wk.work, 0);
+ }
+ }
+}
+
+static void update_txq_status(struct octeon_device *oct, int iq_num)
+{
+ struct octeon_instr_queue *iq = oct->instr_queue[iq_num];
+ struct net_device *netdev;
+ struct lio *lio;
+
+ netdev = oct->props[iq->ifidx].netdev;
+ lio = GET_LIO(netdev);
+ if (netif_is_multiqueue(netdev)) {
+ if (__netif_subqueue_stopped(netdev, iq->q_index) &&
+ lio->linfo.link.s.link_up &&
+ (!octnet_iq_is_full(oct, iq_num))) {
+ netif_wake_subqueue(netdev, iq->q_index);
+ INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
+ tx_restart, 1);
+ } else {
+ if (!octnet_iq_is_full(oct, lio->txq)) {
+ INCR_INSTRQUEUE_PKT_COUNT(
+ lio->oct_dev, lio->txq, tx_restart, 1);
+ wake_q(netdev, lio->txq);
+ }
+ }
+ }
+}
+
static
int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
{
@@ -316,6 +758,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
/* No more instructions will be forwarded. */
atomic_set(&oct->status, OCT_DEV_IN_RESET);
+ oct->app_mode = CVM_DRV_INVALID_APP;
dev_dbg(&oct->pci_dev->dev, "Device state is now %s\n",
lio_get_state_string(&oct->status));
@@ -420,6 +863,174 @@ static void octeon_destroy_resources(struct octeon_device *oct)
}
/**
+ * \brief Callback for rx ctrl
+ * @param status status of request
+ * @param buf pointer to resp structure
+ */
+static void rx_ctl_callback(struct octeon_device *oct,
+ u32 status, void *buf)
+{
+ struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+ struct liquidio_rx_ctl_context *ctx;
+
+ ctx = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+ oct = lio_get_device(ctx->octeon_id);
+ if (status)
+ dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n",
+ CVM_CAST64(status));
+ WRITE_ONCE(ctx->cond, 1);
+
+ /* This barrier is required to be sure that the response has been
+ * written fully before waking up the handler
+ */
+ wmb();
+
+ wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Send Rx control command
+ * @param lio per-network private data
+ * @param start_stop whether to start or stop
+ */
+static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
+{
+ struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
+ int ctx_size = sizeof(struct liquidio_rx_ctl_context);
+ struct liquidio_rx_ctl_context *ctx;
+ struct octeon_soft_command *sc;
+ union octnet_cmd *ncmd;
+ int retval;
+
+ if (oct->props[lio->ifidx].rx_on == start_stop)
+ return;
+
+ sc = (struct octeon_soft_command *)
+ octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
+ 16, ctx_size);
+
+ ncmd = (union octnet_cmd *)sc->virtdptr;
+ ctx = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+ WRITE_ONCE(ctx->cond, 0);
+ ctx->octeon_id = lio_get_device_id(oct);
+ init_waitqueue_head(&ctx->wc);
+
+ ncmd->u64 = 0;
+ ncmd->s.cmd = OCTNET_CMD_RX_CTL;
+ ncmd->s.param1 = start_stop;
+
+ octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+ sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+ octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+ OPCODE_NIC_CMD, 0, 0, 0);
+
+ sc->callback = rx_ctl_callback;
+ sc->callback_arg = sc;
+ sc->wait_time = 5000;
+
+ retval = octeon_send_soft_command(oct, sc);
+ if (retval == IQ_SEND_FAILED) {
+ netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
+ } else {
+ /* Sleep on a wait queue till the cond flag indicates that the
+ * response arrived or timed-out.
+ */
+ if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR)
+ return;
+ oct->props[lio->ifidx].rx_on = start_stop;
+ }
+
+ octeon_free_soft_command(oct, sc);
+}
+
+/**
+ * \brief Destroy NIC device interface
+ * @param oct octeon device
+ * @param ifidx which interface to destroy
+ *
+ * Cleanup associated with each interface for an Octeon device when NIC
+ * module is being unloaded or if initialization fails during load.
+ */
+static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
+{
+ struct net_device *netdev = oct->props[ifidx].netdev;
+ struct napi_struct *napi, *n;
+ struct lio *lio;
+
+ if (!netdev) {
+ dev_err(&oct->pci_dev->dev, "%s No netdevice ptr for index %d\n",
+ __func__, ifidx);
+ return;
+ }
+
+ lio = GET_LIO(netdev);
+
+ dev_dbg(&oct->pci_dev->dev, "NIC device cleanup\n");
+
+ if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING)
+ liquidio_stop(netdev);
+
+ if (oct->props[lio->ifidx].napi_enabled == 1) {
+ list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+ napi_disable(napi);
+
+ oct->props[lio->ifidx].napi_enabled = 0;
+
+ oct->droq[0]->ops.poll_mode = 0;
+ }
+
+ if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
+ unregister_netdev(netdev);
+
+ cleanup_link_status_change_wq(netdev);
+
+ delete_glists(lio);
+
+ free_netdev(netdev);
+
+ oct->props[ifidx].gmxport = -1;
+
+ oct->props[ifidx].netdev = NULL;
+}
+
+/**
+ * \brief Stop complete NIC functionality
+ * @param oct octeon device
+ */
+static int liquidio_stop_nic_module(struct octeon_device *oct)
+{
+ struct lio *lio;
+ int i, j;
+
+ dev_dbg(&oct->pci_dev->dev, "Stopping network interfaces\n");
+ if (!oct->ifcount) {
+ dev_err(&oct->pci_dev->dev, "Init for Octeon was not completed\n");
+ return 1;
+ }
+
+ spin_lock_bh(&oct->cmd_resp_wqlock);
+ oct->cmd_resp_state = OCT_DRV_OFFLINE;
+ spin_unlock_bh(&oct->cmd_resp_wqlock);
+
+ for (i = 0; i < oct->ifcount; i++) {
+ lio = GET_LIO(oct->props[i].netdev);
+ for (j = 0; j < lio->linfo.num_rxpciq; j++)
+ octeon_unregister_droq_ops(oct,
+ lio->linfo.rxpciq[j].s.q_no);
+ }
+
+ for (i = 0; i < oct->ifcount; i++)
+ liquidio_destroy_nic_device(oct, i);
+
+ dev_dbg(&oct->pci_dev->dev, "Network interfaces stopped\n");
+ return 0;
+}
+
+/**
* \brief Cleans up resources at unload time
* @param pdev PCI device structure
*/
@@ -429,6 +1040,9 @@ static void liquidio_vf_remove(struct pci_dev *pdev)
dev_dbg(&oct_dev->pci_dev->dev, "Stopping device\n");
+ if (oct_dev->app_mode == CVM_DRV_NIC_APP)
+ liquidio_stop_nic_module(oct_dev);
+
/* Reset the octeon device and cleanup all memory allocated for
* the octeon device by driver.
*/
@@ -471,6 +1085,1457 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
return 0;
}
+static int skb_iq(struct lio *lio, struct sk_buff *skb)
+{
+ int q = 0;
+
+ if (netif_is_multiqueue(lio->netdev))
+ q = skb->queue_mapping % lio->linfo.num_txpciq;
+
+ return q;
+}
+
+/**
+ * \brief Check Tx queue state for a given network buffer
+ * @param lio per-network private data
+ * @param skb network buffer
+ */
+static int check_txq_state(struct lio *lio, struct sk_buff *skb)
+{
+ int q = 0, iq = 0;
+
+ if (netif_is_multiqueue(lio->netdev)) {
+ q = skb->queue_mapping;
+ iq = lio->linfo.txpciq[(q % (lio->linfo.num_txpciq))].s.q_no;
+ } else {
+ iq = lio->txq;
+ q = iq;
+ }
+
+ if (octnet_iq_is_full(lio->oct_dev, iq))
+ return 0;
+
+ if (__netif_subqueue_stopped(lio->netdev, q)) {
+ INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq, tx_restart, 1);
+ wake_q(lio->netdev, q);
+ }
+
+ return 1;
+}
+
+/**
+ * \brief Unmap and free network buffer
+ * @param buf buffer
+ */
+static void free_netbuf(void *buf)
+{
+ struct octnet_buf_free_info *finfo;
+ struct sk_buff *skb;
+ struct lio *lio;
+
+ finfo = (struct octnet_buf_free_info *)buf;
+ skb = finfo->skb;
+ lio = finfo->lio;
+
+ dma_unmap_single(&lio->oct_dev->pci_dev->dev, finfo->dptr, skb->len,
+ DMA_TO_DEVICE);
+
+ check_txq_state(lio, skb);
+
+ tx_buffer_free(skb);
+}
+
+/**
+ * \brief Unmap and free gather buffer
+ * @param buf buffer
+ */
+static void free_netsgbuf(void *buf)
+{
+ struct octnet_buf_free_info *finfo;
+ struct octnic_gather *g;
+ struct sk_buff *skb;
+ int i, frags, iq;
+ struct lio *lio;
+
+ finfo = (struct octnet_buf_free_info *)buf;
+ skb = finfo->skb;
+ lio = finfo->lio;
+ g = finfo->g;
+ frags = skb_shinfo(skb)->nr_frags;
+
+ dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+ g->sg[0].ptr[0], (skb->len - skb->data_len),
+ DMA_TO_DEVICE);
+
+ i = 1;
+ while (frags--) {
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+
+ pci_unmap_page((lio->oct_dev)->pci_dev,
+ g->sg[(i >> 2)].ptr[(i & 3)],
+ frag->size, DMA_TO_DEVICE);
+ i++;
+ }
+
+ dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+ finfo->dptr, g->sg_size,
+ DMA_TO_DEVICE);
+
+ iq = skb_iq(lio, skb);
+
+ spin_lock(&lio->glist_lock[iq]);
+ list_add_tail(&g->list, &lio->glist[iq]);
+ spin_unlock(&lio->glist_lock[iq]);
+
+ check_txq_state(lio, skb); /* mq support: sub-queue state check */
+
+ tx_buffer_free(skb);
+}
+
+/**
+ * \brief Unmap and free gather buffer with response
+ * @param buf buffer
+ */
+static void free_netsgbuf_with_resp(void *buf)
+{
+ struct octnet_buf_free_info *finfo;
+ struct octeon_soft_command *sc;
+ struct octnic_gather *g;
+ struct sk_buff *skb;
+ int i, frags, iq;
+ struct lio *lio;
+
+ sc = (struct octeon_soft_command *)buf;
+ skb = (struct sk_buff *)sc->callback_arg;
+ finfo = (struct octnet_buf_free_info *)&skb->cb;
+
+ lio = finfo->lio;
+ g = finfo->g;
+ frags = skb_shinfo(skb)->nr_frags;
+
+ dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+ g->sg[0].ptr[0], (skb->len - skb->data_len),
+ DMA_TO_DEVICE);
+
+ i = 1;
+ while (frags--) {
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+
+ pci_unmap_page((lio->oct_dev)->pci_dev,
+ g->sg[(i >> 2)].ptr[(i & 3)],
+ frag->size, DMA_TO_DEVICE);
+ i++;
+ }
+
+ dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+ finfo->dptr, g->sg_size,
+ DMA_TO_DEVICE);
+
+ iq = skb_iq(lio, skb);
+
+ spin_lock(&lio->glist_lock[iq]);
+ list_add_tail(&g->list, &lio->glist[iq]);
+ spin_unlock(&lio->glist_lock[iq]);
+
+ /* Don't free the skb yet */
+
+ check_txq_state(lio, skb);
+}
+
+/**
+ * \brief Setup output queue
+ * @param oct octeon device
+ * @param q_no which queue
+ * @param num_descs how many descriptors
+ * @param desc_size size of each descriptor
+ * @param app_ctx application context
+ */
+static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs,
+ int desc_size, void *app_ctx)
+{
+ int ret_val;
+
+ dev_dbg(&oct->pci_dev->dev, "Creating Droq: %d\n", q_no);
+ /* droq creation and local register settings. */
+ ret_val = octeon_create_droq(oct, q_no, num_descs, desc_size, app_ctx);
+ if (ret_val < 0)
+ return ret_val;
+
+ if (ret_val == 1) {
+ dev_dbg(&oct->pci_dev->dev, "Using default droq %d\n", q_no);
+ return 0;
+ }
+
+ /* Enable the droq queues */
+ octeon_set_droq_pkt_op(oct, q_no, 1);
+
+ /* Send Credit for Octeon Output queues. Credits are always
+ * sent after the output queue is enabled.
+ */
+ writel(oct->droq[q_no]->max_count, oct->droq[q_no]->pkts_credit_reg);
+
+ return ret_val;
+}
+
+/**
+ * \brief Callback for getting interface configuration
+ * @param status status of request
+ * @param buf pointer to resp structure
+ */
+static void if_cfg_callback(struct octeon_device *oct,
+ u32 status __attribute__((unused)), void *buf)
+{
+ struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+ struct liquidio_if_cfg_context *ctx;
+ struct liquidio_if_cfg_resp *resp;
+
+ resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
+ ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+ oct = lio_get_device(ctx->octeon_id);
+ if (resp->status)
+ dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n",
+ CVM_CAST64(resp->status));
+ WRITE_ONCE(ctx->cond, 1);
+
+ snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
+ resp->cfg_info.liquidio_firmware_version);
+
+ /* This barrier is required to be sure that the response has been
+ * written fully before waking up the handler
+ */
+ wmb();
+
+ wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Select queue based on hash
+ * @param dev Net device
+ * @param skb sk_buff structure
+ * @returns selected queue number
+ */
+static u16 select_q(struct net_device *dev, struct sk_buff *skb,
+ void *accel_priv __attribute__((unused)),
+ select_queue_fallback_t fallback __attribute__((unused)))
+{
+ struct lio *lio;
+ u32 qindex;
+
+ lio = GET_LIO(dev);
+
+ qindex = skb_tx_hash(dev, skb);
+
+ return (u16)(qindex % (lio->linfo.num_txpciq));
+}
+
+/** Routine to push packets arriving on Octeon interface upto network layer.
+ * @param oct_id - octeon device id.
+ * @param skbuff - skbuff struct to be passed to network layer.
+ * @param len - size of total data received.
+ * @param rh - Control header associated with the packet
+ * @param param - additional control data with the packet
+ * @param arg - farg registered in droq_ops
+ */
+static void
+liquidio_push_packet(u32 octeon_id __attribute__((unused)),
+ void *skbuff,
+ u32 len,
+ union octeon_rh *rh,
+ void *param,
+ void *arg)
+{
+ struct napi_struct *napi = param;
+ struct octeon_droq *droq =
+ container_of(param, struct octeon_droq, napi);
+ struct net_device *netdev = (struct net_device *)arg;
+ struct sk_buff *skb = (struct sk_buff *)skbuff;
+
+ if (netdev) {
+ struct lio *lio = GET_LIO(netdev);
+ int packet_was_received;
+
+ /* Do not proceed if the interface is not in RUNNING state. */
+ if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
+ recv_buffer_free(skb);
+ droq->stats.rx_dropped++;
+ return;
+ }
+
+ skb->dev = netdev;
+
+ skb_record_rx_queue(skb, droq->q_no);
+ if (likely(len > MIN_SKB_SIZE)) {
+ struct octeon_skb_page_info *pg_info;
+ unsigned char *va;
+
+ pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+ if (pg_info->page) {
+ /* For Paged allocation use the frags */
+ va = page_address(pg_info->page) +
+ pg_info->page_offset;
+ memcpy(skb->data, va, MIN_SKB_SIZE);
+ skb_put(skb, MIN_SKB_SIZE);
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ pg_info->page,
+ pg_info->page_offset +
+ MIN_SKB_SIZE,
+ len - MIN_SKB_SIZE,
+ LIO_RXBUFFER_SZ);
+ }
+ } else {
+ struct octeon_skb_page_info *pg_info =
+ ((struct octeon_skb_page_info *)(skb->cb));
+ skb_copy_to_linear_data(skb,
+ page_address(pg_info->page) +
+ pg_info->page_offset, len);
+ skb_put(skb, len);
+ put_page(pg_info->page);
+ }
+
+ skb_pull(skb, rh->r_dh.len * 8);
+ skb->protocol = eth_type_trans(skb, skb->dev);
+
+ if ((netdev->features & NETIF_F_RXCSUM) &&
+ (rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED))
+ /* checksum has already been verified */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ else
+ skb->ip_summed = CHECKSUM_NONE;
+
+ packet_was_received = (napi_gro_receive(napi, skb) != GRO_DROP);
+
+ if (packet_was_received) {
+ droq->stats.rx_bytes_received += len;
+ droq->stats.rx_pkts_received++;
+ netdev->last_rx = jiffies;
+ } else {
+ droq->stats.rx_dropped++;
+ netif_info(lio, rx_err, lio->netdev,
+ "droq:%d error rx_dropped:%llu\n",
+ droq->q_no, droq->stats.rx_dropped);
+ }
+
+ } else {
+ recv_buffer_free(skb);
+ }
+}
+
+/**
+ * \brief callback when receive interrupt occurs and we are in NAPI mode
+ * @param arg pointer to octeon output queue
+ */
+static void liquidio_vf_napi_drv_callback(void *arg)
+{
+ struct octeon_droq *droq = arg;
+
+ napi_schedule_irqoff(&droq->napi);
+}
+
+/**
+ * \brief Entry point for NAPI polling
+ * @param napi NAPI structure
+ * @param budget maximum number of items to process
+ */
+static int liquidio_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct octeon_instr_queue *iq;
+ struct octeon_device *oct;
+ struct octeon_droq *droq;
+ int tx_done = 0, iq_no;
+ int work_done;
+
+ droq = container_of(napi, struct octeon_droq, napi);
+ oct = droq->oct_dev;
+ iq_no = droq->q_no;
+
+ /* Handle Droq descriptors */
+ work_done = octeon_process_droq_poll_cmd(oct, droq->q_no,
+ POLL_EVENT_PROCESS_PKTS,
+ budget);
+
+ /* Flush the instruction queue */
+ iq = oct->instr_queue[iq_no];
+ if (iq) {
+ /* Process iq buffers with in the budget limits */
+ tx_done = octeon_flush_iq(oct, iq, 1, budget);
+ /* Update iq read-index rather than waiting for next interrupt.
+ * Return back if tx_done is false.
+ */
+ update_txq_status(oct, iq_no);
+ } else {
+ dev_err(&oct->pci_dev->dev, "%s: iq (%d) num invalid\n",
+ __func__, iq_no);
+ }
+
+ if ((work_done < budget) && (tx_done)) {
+ napi_complete(napi);
+ octeon_process_droq_poll_cmd(droq->oct_dev, droq->q_no,
+ POLL_EVENT_ENABLE_INTR, 0);
+ return 0;
+ }
+
+ return (!tx_done) ? (budget) : (work_done);
+}
+
+/**
+ * \brief Setup input and output queues
+ * @param octeon_dev octeon device
+ * @param ifidx Interface index
+ *
+ * Note: Queues are with respect to the octeon device. Thus
+ * an input queue is for egress packets, and output queues
+ * are for ingress packets.
+ */
+static int setup_io_queues(struct octeon_device *octeon_dev, int ifidx)
+{
+ struct octeon_droq_ops droq_ops;
+ struct net_device *netdev;
+ static int cpu_id_modulus;
+ struct octeon_droq *droq;
+ struct napi_struct *napi;
+ static int cpu_id;
+ int num_tx_descs;
+ struct lio *lio;
+ int retval = 0;
+ int q, q_no;
+
+ netdev = octeon_dev->props[ifidx].netdev;
+
+ lio = GET_LIO(netdev);
+
+ memset(&droq_ops, 0, sizeof(struct octeon_droq_ops));
+
+ droq_ops.fptr = liquidio_push_packet;
+ droq_ops.farg = netdev;
+
+ droq_ops.poll_mode = 1;
+ droq_ops.napi_fn = liquidio_vf_napi_drv_callback;
+ cpu_id = 0;
+ cpu_id_modulus = num_present_cpus();
+
+ /* set up DROQs. */
+ for (q = 0; q < lio->linfo.num_rxpciq; q++) {
+ q_no = lio->linfo.rxpciq[q].s.q_no;
+
+ retval = octeon_setup_droq(
+ octeon_dev, q_no,
+ CFG_GET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(octeon_dev),
+ lio->ifidx),
+ CFG_GET_NUM_RX_BUF_SIZE_NIC_IF(octeon_get_conf(octeon_dev),
+ lio->ifidx),
+ NULL);
+ if (retval) {
+ dev_err(&octeon_dev->pci_dev->dev,
+ "%s : Runtime DROQ(RxQ) creation failed.\n",
+ __func__);
+ return 1;
+ }
+
+ droq = octeon_dev->droq[q_no];
+ napi = &droq->napi;
+ netif_napi_add(netdev, napi, liquidio_napi_poll, 64);
+
+ /* designate a CPU for this droq */
+ droq->cpu_id = cpu_id;
+ cpu_id++;
+ if (cpu_id >= cpu_id_modulus)
+ cpu_id = 0;
+
+ octeon_register_droq_ops(octeon_dev, q_no, &droq_ops);
+ }
+
+ /* 23XX VF can send/recv control messages (via the first VF-owned
+ * droq) from the firmware even if the ethX interface is down,
+ * so that's why poll_mode must be off for the first droq.
+ */
+ octeon_dev->droq[0]->ops.poll_mode = 0;
+
+ /* set up IQs. */
+ for (q = 0; q < lio->linfo.num_txpciq; q++) {
+ num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(
+ octeon_get_conf(octeon_dev), lio->ifidx);
+ retval = octeon_setup_iq(octeon_dev, ifidx, q,
+ lio->linfo.txpciq[q], num_tx_descs,
+ netdev_get_tx_queue(netdev, q));
+ if (retval) {
+ dev_err(&octeon_dev->pci_dev->dev,
+ " %s : Runtime IQ(TxQ) creation failed.\n",
+ __func__);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * \brief Net device open for LiquidIO
+ * @param netdev network device
+ */
+static int liquidio_open(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct napi_struct *napi, *n;
+
+ if (!oct->props[lio->ifidx].napi_enabled) {
+ list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+ napi_enable(napi);
+
+ oct->props[lio->ifidx].napi_enabled = 1;
+
+ oct->droq[0]->ops.poll_mode = 1;
+ }
+
+ ifstate_set(lio, LIO_IFSTATE_RUNNING);
+
+ /* Ready for link status updates */
+ lio->intf_open = 1;
+
+ netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
+ start_txq(netdev);
+
+ /* tell Octeon to start forwarding packets to host */
+ send_rx_ctrl_cmd(lio, 1);
+
+ dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name);
+
+ return 0;
+}
+
+/**
+ * \brief Net device stop for LiquidIO
+ * @param netdev network device
+ */
+static int liquidio_stop(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+
+ netif_info(lio, ifdown, lio->netdev, "Stopping interface!\n");
+ /* Inform that netif carrier is down */
+ lio->intf_open = 0;
+ lio->linfo.link.s.link_up = 0;
+
+ netif_carrier_off(netdev);
+ lio->link_changes++;
+
+ /* tell Octeon to stop forwarding packets to host */
+ send_rx_ctrl_cmd(lio, 0);
+
+ ifstate_reset(lio, LIO_IFSTATE_RUNNING);
+
+ txqs_stop(netdev);
+
+ dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
+
+ return 0;
+}
+
+/**
+ * \brief Converts a mask based on net device flags
+ * @param netdev network device
+ *
+ * This routine generates a octnet_ifflags mask from the net device flags
+ * received from the OS.
+ */
+static enum octnet_ifflags get_new_flags(struct net_device *netdev)
+{
+ enum octnet_ifflags f = OCTNET_IFFLAG_UNICAST;
+
+ if (netdev->flags & IFF_PROMISC)
+ f |= OCTNET_IFFLAG_PROMISC;
+
+ if (netdev->flags & IFF_ALLMULTI)
+ f |= OCTNET_IFFLAG_ALLMULTI;
+
+ if (netdev->flags & IFF_MULTICAST) {
+ f |= OCTNET_IFFLAG_MULTICAST;
+
+ /* Accept all multicast addresses if there are more than we
+ * can handle
+ */
+ if (netdev_mc_count(netdev) > MAX_OCTEON_MULTICAST_ADDR)
+ f |= OCTNET_IFFLAG_ALLMULTI;
+ }
+
+ if (netdev->flags & IFF_BROADCAST)
+ f |= OCTNET_IFFLAG_BROADCAST;
+
+ return f;
+}
+
+static void liquidio_set_uc_list(struct net_device *netdev)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct octnic_ctrl_pkt nctrl;
+ struct netdev_hw_addr *ha;
+ u64 *mac;
+
+ if (lio->netdev_uc_count == netdev_uc_count(netdev))
+ return;
+
+ if (netdev_uc_count(netdev) > MAX_NCTRL_UDD) {
+ dev_err(&oct->pci_dev->dev, "too many MAC addresses in netdev uc list\n");
+ return;
+ }
+
+ lio->netdev_uc_count = netdev_uc_count(netdev);
+
+ memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+ nctrl.ncmd.s.cmd = OCTNET_CMD_SET_UC_LIST;
+ nctrl.ncmd.s.more = lio->netdev_uc_count;
+ nctrl.ncmd.s.param1 = oct->vf_num;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+ nctrl.netpndev = (u64)netdev;
+ nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+ /* copy all the addresses into the udd */
+ mac = &nctrl.udd[0];
+ netdev_for_each_uc_addr(ha, netdev) {
+ ether_addr_copy(((u8 *)mac) + 2, ha->addr);
+ mac++;
+ }
+
+ octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+}
+
+/**
+ * \brief Net device set_multicast_list
+ * @param netdev network device
+ */
+static void liquidio_set_mcast_list(struct net_device *netdev)
+{
+ int mc_count = min(netdev_mc_count(netdev), MAX_OCTEON_MULTICAST_ADDR);
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct octnic_ctrl_pkt nctrl;
+ struct netdev_hw_addr *ha;
+ u64 *mc;
+ int ret;
+
+ memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+ /* Create a ctrl pkt command to be sent to core app. */
+ nctrl.ncmd.u64 = 0;
+ nctrl.ncmd.s.cmd = OCTNET_CMD_SET_MULTI_LIST;
+ nctrl.ncmd.s.param1 = get_new_flags(netdev);
+ nctrl.ncmd.s.param2 = mc_count;
+ nctrl.ncmd.s.more = mc_count;
+ nctrl.netpndev = (u64)netdev;
+ nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+ /* copy all the addresses into the udd */
+ mc = &nctrl.udd[0];
+ netdev_for_each_mc_addr(ha, netdev) {
+ *mc = 0;
+ ether_addr_copy(((u8 *)mc) + 2, ha->addr);
+ /* no need to swap bytes */
+ if (++mc > &nctrl.udd[mc_count])
+ break;
+ }
+
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+
+ /* Apparently, any activity in this call from the kernel has to
+ * be atomic. So we won't wait for response.
+ */
+ nctrl.wait_time = 0;
+
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+ if (ret < 0) {
+ dev_err(&oct->pci_dev->dev, "DEVFLAGS change failed in core (ret: 0x%x)\n",
+ ret);
+ }
+
+ liquidio_set_uc_list(netdev);
+}
+
+/**
+ * \brief Net device set_mac_address
+ * @param netdev network device
+ */
+static int liquidio_set_mac(struct net_device *netdev, void *p)
+{
+ struct sockaddr *addr = (struct sockaddr *)p;
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct octnic_ctrl_pkt nctrl;
+ int ret = 0;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EADDRNOTAVAIL;
+
+ if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
+ return 0;
+
+ if (lio->linfo.macaddr_is_admin_asgnd)
+ return -EPERM;
+
+ memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+ nctrl.ncmd.u64 = 0;
+ nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MACADDR;
+ nctrl.ncmd.s.param1 = 0;
+ nctrl.ncmd.s.more = 1;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+ nctrl.netpndev = (u64)netdev;
+ nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+ nctrl.wait_time = 100;
+
+ nctrl.udd[0] = 0;
+ /* The MAC Address is presented in network byte order. */
+ ether_addr_copy((u8 *)&nctrl.udd[0] + 2, addr->sa_data);
+
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+ if (ret < 0) {
+ dev_err(&oct->pci_dev->dev, "MAC Address change failed\n");
+ return -ENOMEM;
+ }
+ memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+ ether_addr_copy(((u8 *)&lio->linfo.hw_addr) + 2, addr->sa_data);
+
+ return 0;
+}
+
+/**
+ * \brief Net device change_mtu
+ * @param netdev network device
+ */
+static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+
+ lio->mtu = new_mtu;
+
+ netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
+ netdev->mtu, new_mtu);
+ dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
+ netdev->name, netdev->mtu, new_mtu);
+
+ netdev->mtu = new_mtu;
+
+ return 0;
+}
+
+/** \brief Transmit networks packets to the Octeon interface
+ * @param skbuff skbuff struct to be passed to network layer.
+ * @param netdev pointer to network device
+ * @returns whether the packet was transmitted to the device okay or not
+ * (NETDEV_TX_OK or NETDEV_TX_BUSY)
+ */
+static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct octnet_buf_free_info *finfo;
+ union octnic_cmd_setup cmdsetup;
+ struct octnic_data_pkt ndata;
+ struct octeon_instr_irh *irh;
+ struct oct_iq_stats *stats;
+ struct octeon_device *oct;
+ int q_idx = 0, iq_no = 0;
+ union tx_info *tx_info;
+ struct lio *lio;
+ int status = 0;
+ u64 dptr = 0;
+ u32 tag = 0;
+ int j;
+
+ lio = GET_LIO(netdev);
+ oct = lio->oct_dev;
+
+ if (netif_is_multiqueue(netdev)) {
+ q_idx = skb->queue_mapping;
+ q_idx = (q_idx % (lio->linfo.num_txpciq));
+ tag = q_idx;
+ iq_no = lio->linfo.txpciq[q_idx].s.q_no;
+ } else {
+ iq_no = lio->txq;
+ }
+
+ stats = &oct->instr_queue[iq_no]->stats;
+
+ /* Check for all conditions in which the current packet cannot be
+ * transmitted.
+ */
+ if (!(atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING) ||
+ (!lio->linfo.link.s.link_up) || (skb->len <= 0)) {
+ netif_info(lio, tx_err, lio->netdev, "Transmit failed link_status : %d\n",
+ lio->linfo.link.s.link_up);
+ goto lio_xmit_failed;
+ }
+
+ /* Use space in skb->cb to store info used to unmap and
+ * free the buffers.
+ */
+ finfo = (struct octnet_buf_free_info *)skb->cb;
+ finfo->lio = lio;
+ finfo->skb = skb;
+ finfo->sc = NULL;
+
+ /* Prepare the attributes for the data to be passed to OSI. */
+ memset(&ndata, 0, sizeof(struct octnic_data_pkt));
+
+ ndata.buf = finfo;
+
+ ndata.q_no = iq_no;
+
+ if (netif_is_multiqueue(netdev)) {
+ if (octnet_iq_is_full(oct, ndata.q_no)) {
+ /* defer sending if queue is full */
+ netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+ ndata.q_no);
+ stats->tx_iq_busy++;
+ return NETDEV_TX_BUSY;
+ }
+ } else {
+ if (octnet_iq_is_full(oct, lio->txq)) {
+ /* defer sending if queue is full */
+ stats->tx_iq_busy++;
+ netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+ ndata.q_no);
+ return NETDEV_TX_BUSY;
+ }
+ }
+
+ ndata.datasize = skb->len;
+
+ cmdsetup.u64 = 0;
+ cmdsetup.s.iq_no = iq_no;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ cmdsetup.s.transport_csum = 1;
+
+ if (!skb_shinfo(skb)->nr_frags) {
+ cmdsetup.s.u.datasize = skb->len;
+ octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
+ /* Offload checksum calculation for TCP/UDP packets */
+ dptr = dma_map_single(&oct->pci_dev->dev,
+ skb->data,
+ skb->len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
+ dev_err(&oct->pci_dev->dev, "%s DMA mapping error 1\n",
+ __func__);
+ return NETDEV_TX_BUSY;
+ }
+
+ ndata.cmd.cmd3.dptr = dptr;
+ finfo->dptr = dptr;
+ ndata.reqtype = REQTYPE_NORESP_NET;
+
+ } else {
+ struct skb_frag_struct *frag;
+ struct octnic_gather *g;
+ int i, frags;
+
+ spin_lock(&lio->glist_lock[q_idx]);
+ g = (struct octnic_gather *)list_delete_head(
+ &lio->glist[q_idx]);
+ spin_unlock(&lio->glist_lock[q_idx]);
+
+ if (!g) {
+ netif_info(lio, tx_err, lio->netdev,
+ "Transmit scatter gather: glist null!\n");
+ goto lio_xmit_failed;
+ }
+
+ cmdsetup.s.gather = 1;
+ cmdsetup.s.u.gatherptrs = (skb_shinfo(skb)->nr_frags + 1);
+ octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
+
+ memset(g->sg, 0, g->sg_size);
+
+ g->sg[0].ptr[0] = dma_map_single(&oct->pci_dev->dev,
+ skb->data,
+ (skb->len - skb->data_len),
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&oct->pci_dev->dev, g->sg[0].ptr[0])) {
+ dev_err(&oct->pci_dev->dev, "%s DMA mapping error 2\n",
+ __func__);
+ return NETDEV_TX_BUSY;
+ }
+ add_sg_size(&g->sg[0], (skb->len - skb->data_len), 0);
+
+ frags = skb_shinfo(skb)->nr_frags;
+ i = 1;
+ while (frags--) {
+ frag = &skb_shinfo(skb)->frags[i - 1];
+
+ g->sg[(i >> 2)].ptr[(i & 3)] =
+ dma_map_page(&oct->pci_dev->dev,
+ frag->page.p,
+ frag->page_offset,
+ frag->size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&oct->pci_dev->dev,
+ g->sg[i >> 2].ptr[i & 3])) {
+ dma_unmap_single(&oct->pci_dev->dev,
+ g->sg[0].ptr[0],
+ skb->len - skb->data_len,
+ DMA_TO_DEVICE);
+ for (j = 1; j < i; j++) {
+ frag = &skb_shinfo(skb)->frags[j - 1];
+ dma_unmap_page(&oct->pci_dev->dev,
+ g->sg[j >> 2].ptr[j & 3],
+ frag->size,
+ DMA_TO_DEVICE);
+ }
+ dev_err(&oct->pci_dev->dev, "%s DMA mapping error 3\n",
+ __func__);
+ return NETDEV_TX_BUSY;
+ }
+
+ add_sg_size(&g->sg[(i >> 2)], frag->size, (i & 3));
+ i++;
+ }
+
+ dptr = dma_map_single(&oct->pci_dev->dev,
+ g->sg, g->sg_size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
+ dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n",
+ __func__);
+ dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0],
+ skb->len - skb->data_len,
+ DMA_TO_DEVICE);
+ for (j = 1; j <= frags; j++) {
+ frag = &skb_shinfo(skb)->frags[j - 1];
+ dma_unmap_page(&oct->pci_dev->dev,
+ g->sg[j >> 2].ptr[j & 3],
+ frag->size, DMA_TO_DEVICE);
+ }
+ return NETDEV_TX_BUSY;
+ }
+
+ ndata.cmd.cmd3.dptr = dptr;
+ finfo->dptr = dptr;
+ finfo->g = g;
+
+ ndata.reqtype = REQTYPE_NORESP_NET_SG;
+ }
+
+ irh = (struct octeon_instr_irh *)&ndata.cmd.cmd3.irh;
+ tx_info = (union tx_info *)&ndata.cmd.cmd3.ossp[0];
+
+ if (skb_shinfo(skb)->gso_size) {
+ tx_info->s.gso_size = skb_shinfo(skb)->gso_size;
+ tx_info->s.gso_segs = skb_shinfo(skb)->gso_segs;
+ }
+
+ status = octnet_send_nic_data_pkt(oct, &ndata);
+ if (status == IQ_SEND_FAILED)
+ goto lio_xmit_failed;
+
+ netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n");
+
+ if (status == IQ_SEND_STOP) {
+ dev_err(&oct->pci_dev->dev, "Rcvd IQ_SEND_STOP signal; stopping IQ-%d\n",
+ iq_no);
+ stop_q(lio->netdev, q_idx);
+ }
+
+ netif_trans_update(netdev);
+
+ if (skb_shinfo(skb)->gso_size)
+ stats->tx_done += skb_shinfo(skb)->gso_segs;
+ else
+ stats->tx_done++;
+ stats->tx_tot_bytes += skb->len;
+
+ return NETDEV_TX_OK;
+
+lio_xmit_failed:
+ stats->tx_dropped++;
+ netif_info(lio, tx_err, lio->netdev, "IQ%d Transmit dropped:%llu\n",
+ iq_no, stats->tx_dropped);
+ if (dptr)
+ dma_unmap_single(&oct->pci_dev->dev, dptr,
+ ndata.datasize, DMA_TO_DEVICE);
+ tx_buffer_free(skb);
+ return NETDEV_TX_OK;
+}
+
+/** \brief Network device Tx timeout
+ * @param netdev pointer to network device
+ */
+static void liquidio_tx_timeout(struct net_device *netdev)
+{
+ struct lio *lio;
+
+ lio = GET_LIO(netdev);
+
+ netif_info(lio, tx_err, lio->netdev,
+ "Transmit timeout tx_dropped:%ld, waking up queues now!!\n",
+ netdev->stats.tx_dropped);
+ netif_trans_update(netdev);
+ txqs_wake(netdev);
+}
+
+/** Sending command to enable/disable RX checksum offload
+ * @param netdev pointer to network device
+ * @param command OCTNET_CMD_TNL_RX_CSUM_CTL
+ * @param rx_cmd_bit OCTNET_CMD_RXCSUM_ENABLE/
+ * OCTNET_CMD_RXCSUM_DISABLE
+ * @returns SUCCESS or FAILURE
+ */
+static int liquidio_set_rxcsum_command(struct net_device *netdev, int command,
+ u8 rx_cmd)
+{
+ struct lio *lio = GET_LIO(netdev);
+ struct octeon_device *oct = lio->oct_dev;
+ struct octnic_ctrl_pkt nctrl;
+ int ret = 0;
+
+ nctrl.ncmd.u64 = 0;
+ nctrl.ncmd.s.cmd = command;
+ nctrl.ncmd.s.param1 = rx_cmd;
+ nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+ nctrl.wait_time = 100;
+ nctrl.netpndev = (u64)netdev;
+ nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+ ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+ if (ret < 0) {
+ dev_err(&oct->pci_dev->dev, "DEVFLAGS RXCSUM change failed in core (ret:0x%x)\n",
+ ret);
+ }
+ return ret;
+}
+
+/** \brief Net device fix features
+ * @param netdev pointer to network device
+ * @param request features requested
+ * @returns updated features list
+ */
+static netdev_features_t liquidio_fix_features(struct net_device *netdev,
+ netdev_features_t request)
+{
+ struct lio *lio = netdev_priv(netdev);
+
+ if ((request & NETIF_F_RXCSUM) &&
+ !(lio->dev_capability & NETIF_F_RXCSUM))
+ request &= ~NETIF_F_RXCSUM;
+
+ if ((request & NETIF_F_HW_CSUM) &&
+ !(lio->dev_capability & NETIF_F_HW_CSUM))
+ request &= ~NETIF_F_HW_CSUM;
+
+ if ((request & NETIF_F_TSO) && !(lio->dev_capability & NETIF_F_TSO))
+ request &= ~NETIF_F_TSO;
+
+ if ((request & NETIF_F_TSO6) && !(lio->dev_capability & NETIF_F_TSO6))
+ request &= ~NETIF_F_TSO6;
+
+ if ((request & NETIF_F_LRO) && !(lio->dev_capability & NETIF_F_LRO))
+ request &= ~NETIF_F_LRO;
+
+ /* Disable LRO if RXCSUM is off */
+ if (!(request & NETIF_F_RXCSUM) && (netdev->features & NETIF_F_LRO) &&
+ (lio->dev_capability & NETIF_F_LRO))
+ request &= ~NETIF_F_LRO;
+
+ return request;
+}
+
+/** \brief Net device set features
+ * @param netdev pointer to network device
+ * @param features features to enable/disable
+ */
+static int liquidio_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct lio *lio = netdev_priv(netdev);
+
+ if (!((netdev->features ^ features) & NETIF_F_LRO))
+ return 0;
+
+ if ((features & NETIF_F_LRO) && (lio->dev_capability & NETIF_F_LRO))
+ liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
+ OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
+ else if (!(features & NETIF_F_LRO) &&
+ (lio->dev_capability & NETIF_F_LRO))
+ liquidio_set_feature(netdev, OCTNET_CMD_LRO_DISABLE,
+ OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
+ if (!(netdev->features & NETIF_F_RXCSUM) &&
+ (lio->enc_dev_capability & NETIF_F_RXCSUM) &&
+ (features & NETIF_F_RXCSUM))
+ liquidio_set_rxcsum_command(netdev, OCTNET_CMD_TNL_RX_CSUM_CTL,
+ OCTNET_CMD_RXCSUM_ENABLE);
+ else if ((netdev->features & NETIF_F_RXCSUM) &&
+ (lio->enc_dev_capability & NETIF_F_RXCSUM) &&
+ !(features & NETIF_F_RXCSUM))
+ liquidio_set_rxcsum_command(netdev, OCTNET_CMD_TNL_RX_CSUM_CTL,
+ OCTNET_CMD_RXCSUM_DISABLE);
+
+ return 0;
+}
+
+static const struct net_device_ops lionetdevops = {
+ .ndo_open = liquidio_open,
+ .ndo_stop = liquidio_stop,
+ .ndo_start_xmit = liquidio_xmit,
+ .ndo_set_mac_address = liquidio_set_mac,
+ .ndo_set_rx_mode = liquidio_set_mcast_list,
+ .ndo_tx_timeout = liquidio_tx_timeout,
+ .ndo_change_mtu = liquidio_change_mtu,
+ .ndo_fix_features = liquidio_fix_features,
+ .ndo_set_features = liquidio_set_features,
+ .ndo_select_queue = select_q,
+};
+
+static int lio_nic_info(struct octeon_recv_info *recv_info, void *buf)
+{
+ struct octeon_device *oct = (struct octeon_device *)buf;
+ struct octeon_recv_pkt *recv_pkt = recv_info->recv_pkt;
+ union oct_link_status *ls;
+ int gmxport = 0;
+ int i;
+
+ if (recv_pkt->buffer_size[0] != sizeof(*ls)) {
+ dev_err(&oct->pci_dev->dev, "Malformed NIC_INFO, len=%d, ifidx=%d\n",
+ recv_pkt->buffer_size[0],
+ recv_pkt->rh.r_nic_info.gmxport);
+ goto nic_info_err;
+ }
+
+ gmxport = recv_pkt->rh.r_nic_info.gmxport;
+ ls = (union oct_link_status *)get_rbd(recv_pkt->buffer_ptr[0]);
+
+ octeon_swap_8B_data((u64 *)ls, (sizeof(union oct_link_status)) >> 3);
+
+ for (i = 0; i < oct->ifcount; i++) {
+ if (oct->props[i].gmxport == gmxport) {
+ update_link_status(oct->props[i].netdev, ls);
+ break;
+ }
+ }
+
+nic_info_err:
+ for (i = 0; i < recv_pkt->buffer_count; i++)
+ recv_buffer_free(recv_pkt->buffer_ptr[i]);
+ octeon_free_recv_info(recv_info);
+ return 0;
+}
+
+/**
+ * \brief Setup network interfaces
+ * @param octeon_dev octeon device
+ *
+ * Called during init time for each device. It assumes the NIC
+ * is already up and running. The link information for each
+ * interface is passed in link_info.
+ */
+static int setup_nic_devices(struct octeon_device *octeon_dev)
+{
+ int retval, num_iqueues, num_oqueues;
+ struct liquidio_if_cfg_context *ctx;
+ u32 resp_size, ctx_size, data_size;
+ struct liquidio_if_cfg_resp *resp;
+ struct octeon_soft_command *sc;
+ union oct_nic_if_cfg if_cfg;
+ struct octdev_props *props;
+ struct net_device *netdev;
+ struct lio_version *vdata;
+ struct lio *lio = NULL;
+ u8 mac[ETH_ALEN], i, j;
+ u32 ifidx_or_pfnum;
+
+ ifidx_or_pfnum = octeon_dev->pf_num;
+
+ /* This is to handle link status changes */
+ octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC, OPCODE_NIC_INFO,
+ lio_nic_info, octeon_dev);
+
+ /* REQTYPE_RESP_NET and REQTYPE_SOFT_COMMAND do not have free functions.
+ * They are handled directly.
+ */
+ octeon_register_reqtype_free_fn(octeon_dev, REQTYPE_NORESP_NET,
+ free_netbuf);
+
+ octeon_register_reqtype_free_fn(octeon_dev, REQTYPE_NORESP_NET_SG,
+ free_netsgbuf);
+
+ octeon_register_reqtype_free_fn(octeon_dev, REQTYPE_RESP_NET_SG,
+ free_netsgbuf_with_resp);
+
+ for (i = 0; i < octeon_dev->ifcount; i++) {
+ resp_size = sizeof(struct liquidio_if_cfg_resp);
+ ctx_size = sizeof(struct liquidio_if_cfg_context);
+ data_size = sizeof(struct lio_version);
+ sc = (struct octeon_soft_command *)
+ octeon_alloc_soft_command(octeon_dev, data_size,
+ resp_size, ctx_size);
+ resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
+ ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+ vdata = (struct lio_version *)sc->virtdptr;
+
+ *((u64 *)vdata) = 0;
+ vdata->major = cpu_to_be16(LIQUIDIO_BASE_MAJOR_VERSION);
+ vdata->minor = cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION);
+ vdata->micro = cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION);
+
+ WRITE_ONCE(ctx->cond, 0);
+ ctx->octeon_id = lio_get_device_id(octeon_dev);
+ init_waitqueue_head(&ctx->wc);
+
+ if_cfg.u64 = 0;
+
+ if_cfg.s.num_iqueues = octeon_dev->sriov_info.rings_per_vf;
+ if_cfg.s.num_oqueues = octeon_dev->sriov_info.rings_per_vf;
+ if_cfg.s.base_queue = 0;
+
+ sc->iq_no = 0;
+
+ octeon_prepare_soft_command(octeon_dev, sc, OPCODE_NIC,
+ OPCODE_NIC_IF_CFG, 0, if_cfg.u64,
+ 0);
+
+ sc->callback = if_cfg_callback;
+ sc->callback_arg = sc;
+ sc->wait_time = 5000;
+
+ retval = octeon_send_soft_command(octeon_dev, sc);
+ if (retval == IQ_SEND_FAILED) {
+ dev_err(&octeon_dev->pci_dev->dev,
+ "iq/oq config failed status: %x\n", retval);
+ /* Soft instr is freed by driver in case of failure. */
+ goto setup_nic_dev_fail;
+ }
+
+ /* Sleep on a wait queue till the cond flag indicates that the
+ * response arrived or timed-out.
+ */
+ if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
+ dev_err(&octeon_dev->pci_dev->dev, "Wait interrupted\n");
+ goto setup_nic_wait_intr;
+ }
+
+ retval = resp->status;
+ if (retval) {
+ dev_err(&octeon_dev->pci_dev->dev, "iq/oq config failed\n");
+ goto setup_nic_dev_fail;
+ }
+
+ octeon_swap_8B_data((u64 *)(&resp->cfg_info),
+ (sizeof(struct liquidio_if_cfg_info)) >> 3);
+
+ num_iqueues = hweight64(resp->cfg_info.iqmask);
+ num_oqueues = hweight64(resp->cfg_info.oqmask);
+
+ if (!(num_iqueues) || !(num_oqueues)) {
+ dev_err(&octeon_dev->pci_dev->dev,
+ "Got bad iqueues (%016llx) or oqueues (%016llx) from firmware.\n",
+ resp->cfg_info.iqmask, resp->cfg_info.oqmask);
+ goto setup_nic_dev_fail;
+ }
+ dev_dbg(&octeon_dev->pci_dev->dev,
+ "interface %d, iqmask %016llx, oqmask %016llx, numiqueues %d, numoqueues %d\n",
+ i, resp->cfg_info.iqmask, resp->cfg_info.oqmask,
+ num_iqueues, num_oqueues);
+
+ netdev = alloc_etherdev_mq(LIO_SIZE, num_iqueues);
+
+ if (!netdev) {
+ dev_err(&octeon_dev->pci_dev->dev, "Device allocation failed\n");
+ goto setup_nic_dev_fail;
+ }
+
+ SET_NETDEV_DEV(netdev, &octeon_dev->pci_dev->dev);
+
+ /* Associate the routines that will handle different
+ * netdev tasks.
+ */
+ netdev->netdev_ops = &lionetdevops;
+
+ lio = GET_LIO(netdev);
+
+ memset(lio, 0, sizeof(struct lio));
+
+ lio->ifidx = ifidx_or_pfnum;
+
+ props = &octeon_dev->props[i];
+ props->gmxport = resp->cfg_info.linfo.gmxport;
+ props->netdev = netdev;
+
+ lio->linfo.num_rxpciq = num_oqueues;
+ lio->linfo.num_txpciq = num_iqueues;
+
+ for (j = 0; j < num_oqueues; j++) {
+ lio->linfo.rxpciq[j].u64 =
+ resp->cfg_info.linfo.rxpciq[j].u64;
+ }
+ for (j = 0; j < num_iqueues; j++) {
+ lio->linfo.txpciq[j].u64 =
+ resp->cfg_info.linfo.txpciq[j].u64;
+ }
+
+ lio->linfo.hw_addr = resp->cfg_info.linfo.hw_addr;
+ lio->linfo.gmxport = resp->cfg_info.linfo.gmxport;
+ lio->linfo.link.u64 = resp->cfg_info.linfo.link.u64;
+ lio->linfo.macaddr_is_admin_asgnd =
+ resp->cfg_info.linfo.macaddr_is_admin_asgnd;
+
+ lio->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+ lio->dev_capability = NETIF_F_HIGHDMA
+ | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
+ | NETIF_F_SG | NETIF_F_RXCSUM
+ | NETIF_F_TSO | NETIF_F_TSO6
+ | NETIF_F_GRO
+ | NETIF_F_LRO;
+ netif_set_gso_max_size(netdev, OCTNIC_GSO_MAX_SIZE);
+
+ netdev->features = (lio->dev_capability & ~NETIF_F_LRO);
+
+ netdev->hw_features = lio->dev_capability;
+
+ /* MTU range: 68 - 16000 */
+ netdev->min_mtu = LIO_MIN_MTU_SIZE;
+ netdev->max_mtu = LIO_MAX_MTU_SIZE;
+
+ /* Point to the properties for octeon device to which this
+ * interface belongs.
+ */
+ lio->oct_dev = octeon_dev;
+ lio->octprops = props;
+ lio->netdev = netdev;
+
+ dev_dbg(&octeon_dev->pci_dev->dev,
+ "if%d gmx: %d hw_addr: 0x%llx\n", i,
+ lio->linfo.gmxport, CVM_CAST64(lio->linfo.hw_addr));
+
+ /* 64-bit swap required on LE machines */
+ octeon_swap_8B_data(&lio->linfo.hw_addr, 1);
+ for (j = 0; j < ETH_ALEN; j++)
+ mac[j] = *((u8 *)(((u8 *)&lio->linfo.hw_addr) + 2 + j));
+
+ /* Copy MAC Address to OS network device structure */
+ ether_addr_copy(netdev->dev_addr, mac);
+
+ if (setup_io_queues(octeon_dev, i)) {
+ dev_err(&octeon_dev->pci_dev->dev, "I/O queues creation failed\n");
+ goto setup_nic_dev_fail;
+ }
+
+ ifstate_set(lio, LIO_IFSTATE_DROQ_OPS);
+
+ /* For VFs, enable Octeon device interrupts here,
+ * as this is contingent upon IO queue setup
+ */
+ octeon_dev->fn_list.enable_interrupt(octeon_dev,
+ OCTEON_ALL_INTR);
+
+ /* By default all interfaces on a single Octeon uses the same
+ * tx and rx queues
+ */
+ lio->txq = lio->linfo.txpciq[0].s.q_no;
+ lio->rxq = lio->linfo.rxpciq[0].s.q_no;
+
+ lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
+ lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
+
+ if (setup_glists(lio, num_iqueues)) {
+ dev_err(&octeon_dev->pci_dev->dev,
+ "Gather list allocation failed\n");
+ goto setup_nic_dev_fail;
+ }
+
+ if (netdev->features & NETIF_F_LRO)
+ liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
+ OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
+
+ if ((debug != -1) && (debug & NETIF_MSG_HW))
+ liquidio_set_feature(netdev, OCTNET_CMD_VERBOSE_ENABLE,
+ 0);
+
+ if (setup_link_status_change_wq(netdev))
+ goto setup_nic_dev_fail;
+
+ /* Register the network device with the OS */
+ if (register_netdev(netdev)) {
+ dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n");
+ goto setup_nic_dev_fail;
+ }
+
+ dev_dbg(&octeon_dev->pci_dev->dev,
+ "Setup NIC ifidx:%d mac:%02x%02x%02x%02x%02x%02x\n",
+ i, mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+ netif_carrier_off(netdev);
+ lio->link_changes++;
+
+ ifstate_set(lio, LIO_IFSTATE_REGISTERED);
+
+ /* Sending command to firmware to enable Rx checksum offload
+ * by default at the time of setup of Liquidio driver for
+ * this device
+ */
+ liquidio_set_rxcsum_command(netdev, OCTNET_CMD_TNL_RX_CSUM_CTL,
+ OCTNET_CMD_RXCSUM_ENABLE);
+ liquidio_set_feature(netdev, OCTNET_CMD_TNL_TX_CSUM_CTL,
+ OCTNET_CMD_TXCSUM_ENABLE);
+
+ dev_dbg(&octeon_dev->pci_dev->dev,
+ "NIC ifidx:%d Setup successful\n", i);
+
+ octeon_free_soft_command(octeon_dev, sc);
+ }
+
+ return 0;
+
+setup_nic_dev_fail:
+
+ octeon_free_soft_command(octeon_dev, sc);
+
+setup_nic_wait_intr:
+
+ while (i--) {
+ dev_err(&octeon_dev->pci_dev->dev,
+ "NIC ifidx:%d Setup failed\n", i);
+ liquidio_destroy_nic_device(octeon_dev, i);
+ }
+ return -ENODEV;
+}
+
+/**
+ * \brief initialize the NIC
+ * @param oct octeon device
+ *
+ * This initialization routine is called once the Octeon device application is
+ * up and running
+ */
+static int liquidio_init_nic_module(struct octeon_device *oct)
+{
+ int num_nic_ports = 1;
+ int i, retval = 0;
+
+ dev_dbg(&oct->pci_dev->dev, "Initializing network interfaces\n");
+
+ /* only default iq and oq were initialized
+ * initialize the rest as well run port_config command for each port
+ */
+ oct->ifcount = num_nic_ports;
+ memset(oct->props, 0,
+ sizeof(struct octdev_props) * num_nic_ports);
+
+ for (i = 0; i < MAX_OCTEON_LINKS; i++)
+ oct->props[i].gmxport = -1;
+
+ retval = setup_nic_devices(oct);
+ if (retval) {
+ dev_err(&oct->pci_dev->dev, "Setup NIC devices failed\n");
+ goto octnet_init_failure;
+ }
+
+octnet_init_failure:
+
+ oct->ifcount = 0;
+
+ return retval;
+}
+
/**
* \brief Device initialization for each Octeon device that is probed
* @param octeon_dev octeon device
@@ -498,6 +2563,8 @@ static int octeon_device_init(struct octeon_device *oct)
atomic_set(&oct->status, OCT_DEV_PCI_MAP_DONE);
+ oct->app_mode = CVM_DRV_NIC_APP;
+
/* Initialize the dispatch mechanism used to push packets arriving on
* Octeon Output queues.
*/
@@ -594,6 +2661,9 @@ static int octeon_device_init(struct octeon_device *oct)
atomic_set(&oct->status, OCT_DEV_RUNNING);
+ if (liquidio_init_nic_module(oct))
+ return 1;
+
return 0;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index f308ee49a754..ba329f6ca779 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -212,6 +212,7 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
#define OCTNET_CMD_ID_ACTIVE 0x1a
+#define OCTNET_CMD_SET_UC_LIST 0x1b
#define OCTNET_CMD_SET_VF_LINKSTATE 0x1c
#define OCTNET_CMD_VXLAN_PORT_ADD 0x0
#define OCTNET_CMD_VXLAN_PORT_DEL 0x1
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 6d54032b10ab..a8df493a5012 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -1221,6 +1221,9 @@ struct octeon_config *octeon_get_conf(struct octeon_device *oct)
} else if (OCTEON_CN23XX_PF(oct)) {
default_oct_conf = (struct octeon_config *)
(CHIP_CONF(oct, cn23xx_pf));
+ } else if (OCTEON_CN23XX_VF(oct)) {
+ default_oct_conf = (struct octeon_config *)
+ (CHIP_CONF(oct, cn23xx_vf));
}
return default_oct_conf;
}
@@ -1371,7 +1374,7 @@ void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq)
/*write resend. Writing RESEND in SLI_PKTX_CNTS should be enough
*to trigger tx interrupts as well, if they are pending.
*/
- if (oct && OCTEON_CN23XX_PF(oct)) {
+ if (oct && (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct))) {
if (droq)
writeq(CN23XX_INTR_RESEND, droq->pkts_sent_reg);
/*we race with firmrware here. read and write the IN_DONE_CNTS*/
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index 8bf1ac76bcdc..0be87d119a97 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -28,6 +28,7 @@
#include "cn66xx_regs.h"
#include "cn66xx_device.h"
#include "cn23xx_pf_device.h"
+#include "cn23xx_vf_device.h"
struct niclist {
struct list_head list;
@@ -261,6 +262,11 @@ int octeon_init_droq(struct octeon_device *oct,
c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
+ } else if (OCTEON_CN23XX_VF(oct)) {
+ struct octeon_config *conf23 = CHIP_CONF(oct, cn23xx_vf);
+
+ c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
+ c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
} else {
return 1;
}
@@ -889,6 +895,10 @@ octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd,
lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
}
break;
+
+ case OCTEON_CN23XX_VF_VID:
+ lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
+ break;
}
return 0;
}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index e94edc841cad..6bb89419006e 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -123,6 +123,7 @@ struct lio {
/* work queue for link status */
struct cavium_wq link_status_wq;
+ int netdev_uc_count;
};
#define LIO_SIZE (sizeof(struct lio))
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index ea2b7e46631d..3ce66759e80a 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -394,7 +394,7 @@ lio_process_iq_request_list(struct octeon_device *oct,
case REQTYPE_SOFT_COMMAND:
sc = buf;
- if (OCTEON_CN23XX_PF(oct))
+ if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct))
irh = (struct octeon_instr_irh *)
&sc->cmd.cmd3.irh;
else
@@ -607,7 +607,7 @@ octeon_prepare_soft_command(struct octeon_device *oct,
oct_cfg = octeon_get_conf(oct);
- if (OCTEON_CN23XX_PF(oct)) {
+ if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
@@ -700,7 +700,7 @@ int octeon_send_soft_command(struct octeon_device *oct,
struct octeon_instr_irh *irh;
u32 len;
- if (OCTEON_CN23XX_PF(oct)) {
+ if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
if (ih3->dlengsz) {
WARN_ON(!sc->dmadptr);
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index fdaf742a59cb..2fbaae96b505 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -84,7 +84,8 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
sc = (struct octeon_soft_command *)ordered_sc_list->
head.next;
- if (OCTEON_CN23XX_PF(octeon_dev)) {
+ if (OCTEON_CN23XX_PF(octeon_dev) ||
+ OCTEON_CN23XX_VF(octeon_dev)) {
rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
rptr = sc->cmd.cmd3.rptr;
} else {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 449884f8dd67..48113c6609db 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -134,24 +134,6 @@ MODULE_FIRMWARE(FW5_FNAME);
MODULE_FIRMWARE(FW6_FNAME);
/*
- * Normally we're willing to become the firmware's Master PF but will be happy
- * if another PF has already become the Master and initialized the adapter.
- * Setting "force_init" will cause this driver to forcibly establish itself as
- * the Master PF and initialize the adapter.
- */
-static uint force_init;
-
-module_param(force_init, uint, 0644);
-MODULE_PARM_DESC(force_init, "Forcibly become Master PF and initialize adapter,"
- "deprecated parameter");
-
-static int dflt_msg_enable = DFLT_MSG_ENABLE;
-
-module_param(dflt_msg_enable, int, 0644);
-MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap, "
- "deprecated parameter");
-
-/*
* The driver uses the best interrupt scheme available on a platform in the
* order MSI-X, MSI, legacy INTx interrupts. This parameter determines which
* of these schemes the driver may consider as follows:
@@ -179,16 +161,6 @@ MODULE_PARM_DESC(msi, "whether to use INTx (0), MSI (1) or MSI-X (2)");
*/
static int rx_dma_offset = 2;
-#ifdef CONFIG_PCI_IOV
-/* Configure the number of PCI-E Virtual Function which are to be instantiated
- * on SR-IOV Capable Physical Functions.
- */
-static unsigned int num_vf[NUM_OF_PF_WITH_SRIOV];
-
-module_param_array(num_vf, uint, NULL, 0644);
-MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3, deprecated parameter - please use the pci sysfs interface.");
-#endif
-
/* TX Queue select used to determine what algorithm to use for selecting TX
* queue. Select between the kernel provided function (select_queue=0) or user
* cxgb_select_queue function (select_queue=1)
@@ -4729,7 +4701,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
adapter->name = pci_name(pdev);
adapter->mbox = func;
adapter->pf = func;
- adapter->msg_enable = dflt_msg_enable;
+ adapter->msg_enable = DFLT_MSG_ENABLE;
memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
spin_lock_init(&adapter->stats_lock);
@@ -4988,17 +4960,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
sriov:
#ifdef CONFIG_PCI_IOV
- if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0) {
- dev_warn(&pdev->dev,
- "Enabling SR-IOV VFs using the num_vf module "
- "parameter is deprecated - please use the pci sysfs "
- "interface instead.\n");
- if (pci_enable_sriov(pdev, num_vf[func]) == 0)
- dev_info(&pdev->dev,
- "instantiated %u virtual functions\n",
- num_vf[func]);
- }
-
adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
if (!adapter) {
err = -ENOMEM;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 5d4da0e8acaa..fa43e06d3a29 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -70,13 +70,6 @@
NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
-static int dflt_msg_enable = DFLT_MSG_ENABLE;
-
-module_param(dflt_msg_enable, int, 0644);
-MODULE_PARM_DESC(dflt_msg_enable,
- "default adapter ethtool message level bitmap, "
- "deprecated parameter");
-
/*
* The driver uses the best interrupt scheme available on a platform in the
* order MSI-X then MSI. This parameter determines which of these schemes the
@@ -2891,7 +2884,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
* Initialize adapter level features.
*/
adapter->name = pci_name(pdev);
- adapter->msg_enable = dflt_msg_enable;
+ adapter->msg_enable = DFLT_MSG_ENABLE;
err = adap_init0(adapter);
if (err)
goto err_unmap_bar;
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 5e5b259dd2cc..e05e22705cf7 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3578,9 +3578,10 @@ static int mvneta_stop(struct net_device *dev)
mvneta_stop_dev(pp);
mvneta_mdio_remove(pp);
- cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
- cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
- &pp->node_dead);
+ cpuhp_state_remove_instance_nocalls(online_hpstate,
+ &pp->node_online);
+ cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+ &pp->node_dead);
on_each_cpu(mvneta_percpu_disable, pp, true);
free_percpu_irq(dev->irq, pp->ports);
} else {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 49a81f1fc1d6..bcd955339058 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -51,6 +51,9 @@
#include "mlx4_en.h"
#include "en_port.h"
+#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) - \
+ XDP_PACKET_HEADROOM))
+
int mlx4_en_setup_tc(struct net_device *dev, u8 up)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -2249,6 +2252,19 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
free_netdev(dev);
}
+static bool mlx4_en_check_xdp_mtu(struct net_device *dev, int mtu)
+{
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (mtu > MLX4_EN_MAX_XDP_MTU) {
+ en_err(priv, "mtu:%d > max:%d when XDP prog is attached\n",
+ mtu, MLX4_EN_MAX_XDP_MTU);
+ return false;
+ }
+
+ return true;
+}
+
static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -2258,11 +2274,10 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
en_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n",
dev->mtu, new_mtu);
- if (priv->tx_ring_num[TX_XDP] && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) {
- en_err(priv, "MTU size:%d requires frags but XDP running\n",
- new_mtu);
- return -EOPNOTSUPP;
- }
+ if (priv->tx_ring_num[TX_XDP] &&
+ !mlx4_en_check_xdp_mtu(dev, new_mtu))
+ return -ENOTSUPP;
+
dev->mtu = new_mtu;
if (netif_running(dev)) {
@@ -2710,10 +2725,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
return 0;
}
- if (priv->num_frags > 1) {
- en_err(priv, "Cannot set XDP if MTU requires multiple frags\n");
+ if (!mlx4_en_check_xdp_mtu(dev, dev->mtu))
return -EOPNOTSUPP;
- }
tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
if (!tmp)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 6562f78b07f4..3c37e216bbf3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -96,7 +96,6 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
const struct mlx4_en_frag_info *frag_info;
struct page *page;
- dma_addr_t dma;
int i;
for (i = 0; i < priv->num_frags; i++) {
@@ -115,9 +114,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
for (i = 0; i < priv->num_frags; i++) {
frags[i] = ring_alloc[i];
- dma = ring_alloc[i].dma + ring_alloc[i].page_offset;
+ frags[i].page_offset += priv->frag_info[i].rx_headroom;
+ rx_desc->data[i].addr = cpu_to_be64(frags[i].dma +
+ frags[i].page_offset);
ring_alloc[i] = page_alloc[i];
- rx_desc->data[i].addr = cpu_to_be64(dma);
}
return 0;
@@ -250,7 +250,8 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
if (ring->page_cache.index > 0) {
frags[0] = ring->page_cache.buf[--ring->page_cache.index];
- rx_desc->data[0].addr = cpu_to_be64(frags[0].dma);
+ rx_desc->data[0].addr = cpu_to_be64(frags[0].dma +
+ frags[0].page_offset);
return 0;
}
@@ -889,6 +890,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
if (xdp_prog) {
struct xdp_buff xdp;
dma_addr_t dma;
+ void *orig_data;
u32 act;
dma = be64_to_cpu(rx_desc->data[0].addr);
@@ -896,11 +898,19 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
priv->frag_info[0].frag_size,
DMA_FROM_DEVICE);
- xdp.data = page_address(frags[0].page) +
- frags[0].page_offset;
+ xdp.data_hard_start = page_address(frags[0].page);
+ xdp.data = xdp.data_hard_start + frags[0].page_offset;
xdp.data_end = xdp.data + length;
+ orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ if (xdp.data != orig_data) {
+ length = xdp.data_end - xdp.data;
+ frags[0].page_offset = xdp.data -
+ xdp.data_hard_start;
+ }
+
switch (act) {
case XDP_PASS:
break;
@@ -1164,37 +1174,41 @@ static const int frag_sizes[] = {
void mlx4_en_calc_rx_buf(struct net_device *dev)
{
- enum dma_data_direction dma_dir = PCI_DMA_FROMDEVICE;
struct mlx4_en_priv *priv = netdev_priv(dev);
int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
- int order = MLX4_EN_ALLOC_PREFER_ORDER;
- u32 align = SMP_CACHE_BYTES;
- int buf_size = 0;
int i = 0;
/* bpf requires buffers to be set up as 1 packet per page.
* This only works when num_frags == 1.
*/
if (priv->tx_ring_num[TX_XDP]) {
- dma_dir = PCI_DMA_BIDIRECTIONAL;
- /* This will gain efficient xdp frame recycling at the expense
- * of more costly truesize accounting
+ priv->frag_info[0].order = 0;
+ priv->frag_info[0].frag_size = eff_mtu;
+ priv->frag_info[0].frag_prefix_size = 0;
+ /* This will gain efficient xdp frame recycling at the
+ * expense of more costly truesize accounting
*/
- align = PAGE_SIZE;
- order = 0;
- }
-
- while (buf_size < eff_mtu) {
- priv->frag_info[i].order = order;
- priv->frag_info[i].frag_size =
- (eff_mtu > buf_size + frag_sizes[i]) ?
- frag_sizes[i] : eff_mtu - buf_size;
- priv->frag_info[i].frag_prefix_size = buf_size;
- priv->frag_info[i].frag_stride =
- ALIGN(priv->frag_info[i].frag_size, align);
- priv->frag_info[i].dma_dir = dma_dir;
- buf_size += priv->frag_info[i].frag_size;
- i++;
+ priv->frag_info[0].frag_stride = PAGE_SIZE;
+ priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL;
+ priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM;
+ i = 1;
+ } else {
+ int buf_size = 0;
+
+ while (buf_size < eff_mtu) {
+ priv->frag_info[i].order = MLX4_EN_ALLOC_PREFER_ORDER;
+ priv->frag_info[i].frag_size =
+ (eff_mtu > buf_size + frag_sizes[i]) ?
+ frag_sizes[i] : eff_mtu - buf_size;
+ priv->frag_info[i].frag_prefix_size = buf_size;
+ priv->frag_info[i].frag_stride =
+ ALIGN(priv->frag_info[i].frag_size,
+ SMP_CACHE_BYTES);
+ priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE;
+ priv->frag_info[i].rx_headroom = 0;
+ buf_size += priv->frag_info[i].frag_size;
+ i++;
+ }
}
priv->num_frags = i;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 4b597dca5c52..5886ad78058f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -354,7 +354,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
struct mlx4_en_rx_alloc frame = {
.page = tx_info->page,
.dma = tx_info->map0_dma,
- .page_offset = 0,
+ .page_offset = XDP_PACKET_HEADROOM,
.page_size = PAGE_SIZE,
};
@@ -1132,7 +1132,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
tx_info->page = frame->page;
frame->page = NULL;
tx_info->map0_dma = dma;
- tx_info->map0_byte_count = length;
+ tx_info->map0_byte_count = PAGE_SIZE;
tx_info->nr_txbb = nr_txbb;
tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
tx_info->data_offset = (void *)data - (void *)tx_desc;
@@ -1141,9 +1141,10 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
tx_info->linear = 1;
tx_info->inl = 0;
- dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE);
+ dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
+ length, PCI_DMA_TODEVICE);
- data->addr = cpu_to_be64(dma);
+ data->addr = cpu_to_be64(dma + frame->page_offset);
data->lkey = ring->mr_key;
dma_wmb();
data->byte_count = cpu_to_be32(length);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 20a936428f4a..ba1c6cd0cc79 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -475,7 +475,8 @@ struct mlx4_en_frag_info {
u16 frag_prefix_size;
u32 frag_stride;
enum dma_data_direction dma_dir;
- int order;
+ u16 order;
+ u16 rx_headroom;
};
#ifdef CONFIG_MLX4_EN_DCB
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 07020276fe73..cbfa38fc72c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3183,6 +3183,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
bool reset, was_opened;
int i;
+ if (prog && prog->xdp_adjust_head) {
+ netdev_err(netdev, "Does not support bpf_xdp_adjust_head()\n");
+ return -EOPNOTSUPP;
+ }
+
mutex_lock(&priv->state_lock);
if ((netdev->features & NETIF_F_LRO) && prog) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index f07ef8c7da55..f8829b517156 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -31,6 +31,7 @@
*/
#include <net/flow_dissector.h>
+#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
@@ -363,7 +364,18 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
skb_flow_dissector_target(f->dissector,
FLOW_DISSECTOR_KEY_CONTROL,
f->key);
+
+ struct flow_dissector_key_control *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ f->mask);
addr_type = key->addr_type;
+
+ if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
+ key->flags & FLOW_DIS_IS_FRAGMENT);
+ }
}
if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 00d9a03be31d..e8d448109e03 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2946,6 +2946,10 @@ static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog)
};
int err;
+ if (prog && prog->xdp_adjust_head) {
+ nn_err(nn, "Does not support bpf_xdp_adjust_head()\n");
+ return -EOPNOTSUPP;
+ }
if (!prog && !nn->xdp_prog)
return 0;
if (prog && nn->xdp_prog) {
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index cf1dd1436d93..aecdd1c5c0ea 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -2507,6 +2507,11 @@ static int qede_xdp_set(struct qede_dev *edev, struct bpf_prog *prog)
{
struct qede_reload_args args;
+ if (prog && prog->xdp_adjust_head) {
+ DP_ERR(edev, "Does not support bpf_xdp_adjust_head()\n");
+ return -EOPNOTSUPP;
+ }
+
/* If we're called, there was already a bpf reference increment */
args.func = &qede_xdp_reload_func;
args.u.new_prog = prog;
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 3ced2e1703c1..b13a144f72ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -412,8 +412,8 @@ extern const struct stmmac_desc_ops ndesc_ops;
struct stmmac_dma_ops {
/* DMA core initialization */
int (*reset)(void __iomem *ioaddr);
- void (*init)(void __iomem *ioaddr, int pbl, int fb, int mb,
- int aal, u32 dma_tx, u32 dma_rx, int atds);
+ void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg,
+ u32 dma_tx, u32 dma_rx, int atds);
/* Configure the AXI Bus Mode Register */
void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi);
/* Dump DMA registers */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
index e6e6c2fcc4b7..3304095c934c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
@@ -71,9 +71,12 @@ err_remove_config_dt:
static const struct of_device_id dwmac_generic_match[] = {
{ .compatible = "st,spear600-gmac"},
+ { .compatible = "snps,dwmac-3.50a"},
{ .compatible = "snps,dwmac-3.610"},
{ .compatible = "snps,dwmac-3.70a"},
{ .compatible = "snps,dwmac-3.710"},
+ { .compatible = "snps,dwmac-4.00"},
+ { .compatible = "snps,dwmac-4.10a"},
{ .compatible = "snps,dwmac"},
{ }
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index ff3e5ab39bd0..52b9407a8a39 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -225,7 +225,7 @@ enum rx_tx_priority_ratio {
#define DMA_BUS_MODE_FB 0x00010000 /* Fixed burst */
#define DMA_BUS_MODE_MB 0x04000000 /* Mixed burst */
-#define DMA_BUS_MODE_RPBL_MASK 0x003e0000 /* Rx-Programmable Burst Len */
+#define DMA_BUS_MODE_RPBL_MASK 0x007e0000 /* Rx-Programmable Burst Len */
#define DMA_BUS_MODE_RPBL_SHIFT 17
#define DMA_BUS_MODE_USP 0x00800000
#define DMA_BUS_MODE_MAXPBL 0x01000000
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index f35385266fbf..612d3aaac9a4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -84,37 +84,39 @@ static void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
writel(value, ioaddr + DMA_AXI_BUS_MODE);
}
-static void dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb,
- int aal, u32 dma_tx, u32 dma_rx, int atds)
+static void dwmac1000_dma_init(void __iomem *ioaddr,
+ struct stmmac_dma_cfg *dma_cfg,
+ u32 dma_tx, u32 dma_rx, int atds)
{
u32 value = readl(ioaddr + DMA_BUS_MODE);
+ int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
+ int rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
/*
* Set the DMA PBL (Programmable Burst Length) mode.
*
* Note: before stmmac core 3.50 this mode bit was 4xPBL, and
* post 3.5 mode bit acts as 8*PBL.
- *
- * This configuration doesn't take care about the Separate PBL
- * so only the bits: 13-8 are programmed with the PBL passed from the
- * platform.
*/
- value |= DMA_BUS_MODE_MAXPBL;
- value &= ~DMA_BUS_MODE_PBL_MASK;
- value |= (pbl << DMA_BUS_MODE_PBL_SHIFT);
+ if (dma_cfg->pblx8)
+ value |= DMA_BUS_MODE_MAXPBL;
+ value |= DMA_BUS_MODE_USP;
+ value &= ~(DMA_BUS_MODE_PBL_MASK | DMA_BUS_MODE_RPBL_MASK);
+ value |= (txpbl << DMA_BUS_MODE_PBL_SHIFT);
+ value |= (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
/* Set the Fixed burst mode */
- if (fb)
+ if (dma_cfg->fixed_burst)
value |= DMA_BUS_MODE_FB;
/* Mixed Burst has no effect when fb is set */
- if (mb)
+ if (dma_cfg->mixed_burst)
value |= DMA_BUS_MODE_MB;
if (atds)
value |= DMA_BUS_MODE_ATDS;
- if (aal)
+ if (dma_cfg->aal)
value |= DMA_BUS_MODE_AAL;
writel(value, ioaddr + DMA_BUS_MODE);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
index 61f54c99a7de..e5664da382f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
@@ -32,11 +32,12 @@
#include "dwmac100.h"
#include "dwmac_dma.h"
-static void dwmac100_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb,
- int aal, u32 dma_tx, u32 dma_rx, int atds)
+static void dwmac100_dma_init(void __iomem *ioaddr,
+ struct stmmac_dma_cfg *dma_cfg,
+ u32 dma_tx, u32 dma_rx, int atds)
{
/* Enable Application Access by writing to DMA CSR0 */
- writel(DMA_BUS_MODE_DEFAULT | (pbl << DMA_BUS_MODE_PBL_SHIFT),
+ writel(DMA_BUS_MODE_DEFAULT | (dma_cfg->pbl << DMA_BUS_MODE_PBL_SHIFT),
ioaddr + DMA_BUS_MODE);
/* Mask interrupts by writing to CSR7 */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index e81b6e565c29..8196ab5fc33c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -71,25 +71,29 @@ static void dwmac4_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
writel(value, ioaddr + DMA_SYS_BUS_MODE);
}
-static void dwmac4_dma_init_channel(void __iomem *ioaddr, int pbl,
+static void dwmac4_dma_init_channel(void __iomem *ioaddr,
+ struct stmmac_dma_cfg *dma_cfg,
u32 dma_tx_phy, u32 dma_rx_phy,
u32 channel)
{
u32 value;
+ int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
+ int rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
/* set PBL for each channels. Currently we affect same configuration
* on each channel
*/
value = readl(ioaddr + DMA_CHAN_CONTROL(channel));
- value = value | DMA_BUS_MODE_PBL;
+ if (dma_cfg->pblx8)
+ value = value | DMA_BUS_MODE_PBL;
writel(value, ioaddr + DMA_CHAN_CONTROL(channel));
value = readl(ioaddr + DMA_CHAN_TX_CONTROL(channel));
- value = value | (pbl << DMA_BUS_MODE_PBL_SHIFT);
+ value = value | (txpbl << DMA_BUS_MODE_PBL_SHIFT);
writel(value, ioaddr + DMA_CHAN_TX_CONTROL(channel));
value = readl(ioaddr + DMA_CHAN_RX_CONTROL(channel));
- value = value | (pbl << DMA_BUS_MODE_RPBL_SHIFT);
+ value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
writel(value, ioaddr + DMA_CHAN_RX_CONTROL(channel));
/* Mask interrupts by writing to CSR7 */
@@ -99,27 +103,28 @@ static void dwmac4_dma_init_channel(void __iomem *ioaddr, int pbl,
writel(dma_rx_phy, ioaddr + DMA_CHAN_RX_BASE_ADDR(channel));
}
-static void dwmac4_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb,
- int aal, u32 dma_tx, u32 dma_rx, int atds)
+static void dwmac4_dma_init(void __iomem *ioaddr,
+ struct stmmac_dma_cfg *dma_cfg,
+ u32 dma_tx, u32 dma_rx, int atds)
{
u32 value = readl(ioaddr + DMA_SYS_BUS_MODE);
int i;
/* Set the Fixed burst mode */
- if (fb)
+ if (dma_cfg->fixed_burst)
value |= DMA_SYS_BUS_FB;
/* Mixed Burst has no effect when fb is set */
- if (mb)
+ if (dma_cfg->mixed_burst)
value |= DMA_SYS_BUS_MB;
- if (aal)
+ if (dma_cfg->aal)
value |= DMA_SYS_BUS_AAL;
writel(value, ioaddr + DMA_SYS_BUS_MODE);
for (i = 0; i < DMA_CHANNEL_NB_MAX; i++)
- dwmac4_dma_init_channel(ioaddr, pbl, dma_tx, dma_rx, i);
+ dwmac4_dma_init_channel(ioaddr, dma_cfg, dma_tx, dma_rx, i);
}
static void _dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 channel)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 6ab7e2bdcadd..699ee1d30426 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -263,7 +263,7 @@ static void stmmac_ethtool_getdrvinfo(struct net_device *dev,
{
struct stmmac_priv *priv = netdev_priv(dev);
- if (priv->plat->has_gmac)
+ if (priv->plat->has_gmac || priv->plat->has_gmac4)
strlcpy(info->driver, GMAC_ETHTOOL_NAME, sizeof(info->driver));
else
strlcpy(info->driver, MAC100_ETHTOOL_NAME,
@@ -446,7 +446,7 @@ static void stmmac_ethtool_gregs(struct net_device *dev,
memset(reg_space, 0x0, REG_SPACE_SIZE);
- if (!priv->plat->has_gmac) {
+ if (!(priv->plat->has_gmac || priv->plat->has_gmac4)) {
/* MAC registers */
for (i = 0; i < 12; i++)
reg_space[i] = readl(priv->ioaddr + (i * 4));
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 982c95213da4..b5188122bc15 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1578,16 +1578,12 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv)
*/
static int stmmac_init_dma_engine(struct stmmac_priv *priv)
{
- int pbl = DEFAULT_DMA_PBL, fixed_burst = 0, aal = 0;
- int mixed_burst = 0;
int atds = 0;
int ret = 0;
- if (priv->plat->dma_cfg) {
- pbl = priv->plat->dma_cfg->pbl;
- fixed_burst = priv->plat->dma_cfg->fixed_burst;
- mixed_burst = priv->plat->dma_cfg->mixed_burst;
- aal = priv->plat->dma_cfg->aal;
+ if (!priv->plat->dma_cfg || !priv->plat->dma_cfg->pbl) {
+ dev_err(priv->device, "Invalid DMA configuration\n");
+ return -EINVAL;
}
if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE))
@@ -1599,8 +1595,8 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
return ret;
}
- priv->hw->dma->init(priv->ioaddr, pbl, fixed_burst, mixed_burst,
- aal, priv->dma_tx_phy, priv->dma_rx_phy, atds);
+ priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
+ priv->dma_tx_phy, priv->dma_rx_phy, atds);
if (priv->synopsys_id >= DWMAC_CORE_4_00) {
priv->rx_tail_addr = priv->dma_rx_phy +
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 56c8a2342c14..a2831773431a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -81,6 +81,7 @@ static void stmmac_default_data(struct plat_stmmacenet_data *plat)
plat->mdio_bus_data->phy_mask = 0;
plat->dma_cfg->pbl = 32;
+ plat->dma_cfg->pblx8 = true;
/* TODO: AXI */
/* Set default value for multicast hash bins */
@@ -115,6 +116,7 @@ static int quark_default_data(struct plat_stmmacenet_data *plat,
plat->mdio_bus_data->phy_mask = 0;
plat->dma_cfg->pbl = 16;
+ plat->dma_cfg->pblx8 = true;
plat->dma_cfg->fixed_burst = 1;
/* AXI (TODO) */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index e528e7126b65..082cd48db6a7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -292,6 +292,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
if (of_device_is_compatible(np, "snps,dwmac-4.00") ||
of_device_is_compatible(np, "snps,dwmac-4.10a")) {
plat->has_gmac4 = 1;
+ plat->has_gmac = 0;
plat->pmt = 1;
plat->tso_en = of_property_read_bool(np, "snps,tso");
}
@@ -303,21 +304,25 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
plat->force_sf_dma_mode = 1;
}
- if (of_find_property(np, "snps,pbl", NULL)) {
- dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg),
- GFP_KERNEL);
- if (!dma_cfg) {
- stmmac_remove_config_dt(pdev, plat);
- return ERR_PTR(-ENOMEM);
- }
- plat->dma_cfg = dma_cfg;
- of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl);
- dma_cfg->aal = of_property_read_bool(np, "snps,aal");
- dma_cfg->fixed_burst =
- of_property_read_bool(np, "snps,fixed-burst");
- dma_cfg->mixed_burst =
- of_property_read_bool(np, "snps,mixed-burst");
+ dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg),
+ GFP_KERNEL);
+ if (!dma_cfg) {
+ stmmac_remove_config_dt(pdev, plat);
+ return ERR_PTR(-ENOMEM);
}
+ plat->dma_cfg = dma_cfg;
+
+ of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl);
+ if (!dma_cfg->pbl)
+ dma_cfg->pbl = DEFAULT_DMA_PBL;
+ of_property_read_u32(np, "snps,txpbl", &dma_cfg->txpbl);
+ of_property_read_u32(np, "snps,rxpbl", &dma_cfg->rxpbl);
+ dma_cfg->pblx8 = !of_property_read_bool(np, "snps,no-pbl-x8");
+
+ dma_cfg->aal = of_property_read_bool(np, "snps,aal");
+ dma_cfg->fixed_burst = of_property_read_bool(np, "snps,fixed-burst");
+ dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst");
+
plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode");
if (plat->force_thresh_dma_mode) {
plat->force_sf_dma_mode = 0;
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index bcd7b76dde9f..d73da8afe08e 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -37,6 +37,7 @@
#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/of_mdio.h>
+#include <linux/of_net.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/skbuff.h>
@@ -332,7 +333,7 @@ static void temac_do_set_mac_address(struct net_device *ndev)
mutex_unlock(&lp->indirect_mutex);
}
-static int temac_init_mac_address(struct net_device *ndev, void *address)
+static int temac_init_mac_address(struct net_device *ndev, const void *address)
{
memcpy(ndev->dev_addr, address, ETH_ALEN);
if (!is_valid_ether_addr(ndev->dev_addr))
@@ -982,7 +983,7 @@ static int temac_of_probe(struct platform_device *op)
struct net_device *ndev;
const void *addr;
__be32 *p;
- int size, rc = 0;
+ int rc = 0;
/* Init network device structure */
ndev = alloc_etherdev(sizeof(*lp));
@@ -1074,13 +1075,13 @@ static int temac_of_probe(struct platform_device *op)
/* Retrieve the MAC address */
- addr = of_get_property(op->dev.of_node, "local-mac-address", &size);
- if ((!addr) || (size != 6)) {
+ addr = of_get_mac_address(op->dev.of_node);
+ if (!addr) {
dev_err(&op->dev, "could not find MAC address\n");
rc = -ENODEV;
goto err_iounmap_2;
}
- temac_init_mac_address(ndev, (void *)addr);
+ temac_init_mac_address(ndev, addr);
rc = temac_mdio_setup(lp, op->dev.of_node);
if (rc)
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index c9c8a3be9f1b..b96e96919e31 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -25,6 +25,7 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/of_mdio.h>
+#include <linux/of_net.h>
#include <linux/of_platform.h>
#include <linux/of_irq.h>
#include <linux/of_address.h>
@@ -292,7 +293,8 @@ out:
* This function is called to initialize the MAC address of the Axi Ethernet
* core. It writes to the UAW0 and UAW1 registers of the core.
*/
-static void axienet_set_mac_address(struct net_device *ndev, void *address)
+static void axienet_set_mac_address(struct net_device *ndev,
+ const void *address)
{
struct axienet_local *lp = netdev_priv(ndev);
@@ -1456,7 +1458,7 @@ static int axienet_probe(struct platform_device *pdev)
struct device_node *np;
struct axienet_local *lp;
struct net_device *ndev;
- u8 mac_addr[6];
+ const void *mac_addr;
struct resource *ethres, dmares;
u32 value;
@@ -1567,13 +1569,12 @@ static int axienet_probe(struct platform_device *pdev)
}
/* Retrieve the MAC address */
- ret = of_property_read_u8_array(pdev->dev.of_node,
- "local-mac-address", mac_addr, 6);
- if (ret) {
+ mac_addr = of_get_mac_address(pdev->dev.of_node);
+ if (!mac_addr) {
dev_err(&pdev->dev, "could not find MAC address\n");
goto free_netdev;
}
- axienet_set_mac_address(ndev, (void *)mac_addr);
+ axienet_set_mac_address(ndev, mac_addr);
lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index cc00eb0db5d2..f83cf6696820 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -3346,19 +3346,18 @@ static struct net *macsec_get_link_net(const struct net_device *dev)
static size_t macsec_get_size(const struct net_device *dev)
{
- return 0 +
- nla_total_size_64bit(8) + /* SCI */
- nla_total_size(1) + /* ICV_LEN */
- nla_total_size_64bit(8) + /* CIPHER_SUITE */
- nla_total_size(4) + /* WINDOW */
- nla_total_size(1) + /* ENCODING_SA */
- nla_total_size(1) + /* ENCRYPT */
- nla_total_size(1) + /* PROTECT */
- nla_total_size(1) + /* INC_SCI */
- nla_total_size(1) + /* ES */
- nla_total_size(1) + /* SCB */
- nla_total_size(1) + /* REPLAY_PROTECT */
- nla_total_size(1) + /* VALIDATION */
+ return nla_total_size_64bit(8) + /* IFLA_MACSEC_SCI */
+ nla_total_size(1) + /* IFLA_MACSEC_ICV_LEN */
+ nla_total_size_64bit(8) + /* IFLA_MACSEC_CIPHER_SUITE */
+ nla_total_size(4) + /* IFLA_MACSEC_WINDOW */
+ nla_total_size(1) + /* IFLA_MACSEC_ENCODING_SA */
+ nla_total_size(1) + /* IFLA_MACSEC_ENCRYPT */
+ nla_total_size(1) + /* IFLA_MACSEC_PROTECT */
+ nla_total_size(1) + /* IFLA_MACSEC_INC_SCI */
+ nla_total_size(1) + /* IFLA_MACSEC_ES */
+ nla_total_size(1) + /* IFLA_MACSEC_SCB */
+ nla_total_size(1) + /* IFLA_MACSEC_REPLAY_PROTECT */
+ nla_total_size(1) + /* IFLA_MACSEC_VALIDATION */
0;
}
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index aeaf1bcb12d0..32fa7c76f29c 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -235,6 +235,53 @@ int phy_register_fixup_for_id(const char *bus_id,
}
EXPORT_SYMBOL(phy_register_fixup_for_id);
+/**
+ * phy_unregister_fixup - remove a phy_fixup from the list
+ * @bus_id: A string matches fixup->bus_id (or PHY_ANY_ID) in phy_fixup_list
+ * @phy_uid: A phy id matches fixup->phy_id (or PHY_ANY_UID) in phy_fixup_list
+ * @phy_uid_mask: Applied to phy_uid and fixup->phy_uid before comparison
+ */
+int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask)
+{
+ struct list_head *pos, *n;
+ struct phy_fixup *fixup;
+ int ret;
+
+ ret = -ENODEV;
+
+ mutex_lock(&phy_fixup_lock);
+ list_for_each_safe(pos, n, &phy_fixup_list) {
+ fixup = list_entry(pos, struct phy_fixup, list);
+
+ if ((!strcmp(fixup->bus_id, bus_id)) &&
+ ((fixup->phy_uid & phy_uid_mask) ==
+ (phy_uid & phy_uid_mask))) {
+ list_del(&fixup->list);
+ kfree(fixup);
+ ret = 0;
+ break;
+ }
+ }
+ mutex_unlock(&phy_fixup_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL(phy_unregister_fixup);
+
+/* Unregisters a fixup of any PHY with the UID in phy_uid */
+int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask)
+{
+ return phy_unregister_fixup(PHY_ANY_ID, phy_uid, phy_uid_mask);
+}
+EXPORT_SYMBOL(phy_unregister_fixup_for_uid);
+
+/* Unregisters a fixup of the PHY with id string bus_id */
+int phy_unregister_fixup_for_id(const char *bus_id)
+{
+ return phy_unregister_fixup(bus_id, PHY_ANY_UID, 0xffffffff);
+}
+EXPORT_SYMBOL(phy_unregister_fixup_for_id);
+
/* Returns 1 if fixup matches phydev in bus_id and phy_uid.
* Fixups can be set to match any in one or more fields.
*/
diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig
index cdde59089f72..3dd490f53e48 100644
--- a/drivers/net/usb/Kconfig
+++ b/drivers/net/usb/Kconfig
@@ -114,6 +114,11 @@ config USB_LAN78XX
help
This option adds support for Microchip LAN78XX based USB 2
& USB 3 10/100/1000 Ethernet adapters.
+ LAN7800 : USB 3 to 10/100/1000 Ethernet adapter
+ LAN7850 : USB 2 to 10/100/1000 Ethernet adapter
+ LAN7801 : USB 3 to 10/100/1000 Ethernet adapter (MAC only)
+
+ Proper PHY driver is required for LAN7801.
To compile this driver as a module, choose M here: the
module will be called lan78xx.
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 019f758953fc..08f8703e4d54 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -40,7 +40,7 @@
#define DRIVER_AUTHOR "WOOJUNG HUH <woojung.huh@microchip.com>"
#define DRIVER_DESC "LAN78XX USB 3.0 Gigabit Ethernet Devices"
#define DRIVER_NAME "lan78xx"
-#define DRIVER_VERSION "1.0.5"
+#define DRIVER_VERSION "1.0.6"
#define TX_TIMEOUT_JIFFIES (5 * HZ)
#define THROTTLE_JIFFIES (HZ / 8)
@@ -67,6 +67,7 @@
#define LAN78XX_USB_VENDOR_ID (0x0424)
#define LAN7800_USB_PRODUCT_ID (0x7800)
#define LAN7850_USB_PRODUCT_ID (0x7850)
+#define LAN7801_USB_PRODUCT_ID (0x7801)
#define LAN78XX_EEPROM_MAGIC (0x78A5)
#define LAN78XX_OTP_MAGIC (0x78F3)
@@ -390,6 +391,7 @@ struct lan78xx_net {
u32 chipid;
u32 chiprev;
struct mii_bus *mdiobus;
+ phy_interface_t interface;
int fc_autoneg;
u8 fc_request_control;
@@ -400,6 +402,10 @@ struct lan78xx_net {
struct irq_domain_data domain_data;
};
+/* define external phy id */
+#define PHY_LAN8835 (0x0007C130)
+#define PHY_KSZ9031RNX (0x00221620)
+
/* use ethtool to change the level for any given device */
static int msg_level = -1;
module_param(msg_level, int, 0);
@@ -1697,6 +1703,7 @@ static int lan78xx_mdiobus_read(struct mii_bus *bus, int phy_id, int idx)
done:
mutex_unlock(&dev->phy_mutex);
usb_autopm_put_interface(dev->intf);
+
return ret;
}
@@ -1759,6 +1766,10 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev)
/* set to internal PHY id */
dev->mdiobus->phy_mask = ~(1 << 1);
break;
+ case ID_REV_CHIP_ID_7801_:
+ /* scan thru PHYAD[2..0] */
+ dev->mdiobus->phy_mask = ~(0xFF);
+ break;
}
ret = mdiobus_register(dev->mdiobus);
@@ -1933,6 +1944,47 @@ static void lan78xx_remove_irq_domain(struct lan78xx_net *dev)
dev->domain_data.irqdomain = NULL;
}
+static int lan8835_fixup(struct phy_device *phydev)
+{
+ int buf;
+ int ret;
+ struct lan78xx_net *dev = netdev_priv(phydev->attached_dev);
+
+ /* LED2/PME_N/IRQ_N/RGMII_ID pin to IRQ_N mode */
+ buf = phy_read_mmd_indirect(phydev, 0x8010, 3);
+ buf &= ~0x1800;
+ buf |= 0x0800;
+ phy_write_mmd_indirect(phydev, 0x8010, 3, buf);
+
+ /* RGMII MAC TXC Delay Enable */
+ ret = lan78xx_write_reg(dev, MAC_RGMII_ID,
+ MAC_RGMII_ID_TXC_DELAY_EN_);
+
+ /* RGMII TX DLL Tune Adjust */
+ ret = lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
+
+ dev->interface = PHY_INTERFACE_MODE_RGMII_TXID;
+
+ return 1;
+}
+
+static int ksz9031rnx_fixup(struct phy_device *phydev)
+{
+ struct lan78xx_net *dev = netdev_priv(phydev->attached_dev);
+
+ /* Micrel9301RNX PHY configuration */
+ /* RGMII Control Signal Pad Skew */
+ phy_write_mmd_indirect(phydev, 4, 2, 0x0077);
+ /* RGMII RX Data Pad Skew */
+ phy_write_mmd_indirect(phydev, 5, 2, 0x7777);
+ /* RGMII RX Clock Pad Skew */
+ phy_write_mmd_indirect(phydev, 8, 2, 0x1FF);
+
+ dev->interface = PHY_INTERFACE_MODE_RGMII_RXID;
+
+ return 1;
+}
+
static int lan78xx_phy_init(struct lan78xx_net *dev)
{
int ret;
@@ -1945,6 +1997,42 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
return -EIO;
}
+ if ((dev->chipid == ID_REV_CHIP_ID_7800_) ||
+ (dev->chipid == ID_REV_CHIP_ID_7850_)) {
+ phydev->is_internal = true;
+ dev->interface = PHY_INTERFACE_MODE_GMII;
+
+ } else if (dev->chipid == ID_REV_CHIP_ID_7801_) {
+ if (!phydev->drv) {
+ netdev_err(dev->net, "no PHY driver found\n");
+ return -EIO;
+ }
+
+ dev->interface = PHY_INTERFACE_MODE_RGMII;
+
+ /* external PHY fixup for KSZ9031RNX */
+ ret = phy_register_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0,
+ ksz9031rnx_fixup);
+ if (ret < 0) {
+ netdev_err(dev->net, "fail to register fixup\n");
+ return ret;
+ }
+ /* external PHY fixup for LAN8835 */
+ ret = phy_register_fixup_for_uid(PHY_LAN8835, 0xfffffff0,
+ lan8835_fixup);
+ if (ret < 0) {
+ netdev_err(dev->net, "fail to register fixup\n");
+ return ret;
+ }
+ /* add more external PHY fixup here if needed */
+
+ phydev->is_internal = false;
+ } else {
+ netdev_err(dev->net, "unknown ID found\n");
+ ret = -EIO;
+ goto error;
+ }
+
/* if phyirq is not set, use polling mode in phylib */
if (dev->domain_data.phyirq > 0)
phydev->irq = dev->domain_data.phyirq;
@@ -1957,7 +2045,7 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
ret = phy_connect_direct(dev->net, phydev,
lan78xx_link_status_change,
- PHY_INTERFACE_MODE_GMII);
+ dev->interface);
if (ret) {
netdev_err(dev->net, "can't attach PHY to %s\n",
dev->mdiobus->id);
@@ -1982,6 +2070,12 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
netif_dbg(dev, ifup, dev->net, "phy initialised successfully");
return 0;
+
+error:
+ phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0);
+ phy_unregister_fixup_for_uid(PHY_LAN8835, 0xfffffff0);
+
+ return ret;
}
static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size)
@@ -2338,6 +2432,9 @@ static int lan78xx_reset(struct lan78xx_net *dev)
} while ((buf & PMT_CTL_PHY_RST_) || !(buf & PMT_CTL_READY_));
ret = lan78xx_read_reg(dev, MAC_CR, &buf);
+ /* LAN7801 only has RGMII mode */
+ if (dev->chipid == ID_REV_CHIP_ID_7801_)
+ buf &= ~MAC_CR_GMII_EN_;
buf |= MAC_CR_AUTO_DUPLEX_ | MAC_CR_AUTO_SPEED_;
ret = lan78xx_write_reg(dev, MAC_CR, buf);
@@ -2464,8 +2561,12 @@ static int lan78xx_stop(struct net_device *net)
if (timer_pending(&dev->stat_monitor))
del_timer_sync(&dev->stat_monitor);
+ phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0);
+ phy_unregister_fixup_for_uid(PHY_LAN8835, 0xfffffff0);
+
phy_stop(net->phydev);
phy_disconnect(net->phydev);
+
net->phydev = NULL;
clear_bit(EVENT_DEV_OPEN, &dev->flags);
@@ -3888,6 +3989,10 @@ static const struct usb_device_id products[] = {
/* LAN7850 USB Gigabit Ethernet Device */
USB_DEVICE(LAN78XX_USB_VENDOR_ID, LAN7850_USB_PRODUCT_ID),
},
+ {
+ /* LAN7801 USB Gigabit Ethernet Device */
+ USB_DEVICE(LAN78XX_USB_VENDOR_ID, LAN7801_USB_PRODUCT_ID),
+ },
{},
};
MODULE_DEVICE_TABLE(usb, products);
diff --git a/drivers/net/usb/lan78xx.h b/drivers/net/usb/lan78xx.h
index 40927906109a..25aa54611774 100644
--- a/drivers/net/usb/lan78xx.h
+++ b/drivers/net/usb/lan78xx.h
@@ -108,6 +108,7 @@
#define ID_REV_CHIP_REV_MASK_ (0x0000FFFF)
#define ID_REV_CHIP_ID_7800_ (0x7800)
#define ID_REV_CHIP_ID_7850_ (0x7850)
+#define ID_REV_CHIP_ID_7801_ (0x7801)
#define FPGA_REV (0x04)
#define FPGA_REV_MINOR_MASK_ (0x0000FF00)
@@ -550,6 +551,7 @@
#define LTM_INACTIVE1_TIMER10_ (0x0000FFFF)
#define MAC_CR (0x100)
+#define MAC_CR_GMII_EN_ (0x00080000)
#define MAC_CR_EEE_TX_CLK_STOP_EN_ (0x00040000)
#define MAC_CR_EEE_EN_ (0x00020000)
#define MAC_CR_EEE_TLAR_EN_ (0x00010000)
@@ -787,6 +789,18 @@
#define PHY_DEV_ID_MODEL_MASK_ (0x0FC00000)
#define PHY_DEV_ID_OUI_MASK_ (0x003FFFFF)
+#define RGMII_TX_BYP_DLL (0x708)
+#define RGMII_TX_BYP_DLL_TX_TUNE_ADJ_MASK_ (0x000FC00)
+#define RGMII_TX_BYP_DLL_TX_TUNE_SEL_MASK_ (0x00003F0)
+#define RGMII_TX_BYP_DLL_TX_DLL_RESET_ (0x0000002)
+#define RGMII_TX_BYP_DLL_TX_DLL_BYPASS_ (0x0000001)
+
+#define RGMII_RX_BYP_DLL (0x70C)
+#define RGMII_RX_BYP_DLL_RX_TUNE_ADJ_MASK_ (0x000FC00)
+#define RGMII_RX_BYP_DLL_RX_TUNE_SEL_MASK_ (0x00003F0)
+#define RGMII_RX_BYP_DLL_RX_DLL_RESET_ (0x0000002)
+#define RGMII_RX_BYP_DLL_RX_DLL_BYPASS_ (0x0000001)
+
#define OTP_BASE_ADDR (0x00001000)
#define OTP_ADDR_RANGE_ (0x1FF)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index c6f2d89c0e97..266354390c8f 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -261,8 +261,8 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
/* We can only add the work to the list after we're
* sure it was not in the list.
+ * test_and_set_bit() implies a memory barrier.
*/
- smp_mb();
llist_add(&work->node, &dev->work_list);
wake_up_process(dev->worker);
}
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index e3b30ea9ece5..9c3c68b9a49e 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -195,7 +195,6 @@ static int
vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
{
struct vhost_vsock *vsock;
- struct vhost_virtqueue *vq;
int len = pkt->len;
/* Find the vhost_vsock according to guest context id */
@@ -205,8 +204,6 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
return -ENODEV;
}
- vq = &vsock->vqs[VSOCK_VQ_RX];
-
if (pkt->reply)
atomic_inc(&vsock->queued_replies);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7453c1281531..a13b031dc6b8 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -18,13 +18,6 @@
struct bpf_reg_state {
enum bpf_reg_type type;
- /*
- * Used to determine if any memory access using this register will
- * result in a bad access.
- */
- s64 min_value;
- u64 max_value;
- u32 id;
union {
/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
s64 imm;
@@ -40,6 +33,13 @@ struct bpf_reg_state {
*/
struct bpf_map *map_ptr;
};
+ u32 id;
+ /* Used to determine if any memory access using this register will
+ * result in a bad access. These two fields must be last.
+ * See states_equal()
+ */
+ s64 min_value;
+ u64 max_value;
};
enum bpf_stack_slot_type {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f078d2b1cff6..6a1658308612 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -406,7 +406,8 @@ struct bpf_prog {
u16 jited:1, /* Is our filter JIT'ed? */
gpl_compatible:1, /* Is filter GPL compatible? */
cb_access:1, /* Is control block accessed? */
- dst_needed:1; /* Do we need dst entry? */
+ dst_needed:1, /* Do we need dst entry? */
+ xdp_adjust_head:1; /* Adjusting pkt head? */
kmemcheck_bitfield_end(meta);
enum bpf_prog_type type; /* Type of BPF program */
u32 len; /* Number of filter blocks */
@@ -440,6 +441,7 @@ struct bpf_skb_data_end {
struct xdp_buff {
void *data;
void *data_end;
+ void *data_hard_start;
};
/* compute the linear packet data range [data, data_end) which
@@ -595,7 +597,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
-bool bpf_helper_changes_skb_data(void *func);
+bool bpf_helper_changes_pkt_data(void *func);
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1ff5ea6e1221..994f7423a74b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -192,6 +192,7 @@ struct net_device_stats {
#ifdef CONFIG_RPS
#include <linux/static_key.h>
extern struct static_key rps_needed;
+extern struct static_key rfs_needed;
#endif
struct neighbour;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index feb8a98e8dd3..f7d95f644eed 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -860,6 +860,10 @@ int phy_register_fixup_for_id(const char *bus_id,
int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
int (*run)(struct phy_device *));
+int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask);
+int phy_unregister_fixup_for_id(const char *bus_id);
+int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
+
int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable);
int phy_get_eee_err(struct phy_device *phydev);
int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_eee *data);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9c535fbccf2c..0cd92b0f2af5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1966,6 +1966,8 @@ static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
return __pskb_pull_tail(skb, len - skb_headlen(skb)) != NULL;
}
+void skb_condense(struct sk_buff *skb);
+
/**
* skb_headroom - bytes at buffer head
* @skb: buffer to check
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 3537fb33cc90..266dab9ad782 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -88,6 +88,9 @@ struct stmmac_mdio_bus_data {
struct stmmac_dma_cfg {
int pbl;
+ int txpbl;
+ int rxpbl;
+ bool pblx8;
int fixed_burst;
int mixed_burst;
bool aal;
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index c4f31666afd2..d896a33e00d4 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -104,6 +104,22 @@ struct flow_dissector_key_ports {
};
};
+/**
+ * flow_dissector_key_icmp:
+ * @ports: type and code of ICMP header
+ * icmp: ICMP type (high) and code (low)
+ * type: ICMP type
+ * code: ICMP code
+ */
+struct flow_dissector_key_icmp {
+ union {
+ __be16 icmp;
+ struct {
+ u8 type;
+ u8 code;
+ };
+ };
+};
/**
* struct flow_dissector_key_eth_addrs:
@@ -122,6 +138,7 @@ enum flow_dissector_key_id {
FLOW_DISSECTOR_KEY_IPV4_ADDRS, /* struct flow_dissector_key_ipv4_addrs */
FLOW_DISSECTOR_KEY_IPV6_ADDRS, /* struct flow_dissector_key_ipv6_addrs */
FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
+ FLOW_DISSECTOR_KEY_ICMP, /* struct flow_dissector_key_icmp */
FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */
FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */
diff --git a/include/net/sock.h b/include/net/sock.h
index 1749e38d0301..e17aa3de2b4d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -913,17 +913,20 @@ static inline void sock_rps_record_flow_hash(__u32 hash)
static inline void sock_rps_record_flow(const struct sock *sk)
{
#ifdef CONFIG_RPS
- /* Reading sk->sk_rxhash might incur an expensive cache line miss.
- *
- * TCP_ESTABLISHED does cover almost all states where RFS
- * might be useful, and is cheaper [1] than testing :
- * IPv4: inet_sk(sk)->inet_daddr
- * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
- * OR an additional socket flag
- * [1] : sk_state and sk_prot are in the same cache line.
- */
- if (sk->sk_state == TCP_ESTABLISHED)
- sock_rps_record_flow_hash(sk->sk_rxhash);
+ if (static_key_false(&rfs_needed)) {
+ /* Reading sk->sk_rxhash might incur an expensive cache line
+ * miss.
+ *
+ * TCP_ESTABLISHED does cover almost all states where RFS
+ * might be useful, and is cheaper [1] than testing :
+ * IPv4: inet_sk(sk)->inet_daddr
+ * IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
+ * OR an additional socket flag
+ * [1] : sk_state and sk_prot are in the same cache line.
+ */
+ if (sk->sk_state == TCP_ESTABLISHED)
+ sock_rps_record_flow_hash(sk->sk_rxhash);
+ }
#endif
}
@@ -2160,7 +2163,8 @@ struct sock_skb_cb {
static inline void
sock_skb_set_dropcount(const struct sock *sk, struct sk_buff *skb)
{
- SOCK_SKB_CB(skb)->dropcount = atomic_read(&sk->sk_drops);
+ SOCK_SKB_CB(skb)->dropcount = sock_flag(sk, SOCK_RXQ_OVFL) ?
+ atomic_read(&sk->sk_drops) : 0;
}
static inline void sk_drops_add(struct sock *sk, const struct sk_buff *skb)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6123d9b8e828..0eb0e87dbe9f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -424,6 +424,12 @@ union bpf_attr {
* @len: length of header to be pushed in front
* @flags: Flags (unused for now)
* Return: 0 on success or negative error
+ *
+ * int bpf_xdp_adjust_head(xdp_md, delta)
+ * Adjust the xdp_md.data by delta
+ * @xdp_md: pointer to xdp_md
+ * @delta: An positive/negative integer to be added to xdp_md.data
+ * Return: 0 on success or negative on error
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -469,7 +475,8 @@ union bpf_attr {
FN(csum_update), \
FN(set_hash_invalid), \
FN(get_numa_node_id), \
- FN(skb_change_head),
+ FN(skb_change_head), \
+ FN(xdp_adjust_head),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -576,6 +583,8 @@ struct bpf_sock {
__u32 protocol;
};
+#define XDP_PACKET_HEADROOM 256
+
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
* return codes are reserved for future use. Unknown return codes will result
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 1adc0b654996..cb4bcdc58543 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -458,11 +458,28 @@ enum {
TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, /* be16 */
TCA_FLOWER_KEY_ENC_UDP_DST_PORT, /* be16 */
TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, /* be16 */
+
+ TCA_FLOWER_KEY_FLAGS, /* be32 */
+ TCA_FLOWER_KEY_FLAGS_MASK, /* be32 */
+
+ TCA_FLOWER_KEY_ICMPV4_CODE, /* u8 */
+ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */
+ TCA_FLOWER_KEY_ICMPV4_TYPE, /* u8 */
+ TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */
+ TCA_FLOWER_KEY_ICMPV6_CODE, /* u8 */
+ TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */
+ TCA_FLOWER_KEY_ICMPV6_TYPE, /* u8 */
+ TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */
+
__TCA_FLOWER_MAX,
};
#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
+enum {
+ TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
+};
+
/* Match-all classifier */
enum {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index bdcc9f4ba767..83e0d153b0b4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1143,7 +1143,7 @@ struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
return prog;
}
-bool __weak bpf_helper_changes_skb_data(void *func)
+bool __weak bpf_helper_changes_pkt_data(void *func)
{
return false;
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 88f609f1c0c3..4819ec9d95f6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -579,6 +579,8 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
prog->dst_needed = 1;
if (insn->imm == BPF_FUNC_get_prandom_u32)
bpf_user_rnd_init_once();
+ if (insn->imm == BPF_FUNC_xdp_adjust_head)
+ prog->xdp_adjust_head = 1;
if (insn->imm == BPF_FUNC_tail_call) {
/* mark bpf_tail_call as different opcode
* to avoid conditional branch in
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index da9fb2a9b7eb..d28f9a3380a9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1216,7 +1216,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
return -EINVAL;
}
- changes_data = bpf_helper_changes_skb_data(fn->func);
+ changes_data = bpf_helper_changes_pkt_data(fn->func);
memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;
@@ -2528,7 +2528,7 @@ static bool states_equal(struct bpf_verifier_env *env,
* we didn't do a variable access into a map then we are a-ok.
*/
if (!varlen_map_access &&
- rold->type == rcur->type && rold->imm == rcur->imm)
+ memcmp(rold, rcur, offsetofend(struct bpf_reg_state, id)) == 0)
continue;
/* If we didn't map access then again we don't care about the
diff --git a/net/core/dev.c b/net/core/dev.c
index bffb5253e778..1d33ce03365f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3447,6 +3447,8 @@ EXPORT_SYMBOL(rps_cpu_mask);
struct static_key rps_needed __read_mostly;
EXPORT_SYMBOL(rps_needed);
+struct static_key rfs_needed __read_mostly;
+EXPORT_SYMBOL(rfs_needed);
static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
diff --git a/net/core/filter.c b/net/core/filter.c
index b751202e12f8..b1461708a977 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2234,7 +2234,28 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
-bool bpf_helper_changes_skb_data(void *func)
+BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
+{
+ void *data = xdp->data + offset;
+
+ if (unlikely(data < xdp->data_hard_start ||
+ data > xdp->data_end - ETH_HLEN))
+ return -EINVAL;
+
+ xdp->data = data;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
+ .func = bpf_xdp_adjust_head,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
+bool bpf_helper_changes_pkt_data(void *func)
{
if (func == bpf_skb_vlan_push ||
func == bpf_skb_vlan_pop ||
@@ -2244,7 +2265,8 @@ bool bpf_helper_changes_skb_data(void *func)
func == bpf_skb_change_tail ||
func == bpf_skb_pull_data ||
func == bpf_l3_csum_replace ||
- func == bpf_l4_csum_replace)
+ func == bpf_l4_csum_replace ||
+ func == bpf_xdp_adjust_head)
return true;
return false;
@@ -2670,6 +2692,8 @@ xdp_func_proto(enum bpf_func_id func_id)
return &bpf_xdp_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
+ case BPF_FUNC_xdp_adjust_head:
+ return &bpf_xdp_adjust_head_proto;
default:
return sk_filter_func_proto(func_id);
}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1eb6f949e5b2..d6447dc10371 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -58,6 +58,28 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
EXPORT_SYMBOL(skb_flow_dissector_init);
/**
+ * skb_flow_get_be16 - extract be16 entity
+ * @skb: sk_buff to extract from
+ * @poff: offset to extract at
+ * @data: raw buffer pointer to the packet
+ * @hlen: packet header length
+ *
+ * The function will try to retrieve a be32 entity at
+ * offset poff
+ */
+__be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, void *data,
+ int hlen)
+{
+ __be16 *u, _u;
+
+ u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
+ if (u)
+ return *u;
+
+ return 0;
+}
+
+/**
* __skb_flow_get_ports - extract the upper layer ports and return them
* @skb: sk_buff to extract the ports from
* @thoff: transport header offset
@@ -117,6 +139,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
struct flow_dissector_key_basic *key_basic;
struct flow_dissector_key_addrs *key_addrs;
struct flow_dissector_key_ports *key_ports;
+ struct flow_dissector_key_icmp *key_icmp;
struct flow_dissector_key_tags *key_tags;
struct flow_dissector_key_vlan *key_vlan;
struct flow_dissector_key_keyid *key_keyid;
@@ -546,6 +569,14 @@ ip_proto_again:
data, hlen);
}
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ICMP)) {
+ key_icmp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ICMP,
+ target_container);
+ key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen);
+ }
+
out_good:
ret = true;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b45cd1494243..84151cf40aeb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4931,3 +4931,31 @@ struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
return clone;
}
EXPORT_SYMBOL(pskb_extract);
+
+/**
+ * skb_condense - try to get rid of fragments/frag_list if possible
+ * @skb: buffer
+ *
+ * Can be used to save memory before skb is added to a busy queue.
+ * If packet has bytes in frags and enough tail room in skb->head,
+ * pull all of them, so that we can free the frags right now and adjust
+ * truesize.
+ * Notes:
+ * We do not reallocate skb->head thus can not fail.
+ * Caller must re-evaluate skb->truesize if needed.
+ */
+void skb_condense(struct sk_buff *skb)
+{
+ if (!skb->data_len ||
+ skb->data_len > skb->end - skb->tail ||
+ skb_cloned(skb))
+ return;
+
+ /* Nice, we can free page frag(s) right now */
+ __pskb_pull_tail(skb, skb->data_len);
+
+ /* Now adjust skb->truesize, since __pskb_pull_tail() does
+ * not do this.
+ */
+ skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 0df2aa652530..2a46e4009f62 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -79,10 +79,13 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
if (sock_table != orig_sock_table) {
rcu_assign_pointer(rps_sock_flow_table, sock_table);
- if (sock_table)
+ if (sock_table) {
static_key_slow_inc(&rps_needed);
+ static_key_slow_inc(&rfs_needed);
+ }
if (orig_sock_table) {
static_key_slow_dec(&rps_needed);
+ static_key_slow_dec(&rfs_needed);
synchronize_rcu();
vfree(orig_sock_table);
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 691146abde2d..f79d7a8ab1c6 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1047,12 +1047,12 @@ int icmp_rcv(struct sk_buff *skb)
if (success) {
consume_skb(skb);
- return 0;
+ return NET_RX_SUCCESS;
}
drop:
kfree_skb(skb);
- return 0;
+ return NET_RX_DROP;
csum_error:
__ICMP_INC_STATS(net, ICMP_MIB_CSUMERRORS);
error:
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 16d88ba9ff1c..f5628ada47b5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1199,7 +1199,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
{
struct sk_buff_head *list = &sk->sk_receive_queue;
int rmem, delta, amt, err = -ENOMEM;
- int size = skb->truesize;
+ int size;
/* try to avoid the costly atomic add/sub pair when the receive
* queue is full; always allow at least a packet
@@ -1208,6 +1208,16 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
if (rmem > sk->sk_rcvbuf)
goto drop;
+ /* Under mem pressure, it might be helpful to help udp_recvmsg()
+ * having linear skbs :
+ * - Reduce memory overhead and thus increase receive queue capacity
+ * - Less cache line misses at copyout() time
+ * - Less work at consume_skb() (less alien page frag freeing)
+ */
+ if (rmem > (sk->sk_rcvbuf >> 1))
+ skb_condense(skb);
+ size = skb->truesize;
+
/* we drop only if the receive buf is full and the receive
* queue contains some other skb
*/
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 29a9e6d9f274..e040c5140f61 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -39,6 +39,7 @@ struct fl_flow_key {
struct flow_dissector_key_ipv6_addrs ipv6;
};
struct flow_dissector_key_ports tp;
+ struct flow_dissector_key_icmp icmp;
struct flow_dissector_key_keyid enc_key_id;
union {
struct flow_dissector_key_ipv4_addrs enc_ipv4;
@@ -386,6 +387,16 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NLA_U16 },
+ [TCA_FLOWER_KEY_FLAGS] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ICMPV4_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_CODE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_TYPE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_CODE] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
};
static void fl_set_key_val(struct nlattr **tb,
@@ -420,6 +431,39 @@ static void fl_set_key_vlan(struct nlattr **tb,
}
}
+static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
+ u32 *dissector_key, u32 *dissector_mask,
+ u32 flower_flag_bit, u32 dissector_flag_bit)
+{
+ if (flower_mask & flower_flag_bit) {
+ *dissector_mask |= dissector_flag_bit;
+ if (flower_key & flower_flag_bit)
+ *dissector_key |= dissector_flag_bit;
+ }
+}
+
+static void fl_set_key_flags(struct nlattr **tb,
+ u32 *flags_key, u32 *flags_mask)
+{
+ u32 key, mask;
+
+ if (!tb[TCA_FLOWER_KEY_FLAGS])
+ return;
+
+ key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
+
+ if (!tb[TCA_FLOWER_KEY_FLAGS_MASK])
+ mask = ~0;
+ else
+ mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
+
+ *flags_key = 0;
+ *flags_mask = 0;
+
+ fl_set_key_flag(key, mask, flags_key, flags_mask,
+ TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+}
+
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask)
{
@@ -502,6 +546,26 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
&mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
sizeof(key->tp.dst));
+ } else if (key->basic.n_proto == htons(ETH_P_IP) &&
+ key->basic.ip_proto == IPPROTO_ICMP) {
+ fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV4_TYPE,
+ &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
+ sizeof(key->icmp.type));
+ fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
+ &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+ sizeof(key->icmp.code));
+ } else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+ key->basic.ip_proto == IPPROTO_ICMPV6) {
+ fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE,
+ &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
+ sizeof(key->icmp.type));
+ fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
+ &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+ sizeof(key->icmp.code));
}
if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
@@ -546,6 +610,8 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
&mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
sizeof(key->enc_tp.dst));
+ fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
+
return 0;
}
@@ -612,6 +678,8 @@ static void fl_init_dissector(struct cls_fl_head *head,
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_PORTS, tp);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+ FLOW_DISSECTOR_KEY_ICMP, icmp);
+ FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_VLAN, vlan);
FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
@@ -880,6 +948,42 @@ static int fl_dump_key_vlan(struct sk_buff *skb,
return 0;
}
+static void fl_get_key_flag(u32 dissector_key, u32 dissector_mask,
+ u32 *flower_key, u32 *flower_mask,
+ u32 flower_flag_bit, u32 dissector_flag_bit)
+{
+ if (dissector_mask & dissector_flag_bit) {
+ *flower_mask |= flower_flag_bit;
+ if (dissector_key & dissector_flag_bit)
+ *flower_key |= flower_flag_bit;
+ }
+}
+
+static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
+{
+ u32 key, mask;
+ __be32 _key, _mask;
+ int err;
+
+ if (!memchr_inv(&flags_mask, 0, sizeof(flags_mask)))
+ return 0;
+
+ key = 0;
+ mask = 0;
+
+ fl_get_key_flag(flags_key, flags_mask, &key, &mask,
+ TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
+
+ _key = cpu_to_be32(key);
+ _mask = cpu_to_be32(mask);
+
+ err = nla_put(skb, TCA_FLOWER_KEY_FLAGS, 4, &_key);
+ if (err)
+ return err;
+
+ return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
+}
+
static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
struct sk_buff *skb, struct tcmsg *t)
{
@@ -977,6 +1081,28 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
&mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
sizeof(key->tp.dst))))
goto nla_put_failure;
+ else if (key->basic.n_proto == htons(ETH_P_IP) &&
+ key->basic.ip_proto == IPPROTO_ICMP &&
+ (fl_dump_key_val(skb, &key->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE, &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
+ sizeof(key->icmp.type)) ||
+ fl_dump_key_val(skb, &key->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE, &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+ sizeof(key->icmp.code))))
+ goto nla_put_failure;
+ else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+ key->basic.ip_proto == IPPROTO_ICMPV6 &&
+ (fl_dump_key_val(skb, &key->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE, &mask->icmp.type,
+ TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
+ sizeof(key->icmp.type)) ||
+ fl_dump_key_val(skb, &key->icmp.code,
+ TCA_FLOWER_KEY_ICMPV6_CODE, &mask->icmp.code,
+ TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
+ sizeof(key->icmp.code))))
+ goto nla_put_failure;
if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
(fl_dump_key_val(skb, &key->enc_ipv4.src,
@@ -1015,6 +1141,9 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
sizeof(key->enc_tp.dst)))
goto nla_put_failure;
+ if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
+ goto nla_put_failure;
+
nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
if (tcf_exts_dump(skb, &f->exts))
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 00cd3081c038..f2219c1489e5 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -33,6 +33,7 @@ hostprogs-y += trace_event
hostprogs-y += sampleip
hostprogs-y += tc_l2_redirect
hostprogs-y += lwt_len_hist
+hostprogs-y += xdp_tx_iptunnel
test_lru_dist-objs := test_lru_dist.o libbpf.o
sock_example-objs := sock_example.o libbpf.o
@@ -67,6 +68,7 @@ trace_event-objs := bpf_load.o libbpf.o trace_event_user.o
sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o
lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o
+xdp_tx_iptunnel-objs := bpf_load.o libbpf.o xdp_tx_iptunnel_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -99,6 +101,7 @@ always += test_current_task_under_cgroup_kern.o
always += trace_event_kern.o
always += sampleip_kern.o
always += lwt_len_hist_kern.o
+always += xdp_tx_iptunnel_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
@@ -129,6 +132,7 @@ HOSTLOADLIBES_trace_event += -lelf
HOSTLOADLIBES_sampleip += -lelf
HOSTLOADLIBES_tc_l2_redirect += -l elf
HOSTLOADLIBES_lwt_len_hist += -l elf
+HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 8370a6e3839d..faaffe2e139a 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -57,6 +57,8 @@ static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
(void *) BPF_FUNC_skb_set_tunnel_opt;
static unsigned long long (*bpf_get_prandom_u32)(void) =
(void *) BPF_FUNC_get_prandom_u32;
+static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_head;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 49b45ccbe153..e30b6de94f2e 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -12,6 +12,10 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/types.h>
+#include <sys/socket.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
@@ -450,3 +454,93 @@ struct ksym *ksym_search(long key)
/* out of range. return _stext */
return &syms[0];
}
+
+int set_link_xdp_fd(int ifindex, int fd)
+{
+ struct sockaddr_nl sa;
+ int sock, seq = 0, len, ret = -1;
+ char buf[4096];
+ struct nlattr *nla, *nla_xdp;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char attrbuf[64];
+ } req;
+ struct nlmsghdr *nh;
+ struct nlmsgerr *err;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0) {
+ printf("open netlink socket: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ printf("bind to netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_pid = 0;
+ req.nh.nlmsg_seq = ++seq;
+ req.ifinfo.ifi_family = AF_UNSPEC;
+ req.ifinfo.ifi_index = ifindex;
+ nla = (struct nlattr *)(((char *)&req)
+ + NLMSG_ALIGN(req.nh.nlmsg_len));
+ nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+
+ nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
+ nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+ nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
+
+ req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ printf("send to netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ printf("recv from netlink: %s\n", strerror(errno));
+ goto cleanup;
+ }
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != getpid()) {
+ printf("Wrong pid %d, expected %d\n",
+ nh->nlmsg_pid, getpid());
+ goto cleanup;
+ }
+ if (nh->nlmsg_seq != seq) {
+ printf("Wrong seq %d, expected %d\n",
+ nh->nlmsg_seq, seq);
+ goto cleanup;
+ }
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ printf("nlmsg error %s\n", strerror(-err->error));
+ goto cleanup;
+ case NLMSG_DONE:
+ break;
+ }
+ }
+
+ ret = 0;
+
+cleanup:
+ close(sock);
+ return ret;
+}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 4adeeef53ad6..fb46a421ab41 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -31,4 +31,5 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
+int set_link_xdp_fd(int ifindex, int fd);
#endif
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index 2b2150d6d6f7..5f040a0d7712 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -5,111 +5,18 @@
* License as published by the Free Software Foundation.
*/
#include <linux/bpf.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/socket.h>
#include <unistd.h>
#include "bpf_load.h"
#include "bpf_util.h"
#include "libbpf.h"
-static int set_link_xdp_fd(int ifindex, int fd)
-{
- struct sockaddr_nl sa;
- int sock, seq = 0, len, ret = -1;
- char buf[4096];
- struct nlattr *nla, *nla_xdp;
- struct {
- struct nlmsghdr nh;
- struct ifinfomsg ifinfo;
- char attrbuf[64];
- } req;
- struct nlmsghdr *nh;
- struct nlmsgerr *err;
-
- memset(&sa, 0, sizeof(sa));
- sa.nl_family = AF_NETLINK;
-
- sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (sock < 0) {
- printf("open netlink socket: %s\n", strerror(errno));
- return -1;
- }
-
- if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
- printf("bind to netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- memset(&req, 0, sizeof(req));
- req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
- req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
- req.nh.nlmsg_type = RTM_SETLINK;
- req.nh.nlmsg_pid = 0;
- req.nh.nlmsg_seq = ++seq;
- req.ifinfo.ifi_family = AF_UNSPEC;
- req.ifinfo.ifi_index = ifindex;
- nla = (struct nlattr *)(((char *)&req)
- + NLMSG_ALIGN(req.nh.nlmsg_len));
- nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
-
- nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
- nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
- nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
- memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
- nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
-
- req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
- if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
- printf("send to netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- len = recv(sock, buf, sizeof(buf), 0);
- if (len < 0) {
- printf("recv from netlink: %s\n", strerror(errno));
- goto cleanup;
- }
-
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
- nh = NLMSG_NEXT(nh, len)) {
- if (nh->nlmsg_pid != getpid()) {
- printf("Wrong pid %d, expected %d\n",
- nh->nlmsg_pid, getpid());
- goto cleanup;
- }
- if (nh->nlmsg_seq != seq) {
- printf("Wrong seq %d, expected %d\n",
- nh->nlmsg_seq, seq);
- goto cleanup;
- }
- switch (nh->nlmsg_type) {
- case NLMSG_ERROR:
- err = (struct nlmsgerr *)NLMSG_DATA(nh);
- if (!err->error)
- continue;
- printf("nlmsg error %s\n", strerror(-err->error));
- goto cleanup;
- case NLMSG_DONE:
- break;
- }
- }
-
- ret = 0;
-
-cleanup:
- close(sock);
- return ret;
-}
-
static int ifindex;
static void int_exit(int sig)
diff --git a/samples/bpf/xdp_tx_iptunnel_common.h b/samples/bpf/xdp_tx_iptunnel_common.h
new file mode 100644
index 000000000000..dd12cc35110f
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptunnel_common.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
+#define _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
+
+#include <linux/types.h>
+
+#define MAX_IPTNL_ENTRIES 256U
+
+struct vip {
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } daddr;
+ __u16 dport;
+ __u16 family;
+ __u8 protocol;
+};
+
+struct iptnl_info {
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } saddr;
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } daddr;
+ __u16 family;
+ __u8 dmac[6];
+};
+
+#endif
diff --git a/samples/bpf/xdp_tx_iptunnel_kern.c b/samples/bpf/xdp_tx_iptunnel_kern.c
new file mode 100644
index 000000000000..85c38ecd3a2d
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptunnel_kern.c
@@ -0,0 +1,236 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program shows how to use bpf_xdp_adjust_head() by
+ * encapsulating the incoming packet in an IPv4/v6 header
+ * and then XDP_TX it out.
+ */
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+#include "xdp_tx_iptunnel_common.h"
+
+struct bpf_map_def SEC("maps") rxcnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 256,
+};
+
+struct bpf_map_def SEC("maps") vip2tnl = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct iptnl_info),
+ .max_entries = MAX_IPTNL_ENTRIES,
+};
+
+static __always_inline void count_tx(u32 protocol)
+{
+ u64 *rxcnt_count;
+
+ rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+ if (rxcnt_count)
+ *rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, void *data_end,
+ u8 protocol)
+{
+ struct tcphdr *th;
+ struct udphdr *uh;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)trans_data;
+ if (th + 1 > data_end)
+ return -1;
+ return th->dest;
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)trans_data;
+ if (uh + 1 > data_end)
+ return -1;
+ return uh->dest;
+ default:
+ return 0;
+ }
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+ const struct ethhdr *old_eth,
+ const struct iptnl_info *tnl,
+ __be16 h_proto)
+{
+ memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+ memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+ new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct iphdr *iph = data + sizeof(struct ethhdr);
+ u16 *next_iph_u16;
+ u16 payload_len;
+ struct vip vip = {};
+ int dport;
+ u32 csum = 0;
+ int i;
+
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+
+ dport = get_dport(iph + 1, data_end, iph->protocol);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = iph->protocol;
+ vip.family = AF_INET;
+ vip.daddr.v4 = iph->daddr;
+ vip.dport = dport;
+ payload_len = ntohs(iph->tot_len);
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v4-in-v4 */
+ if (!tnl || tnl->family != AF_INET)
+ return XDP_PASS;
+
+ /* The vip key is found. Add an IP header and send it out */
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+
+ new_eth = data;
+ iph = data + sizeof(*new_eth);
+ old_eth = data + sizeof(*iph);
+
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end ||
+ iph + 1 > data_end)
+ return XDP_DROP;
+
+ set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP));
+
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) >> 2;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0;
+ iph->tos = 0;
+ iph->tot_len = htons(payload_len + sizeof(*iph));
+ iph->daddr = tnl->daddr.v4;
+ iph->saddr = tnl->saddr.v4;
+ iph->ttl = 8;
+
+ next_iph_u16 = (u16 *)iph;
+#pragma clang loop unroll(full)
+ for (i = 0; i < sizeof(*iph) >> 1; i++)
+ csum += *next_iph_u16++;
+
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = ip6h->nexthdr;
+ vip.family = AF_INET6;
+ memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+ vip.dport = dport;
+ payload_len = ip6h->payload_len;
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v6-in-v6 */
+ if (!tnl || tnl->family != AF_INET6)
+ return XDP_PASS;
+
+ /* The vip key is found. Add an IP header and send it out */
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+
+ new_eth = data;
+ ip6h = data + sizeof(*new_eth);
+ old_eth = data + sizeof(*ip6h);
+
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end ||
+ ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6));
+
+ ip6h->version = 6;
+ ip6h->priority = 0;
+ memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+ ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h));
+ ip6h->nexthdr = IPPROTO_IPV6;
+ ip6h->hop_limit = 8;
+ memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+ memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ if (h_proto == htons(ETH_P_IP))
+ return handle_ipv4(xdp);
+ else if (h_proto == htons(ETH_P_IPV6))
+
+ return handle_ipv6(xdp);
+ else
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
new file mode 100644
index 000000000000..7a71f5c74684
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -0,0 +1,256 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <arpa/inet.h>
+#include <netinet/ether.h>
+#include <unistd.h>
+#include <time.h>
+#include "bpf_load.h"
+#include "libbpf.h"
+#include "bpf_util.h"
+#include "xdp_tx_iptunnel_common.h"
+
+#define STATS_INTERVAL_S 2U
+
+static int ifindex = -1;
+
+static void int_exit(int sig)
+{
+ if (ifindex > -1)
+ set_link_xdp_fd(ifindex, -1);
+ exit(0);
+}
+
+/* simple per-protocol drop counter
+ */
+static void poll_stats(unsigned int kill_after_s)
+{
+ const unsigned int nr_protos = 256;
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ time_t started_at = time(NULL);
+ __u64 values[nr_cpus], prev[nr_protos][nr_cpus];
+ __u32 proto;
+ int i;
+
+ memset(prev, 0, sizeof(prev));
+
+ while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
+ sleep(STATS_INTERVAL_S);
+
+ for (proto = 0; proto < nr_protos; proto++) {
+ __u64 sum = 0;
+
+ assert(bpf_lookup_elem(map_fd[0], &proto, values) == 0);
+ for (i = 0; i < nr_cpus; i++)
+ sum += (values[i] - prev[proto][i]);
+
+ if (sum)
+ printf("proto %u: sum:%10llu pkts, rate:%10llu pkts/s\n",
+ proto, sum, sum / STATS_INTERVAL_S);
+ memcpy(prev[proto], values, sizeof(values));
+ }
+ }
+}
+
+static void usage(const char *cmd)
+{
+ printf("Start a XDP prog which encapsulates incoming packets\n"
+ "in an IPv4/v6 header and XDP_TX it out. The dst <VIP:PORT>\n"
+ "is used to select packets to encapsulate\n\n");
+ printf("Usage: %s [...]\n", cmd);
+ printf(" -i <ifindex> Interface Index\n");
+ printf(" -a <vip-service-address> IPv4 or IPv6\n");
+ printf(" -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n");
+ printf(" -s <source-ip> Used in the IPTunnel header\n");
+ printf(" -d <dest-ip> Used in the IPTunnel header\n");
+ printf(" -m <dest-MAC> Used in sending the IP Tunneled pkt\n");
+ printf(" -T <stop-after-X-seconds> Default: 0 (forever)\n");
+ printf(" -P <IP-Protocol> Default is TCP\n");
+ printf(" -h Display this help\n");
+}
+
+static int parse_ipstr(const char *ipstr, unsigned int *addr)
+{
+ if (inet_pton(AF_INET6, ipstr, addr) == 1) {
+ return AF_INET6;
+ } else if (inet_pton(AF_INET, ipstr, addr) == 1) {
+ addr[1] = addr[2] = addr[3] = 0;
+ return AF_INET;
+ }
+
+ fprintf(stderr, "%s is an invalid IP\n", ipstr);
+ return AF_UNSPEC;
+}
+
+static int parse_ports(const char *port_str, int *min_port, int *max_port)
+{
+ char *end;
+ long tmp_min_port;
+ long tmp_max_port;
+
+ tmp_min_port = strtol(optarg, &end, 10);
+ if (tmp_min_port < 1 || tmp_min_port > 65535) {
+ fprintf(stderr, "Invalid port(s):%s\n", optarg);
+ return 1;
+ }
+
+ if (*end == '-') {
+ end++;
+ tmp_max_port = strtol(end, NULL, 10);
+ if (tmp_max_port < 1 || tmp_max_port > 65535) {
+ fprintf(stderr, "Invalid port(s):%s\n", optarg);
+ return 1;
+ }
+ } else {
+ tmp_max_port = tmp_min_port;
+ }
+
+ if (tmp_min_port > tmp_max_port) {
+ fprintf(stderr, "Invalid port(s):%s\n", optarg);
+ return 1;
+ }
+
+ if (tmp_max_port - tmp_min_port + 1 > MAX_IPTNL_ENTRIES) {
+ fprintf(stderr, "Port range (%s) is larger than %u\n",
+ port_str, MAX_IPTNL_ENTRIES);
+ return 1;
+ }
+ *min_port = tmp_min_port;
+ *max_port = tmp_max_port;
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ unsigned char opt_flags[256] = {};
+ unsigned int kill_after_s = 0;
+ const char *optstr = "i:a:p:s:d:m:T:P:h";
+ int min_port = 0, max_port = 0;
+ struct iptnl_info tnl = {};
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ struct vip vip = {};
+ char filename[256];
+ int opt;
+ int i;
+
+ tnl.family = AF_UNSPEC;
+ vip.protocol = IPPROTO_TCP;
+
+ for (i = 0; i < strlen(optstr); i++)
+ if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
+ opt_flags[(unsigned char)optstr[i]] = 1;
+
+ while ((opt = getopt(argc, argv, optstr)) != -1) {
+ unsigned short family;
+ unsigned int *v6;
+
+ switch (opt) {
+ case 'i':
+ ifindex = atoi(optarg);
+ break;
+ case 'a':
+ vip.family = parse_ipstr(optarg, vip.daddr.v6);
+ if (vip.family == AF_UNSPEC)
+ return 1;
+ break;
+ case 'p':
+ if (parse_ports(optarg, &min_port, &max_port))
+ return 1;
+ break;
+ case 'P':
+ vip.protocol = atoi(optarg);
+ break;
+ case 's':
+ case 'd':
+ if (opt == 's')
+ v6 = tnl.saddr.v6;
+ else
+ v6 = tnl.daddr.v6;
+
+ family = parse_ipstr(optarg, v6);
+ if (family == AF_UNSPEC)
+ return 1;
+ if (tnl.family == AF_UNSPEC) {
+ tnl.family = family;
+ } else if (tnl.family != family) {
+ fprintf(stderr,
+ "The IP version of the src and dst addresses used in the IP encapsulation does not match\n");
+ return 1;
+ }
+ break;
+ case 'm':
+ if (!ether_aton_r(optarg,
+ (struct ether_addr *)tnl.dmac)) {
+ fprintf(stderr, "Invalid mac address:%s\n",
+ optarg);
+ return 1;
+ }
+ break;
+ case 'T':
+ kill_after_s = atoi(optarg);
+ break;
+ default:
+ usage(argv[0]);
+ return 1;
+ }
+ opt_flags[opt] = 0;
+ }
+
+ for (i = 0; i < strlen(optstr); i++) {
+ if (opt_flags[(unsigned int)optstr[i]]) {
+ fprintf(stderr, "Missing argument -%c\n", optstr[i]);
+ usage(argv[0]);
+ return 1;
+ }
+ }
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
+ return 1;
+ }
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ if (!prog_fd[0]) {
+ printf("load_bpf_file: %s\n", strerror(errno));
+ return 1;
+ }
+
+ signal(SIGINT, int_exit);
+
+ while (min_port <= max_port) {
+ vip.dport = htons(min_port++);
+ if (bpf_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) {
+ perror("bpf_update_elem(&vip2tnl)");
+ return 1;
+ }
+ }
+
+ if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) {
+ printf("link set xdp fd failed\n");
+ return 1;
+ }
+
+ poll_stats(kill_after_s);
+
+ set_link_xdp_fd(ifindex, -1);
+
+ return 0;
+}