summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-04-18 13:24:42 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-04-18 13:24:42 -0700
commit40d9018eb738f724dc9c27eec88c78cc488e0af0 (patch)
tree8afd5db71fd8cb36290cfa488ce75e50a533ba24
parentfb5e2154b764812705dce84881319471d27606fb (diff)
parentacf167f3f2495f0b6565f6e8ac9bd87158a0e98d (diff)
downloadlwn-40d9018eb738f724dc9c27eec88c78cc488e0af0.tar.gz
lwn-40d9018eb738f724dc9c27eec88c78cc488e0af0.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller: 1) BPF tail call handling bug fixes from Daniel Borkmann. 2) Fix allowance of too many rx queues in sfc driver, from Bert Kenward. 3) Non-loopback ipv6 packets claiming src of ::1 should be dropped, from Florian Westphal. 4) Statistics requests on KSZ9031 can crash, fix from Grygorii Strashko. 5) TX ring handling fixes in mediatek driver, from Sean Wang. 6) ip_ra_control can deadlock, fix lock acquisition ordering to fix, from Cong WANG. 7) Fix use after free in ip_recv_error(), from Willem de Buijn. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: bpf: fix checking xdp_adjust_head on tail calls bpf: fix cb access in socket filter programs on tail calls ipv6: drop non loopback packets claiming to originate from ::1 net: ethernet: mediatek: fix inconsistency of port number carried in TXD net: ethernet: mediatek: fix inconsistency between TXD and the used buffer net: phy: micrel: fix crash when statistic requested for KSZ9031 phy net: vrf: Fix setting NLM_F_EXCL flag when adding l3mdev rule net: thunderx: Fix set_max_bgx_per_node for 81xx rgx net-timestamp: avoid use-after-free in ip_recv_error ipv4: fix a deadlock in ip_ra_control sfc: limit the number of receive queues
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.c1
-rw-r--r--drivers/net/ethernet/cavium/thunder/thunder_bgx.h1
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c31
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.h12
-rw-r--r--drivers/net/ethernet/sfc/efx.c7
-rw-r--r--drivers/net/ethernet/sfc/falcon/efx.c7
-rw-r--r--drivers/net/phy/micrel.c17
-rw-r--r--drivers/net/vrf.c2
-rw-r--r--kernel/bpf/syscall.c8
-rw-r--r--net/core/skbuff.c1
-rw-r--r--net/ipv4/ip_sockglue.c10
-rw-r--r--net/ipv4/ipmr.c11
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv6/datagram.c10
-rw-r--r--net/ipv6/ip6_input.c7
15 files changed, 69 insertions, 58 deletions
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 64a1095e4d14..a0ca68ce3fbb 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -134,6 +134,7 @@ static void set_max_bgx_per_node(struct pci_dev *pdev)
pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
switch (sdevid) {
case PCI_SUBSYS_DEVID_81XX_BGX:
+ case PCI_SUBSYS_DEVID_81XX_RGX:
max_bgx_per_node = MAX_BGX_PER_CN81XX;
break;
case PCI_SUBSYS_DEVID_83XX_BGX:
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index c5080f2cead5..6b7fe6fdd13b 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -16,6 +16,7 @@
/* Subsystem device IDs */
#define PCI_SUBSYS_DEVID_88XX_BGX 0xA126
#define PCI_SUBSYS_DEVID_81XX_BGX 0xA226
+#define PCI_SUBSYS_DEVID_81XX_RGX 0xA254
#define PCI_SUBSYS_DEVID_83XX_BGX 0xA326
#define MAX_BGX_THUNDER 8 /* Max 2 nodes, 4 per node */
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 9e757684816d..93949139e62c 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -613,7 +613,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
struct mtk_mac *mac = netdev_priv(dev);
struct mtk_eth *eth = mac->hw;
struct mtk_tx_dma *itxd, *txd;
- struct mtk_tx_buf *tx_buf;
+ struct mtk_tx_buf *itx_buf, *tx_buf;
dma_addr_t mapped_addr;
unsigned int nr_frags;
int i, n_desc = 1;
@@ -627,8 +627,8 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT;
txd4 |= fport;
- tx_buf = mtk_desc_to_tx_buf(ring, itxd);
- memset(tx_buf, 0, sizeof(*tx_buf));
+ itx_buf = mtk_desc_to_tx_buf(ring, itxd);
+ memset(itx_buf, 0, sizeof(*itx_buf));
if (gso)
txd4 |= TX_DMA_TSO;
@@ -647,9 +647,11 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
return -ENOMEM;
WRITE_ONCE(itxd->txd1, mapped_addr);
- tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
- dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
- dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
+ itx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
+ itx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
+ MTK_TX_FLAGS_FPORT1;
+ dma_unmap_addr_set(itx_buf, dma_addr0, mapped_addr);
+ dma_unmap_len_set(itx_buf, dma_len0, skb_headlen(skb));
/* TX SG offload */
txd = itxd;
@@ -685,11 +687,13 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
last_frag * TX_DMA_LS0));
WRITE_ONCE(txd->txd4, fport);
- tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
tx_buf = mtk_desc_to_tx_buf(ring, txd);
memset(tx_buf, 0, sizeof(*tx_buf));
-
+ tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
+ tx_buf->flags |= (!mac->id) ? MTK_TX_FLAGS_FPORT0 :
+ MTK_TX_FLAGS_FPORT1;
+
dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
frag_size -= frag_map_size;
@@ -698,7 +702,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
}
/* store skb to cleanup */
- tx_buf->skb = skb;
+ itx_buf->skb = skb;
WRITE_ONCE(itxd->txd4, txd4);
WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
@@ -1012,17 +1016,16 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget)
while ((cpu != dma) && budget) {
u32 next_cpu = desc->txd2;
- int mac;
+ int mac = 0;
desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
break;
- mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) &
- TX_DMA_FPORT_MASK;
- mac--;
-
tx_buf = mtk_desc_to_tx_buf(ring, desc);
+ if (tx_buf->flags & MTK_TX_FLAGS_FPORT1)
+ mac = 1;
+
skb = tx_buf->skb;
if (!skb) {
condition = 1;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 99b1c8e9f16f..08285a96ff70 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -406,12 +406,18 @@ struct mtk_hw_stats {
struct u64_stats_sync syncp;
};
-/* PDMA descriptor can point at 1-2 segments. This enum allows us to track how
- * memory was allocated so that it can be freed properly
- */
enum mtk_tx_flags {
+ /* PDMA descriptor can point at 1-2 segments. This enum allows us to
+ * track how memory was allocated so that it can be freed properly.
+ */
MTK_TX_FLAGS_SINGLE0 = 0x01,
MTK_TX_FLAGS_PAGE0 = 0x02,
+
+ /* MTK_TX_FLAGS_FPORTx allows tracking which port the transmitted
+ * SKB out instead of looking up through hardware TX descriptor.
+ */
+ MTK_TX_FLAGS_FPORT0 = 0x04,
+ MTK_TX_FLAGS_FPORT1 = 0x08,
};
/* This enum allows us to identify how the clock is defined on the array of the
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 50d28261b6b9..b9cb697b2818 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1371,6 +1371,13 @@ static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
free_cpumask_var(thread_mask);
}
+ if (count > EFX_MAX_RX_QUEUES) {
+ netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
+ "Reducing number of rx queues from %u to %u.\n",
+ count, EFX_MAX_RX_QUEUES);
+ count = EFX_MAX_RX_QUEUES;
+ }
+
/* If RSS is requested for the PF *and* VFs then we can't write RSS
* table entries that are inaccessible to VFs
*/
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index f5e5cd1659a1..29614da91cbf 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -1354,6 +1354,13 @@ static unsigned int ef4_wanted_parallelism(struct ef4_nic *efx)
free_cpumask_var(thread_mask);
}
+ if (count > EF4_MAX_RX_QUEUES) {
+ netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
+ "Reducing number of rx queues from %u to %u.\n",
+ count, EF4_MAX_RX_QUEUES);
+ count = EF4_MAX_RX_QUEUES;
+ }
+
return count;
}
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 6742070ca676..1326d99771c1 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -798,9 +798,6 @@ static struct phy_driver ksphy_driver[] = {
.read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
- .get_sset_count = kszphy_get_sset_count,
- .get_strings = kszphy_get_strings,
- .get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
@@ -940,9 +937,6 @@ static struct phy_driver ksphy_driver[] = {
.read_status = genphy_read_status,
.ack_interrupt = kszphy_ack_interrupt,
.config_intr = kszphy_config_intr,
- .get_sset_count = kszphy_get_sset_count,
- .get_strings = kszphy_get_strings,
- .get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
@@ -952,6 +946,7 @@ static struct phy_driver ksphy_driver[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
.driver_data = &ksz9021_type,
+ .probe = kszphy_probe,
.config_init = ksz9021_config_init,
.config_aneg = genphy_config_aneg,
.read_status = genphy_read_status,
@@ -971,6 +966,7 @@ static struct phy_driver ksphy_driver[] = {
.features = PHY_GBIT_FEATURES,
.flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
.driver_data = &ksz9021_type,
+ .probe = kszphy_probe,
.config_init = ksz9031_config_init,
.config_aneg = genphy_config_aneg,
.read_status = ksz9031_read_status,
@@ -989,9 +985,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init = kszphy_config_init,
.config_aneg = ksz8873mll_config_aneg,
.read_status = ksz8873mll_read_status,
- .get_sset_count = kszphy_get_sset_count,
- .get_strings = kszphy_get_strings,
- .get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
@@ -1003,9 +996,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init = kszphy_config_init,
.config_aneg = genphy_config_aneg,
.read_status = genphy_read_status,
- .get_sset_count = kszphy_get_sset_count,
- .get_strings = kszphy_get_strings,
- .get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
@@ -1017,9 +1007,6 @@ static struct phy_driver ksphy_driver[] = {
.config_init = kszphy_config_init,
.config_aneg = ksz8873mll_config_aneg,
.read_status = ksz8873mll_read_status,
- .get_sset_count = kszphy_get_sset_count,
- .get_strings = kszphy_get_strings,
- .get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
} };
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index d6988db1930d..7d909c8183e9 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1128,7 +1128,7 @@ static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
goto nla_put_failure;
/* rule only needs to appear once */
- nlh->nlmsg_flags &= NLM_F_EXCL;
+ nlh->nlmsg_flags |= NLM_F_EXCL;
frh = nlmsg_data(nlh);
memset(frh, 0, sizeof(*frh));
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7af0dcc5d755..821f9e807de5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -617,6 +617,14 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
if (insn->imm == BPF_FUNC_xdp_adjust_head)
prog->xdp_adjust_head = 1;
if (insn->imm == BPF_FUNC_tail_call) {
+ /* If we tail call into other programs, we
+ * cannot make any assumptions since they
+ * can be replaced dynamically during runtime
+ * in the program array.
+ */
+ prog->cb_access = 1;
+ prog->xdp_adjust_head = 1;
+
/* mark bpf_tail_call as different opcode
* to avoid conditional branch in
* interpeter for every normal call
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9f781092fda9..35c1e2460206 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3807,6 +3807,7 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = tstype;
serr->opt_stats = opt_stats;
+ serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk->sk_protocol == IPPROTO_TCP &&
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index ebd953bc5607..1d46d05efb0f 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -488,16 +488,15 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk,
return false;
/* Support IP_PKTINFO on tstamp packets if requested, to correlate
- * timestamp with egress dev. Not possible for packets without dev
+ * timestamp with egress dev. Not possible for packets without iif
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/
- if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
- (!skb->dev))
+ info = PKTINFO_SKB_CB(skb);
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+ !info->ipi_ifindex)
return false;
- info = PKTINFO_SKB_CB(skb);
info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
- info->ipi_ifindex = skb->dev->ifindex;
return true;
}
@@ -591,6 +590,7 @@ static bool setsockopt_needs_rtnl(int optname)
case MCAST_LEAVE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_UNBLOCK_SOURCE:
+ case IP_ROUTER_ALERT:
return true;
}
return false;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c0317c940bcd..b036e85e093b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1278,7 +1278,7 @@ static void mrtsock_destruct(struct sock *sk)
struct net *net = sock_net(sk);
struct mr_table *mrt;
- rtnl_lock();
+ ASSERT_RTNL();
ipmr_for_each_table(mrt, net) {
if (sk == rtnl_dereference(mrt->mroute_sk)) {
IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
@@ -1289,7 +1289,6 @@ static void mrtsock_destruct(struct sock *sk)
mroute_clean_tables(mrt, false);
}
}
- rtnl_unlock();
}
/* Socket options and virtual interface manipulation. The whole
@@ -1353,13 +1352,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
if (sk != rcu_access_pointer(mrt->mroute_sk)) {
ret = -EACCES;
} else {
- /* We need to unlock here because mrtsock_destruct takes
- * care of rtnl itself and we can't change that due to
- * the IP_ROUTER_ALERT setsockopt which runs without it.
- */
- rtnl_unlock();
ret = ip_ra_control(sk, 0, NULL);
- goto out;
+ goto out_unlock;
}
break;
case MRT_ADD_VIF:
@@ -1470,7 +1464,6 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
}
out_unlock:
rtnl_unlock();
-out:
return ret;
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 8119e1f66e03..9d943974de2b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -682,7 +682,9 @@ static void raw_close(struct sock *sk, long timeout)
/*
* Raw sockets may have direct kernel references. Kill them.
*/
+ rtnl_lock();
ip_ra_control(sk, 0, NULL);
+ rtnl_unlock();
sk_common_release(sk);
}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index eec27f87efac..e011122ebd43 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -405,9 +405,6 @@ static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr)
* At one point, excluding local errors was a quick test to identify icmp/icmp6
* errors. This is no longer true, but the test remained, so the v6 stack,
* unlike v4, also honors cmsg requests on all wifi and timestamp errors.
- *
- * Timestamp code paths do not initialize the fields expected by cmsg:
- * the PKTINFO fields in skb->cb[]. Fill those in here.
*/
static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
struct sock_exterr_skb *serr)
@@ -419,14 +416,9 @@ static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL)
return false;
- if (!skb->dev)
+ if (!IP6CB(skb)->iif)
return false;
- if (skb->protocol == htons(ETH_P_IPV6))
- IP6CB(skb)->iif = skb->dev->ifindex;
- else
- PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex;
-
return true;
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index aacfb4bce153..c45b12b4431c 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -122,11 +122,14 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
/*
* RFC4291 2.5.3
+ * The loopback address must not be used as the source address in IPv6
+ * packets that are sent outside of a single node. [..]
* A packet received on an interface with a destination address
* of loopback must be dropped.
*/
- if (!(dev->flags & IFF_LOOPBACK) &&
- ipv6_addr_loopback(&hdr->daddr))
+ if ((ipv6_addr_loopback(&hdr->saddr) ||
+ ipv6_addr_loopback(&hdr->daddr)) &&
+ !(dev->flags & IFF_LOOPBACK))
goto err;
/* RFC4291 Errata ID: 3480