summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/Kconfig32
-rw-r--r--net/8021q/Kconfig2
-rw-r--r--net/8021q/vlan_dev.c8
-rw-r--r--net/9p/trans_xen.c61
-rw-r--r--net/Kconfig26
-rw-r--r--net/atm/Kconfig2
-rw-r--r--net/atm/lec.c4
-rw-r--r--net/batman-adv/soft-interface.c2
-rw-r--r--net/bpfilter/Kconfig6
-rw-r--r--net/bpfilter/Makefile11
-rw-r--r--net/bridge/Kconfig6
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/caif/Kconfig8
-rw-r--r--net/can/Kconfig8
-rw-r--r--net/ceph/ceph_common.c75
-rw-r--r--net/ceph/crush/crush.c3
-rw-r--r--net/ceph/debugfs.c6
-rw-r--r--net/ceph/osd_client.c103
-rw-r--r--net/ceph/osdmap.c363
-rw-r--r--net/core/dev.c79
-rw-r--r--net/core/dev_addr_lists.c12
-rw-r--r--net/core/filter.c21
-rw-r--r--net/core/rtnetlink.c1
-rw-r--r--net/core/sock.c2
-rw-r--r--net/core/sock_map.c38
-rw-r--r--net/core/sysctl_net_core.c2
-rw-r--r--net/core/xdp.c1
-rw-r--r--net/dcb/Kconfig2
-rw-r--r--net/dccp/Kconfig4
-rw-r--r--net/dccp/ccids/Kconfig6
-rw-r--r--net/dccp/proto.c7
-rw-r--r--net/decnet/Kconfig4
-rw-r--r--net/dsa/Kconfig2
-rw-r--r--net/dsa/master.c4
-rw-r--r--net/ethtool/common.c1
-rw-r--r--net/ethtool/linkinfo.c3
-rw-r--r--net/hsr/Kconfig2
-rw-r--r--net/ieee802154/6lowpan/Kconfig2
-rw-r--r--net/ieee802154/Kconfig6
-rw-r--r--net/ipv4/Kconfig72
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/inet_connection_sock.c1
-rw-r--r--net/ipv4/ip_tunnel.c14
-rw-r--r--net/ipv4/netfilter/Kconfig16
-rw-r--r--net/ipv4/nexthop.c82
-rw-r--r--net/ipv4/tcp.c78
-rw-r--r--net/ipv4/tcp_bpf.c6
-rw-r--r--net/ipv4/tcp_input.c12
-rw-r--r--net/ipv6/Kconfig44
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c9
-rw-r--r--net/ipv6/ipv6_sockglue.c2
-rw-r--r--net/ipv6/mcast.c1
-rw-r--r--net/ipv6/netfilter/Kconfig6
-rw-r--r--net/ipv6/seg6.c16
-rw-r--r--net/ipv6/seg6_iptunnel.c2
-rw-r--r--net/ipv6/seg6_local.c6
-rw-r--r--net/kcm/Kconfig2
-rw-r--r--net/l2tp/Kconfig2
-rw-r--r--net/l3mdev/Kconfig2
-rw-r--r--net/lapb/Kconfig2
-rw-r--r--net/mac80211/Kconfig52
-rw-r--r--net/mac80211/mlme.c2
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/mac802154/Kconfig2
-rw-r--r--net/mpls/Kconfig6
-rw-r--r--net/mptcp/options.c2
-rw-r--r--net/mptcp/protocol.c45
-rw-r--r--net/mptcp/protocol.h8
-rw-r--r--net/mptcp/subflow.c62
-rw-r--r--net/ncsi/Kconfig4
-rw-r--r--net/netfilter/Kconfig58
-rw-r--r--net/netfilter/core.c2
-rw-r--r--net/netfilter/ipvs/Kconfig54
-rw-r--r--net/netfilter/nf_conntrack_netlink.c32
-rw-r--r--net/netfilter/nf_flow_table_core.c45
-rw-r--r--net/netfilter/nf_tables_api.c31
-rw-r--r--net/netfilter/nft_set_pipapo.c6
-rw-r--r--net/netfilter/nft_set_rbtree.c21
-rw-r--r--net/netlabel/Kconfig2
-rw-r--r--net/netlink/Kconfig2
-rw-r--r--net/netlink/genetlink.c112
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/hci/Kconfig2
-rw-r--r--net/nsh/Kconfig2
-rw-r--r--net/openvswitch/Kconfig8
-rw-r--r--net/packet/Kconfig4
-rw-r--r--net/qrtr/Kconfig6
-rw-r--r--net/rds/Kconfig6
-rw-r--r--net/rds/Makefile2
-rw-r--r--net/rds/ib.c43
-rw-r--r--net/rds/ib.h10
-rw-r--r--net/rds/ib_cm.c8
-rw-r--r--net/rds/ib_fmr.c269
-rw-r--r--net/rds/ib_frmr.c4
-rw-r--r--net/rds/ib_mr.h14
-rw-r--r--net/rds/ib_rdma.c28
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rxrpc/ar-internal.h119
-rw-r--r--net/rxrpc/call_event.c30
-rw-r--r--net/rxrpc/conn_event.c7
-rw-r--r--net/rxrpc/input.c7
-rw-r--r--net/rxrpc/peer_event.c7
-rw-r--r--net/rxrpc/proc.c6
-rw-r--r--net/rxrpc/recvmsg.c79
-rw-r--r--net/rxrpc/sendmsg.c4
-rw-r--r--net/sched/Kconfig122
-rw-r--r--net/sched/act_ct.c11
-rw-r--r--net/sched/act_gate.c126
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sctp/Kconfig2
-rw-r--r--net/smc/Kconfig4
-rw-r--r--net/smc/smc_ib.c13
-rw-r--r--net/sunrpc/addr.c4
-rw-r--r--net/sunrpc/auth.c2
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c56
-rw-r--r--net/sunrpc/auth_gss/gss_mech_switch.c12
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c18
-rw-r--r--net/sunrpc/auth_gss/trace.c1
-rw-r--r--net/sunrpc/clnt.c54
-rw-r--r--net/sunrpc/rpcb_clnt.c6
-rw-r--r--net/sunrpc/sunrpc.h1
-rw-r--r--net/sunrpc/sunrpc_syms.c2
-rw-r--r--net/sunrpc/svc.c29
-rw-r--r--net/sunrpc/svc_xprt.c57
-rw-r--r--net/sunrpc/svcauth.c25
-rw-r--r--net/sunrpc/svcauth_unix.c9
-rw-r--r--net/sunrpc/svcsock.c400
-rw-r--r--net/sunrpc/xprt.c23
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c4
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c121
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c21
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c92
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c10
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c55
-rw-r--r--net/sunrpc/xprtrdma/transport.c10
-rw-r--r--net/sunrpc/xprtrdma/verbs.c1
-rw-r--r--net/sunrpc/xprtsock.c12
-rw-r--r--net/switchdev/Kconfig2
-rw-r--r--net/tipc/Kconfig4
-rw-r--r--net/tipc/bearer.c2
-rw-r--r--net/tipc/msg.c6
-rw-r--r--net/tipc/socket.c3
-rw-r--r--net/tls/Kconfig2
-rw-r--r--net/tls/tls_main.c2
-rw-r--r--net/unix/Kconfig4
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/wireless/Kconfig14
-rw-r--r--net/wireless/core.c6
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/mlme.c26
-rw-r--r--net/x25/Kconfig2
-rw-r--r--net/xdp/xdp_umem.c6
-rw-r--r--net/xdp/xsk.c4
-rw-r--r--net/xfrm/Kconfig14
156 files changed, 2107 insertions, 1831 deletions
diff --git a/net/6lowpan/Kconfig b/net/6lowpan/Kconfig
index 4c1f4c0aa58a..d8fc459492b0 100644
--- a/net/6lowpan/Kconfig
+++ b/net/6lowpan/Kconfig
@@ -2,7 +2,7 @@
menuconfig 6LOWPAN
tristate "6LoWPAN Support"
depends on IPV6
- ---help---
+ help
This enables IPv6 over Low power Wireless Personal Area Network -
"6LoWPAN" which is supported by IEEE 802.15.4 or Bluetooth stacks.
@@ -10,7 +10,7 @@ config 6LOWPAN_DEBUGFS
bool "6LoWPAN debugfs support"
depends on 6LOWPAN
depends on DEBUG_FS
- ---help---
+ help
This enables 6LoWPAN debugfs support. For example to manipulate
IPHC context information at runtime.
@@ -18,7 +18,7 @@ menuconfig 6LOWPAN_NHC
tristate "Next Header and Generic Header Compression Support"
depends on 6LOWPAN
default y
- ---help---
+ help
Support for next header and generic header compression defined in
RFC6282 and RFC7400.
@@ -27,78 +27,78 @@ if 6LOWPAN_NHC
config 6LOWPAN_NHC_DEST
tristate "Destination Options Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Destination Options Header compression according to
RFC6282.
config 6LOWPAN_NHC_FRAGMENT
tristate "Fragment Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Fragment Header compression according to RFC6282.
config 6LOWPAN_NHC_HOP
tristate "Hop-by-Hop Options Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Hop-by-Hop Options Header compression according to
RFC6282.
config 6LOWPAN_NHC_IPV6
tristate "IPv6 Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Header compression according to RFC6282.
config 6LOWPAN_NHC_MOBILITY
tristate "Mobility Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Mobility Header compression according to RFC6282.
config 6LOWPAN_NHC_ROUTING
tristate "Routing Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 Routing Header compression according to RFC6282.
config 6LOWPAN_NHC_UDP
tristate "UDP Header Support"
default y
- ---help---
+ help
6LoWPAN IPv6 UDP Header compression according to RFC6282.
config 6LOWPAN_GHC_EXT_HDR_HOP
tristate "GHC Hop-by-Hop Options Header Support"
- ---help---
+ help
6LoWPAN IPv6 Hop-by-Hop option generic header compression according
to RFC7400.
config 6LOWPAN_GHC_UDP
tristate "GHC UDP Support"
- ---help---
+ help
6LoWPAN IPv6 UDP generic header compression according to RFC7400.
config 6LOWPAN_GHC_ICMPV6
tristate "GHC ICMPv6 Support"
- ---help---
+ help
6LoWPAN IPv6 ICMPv6 generic header compression according to RFC7400.
config 6LOWPAN_GHC_EXT_HDR_DEST
tristate "GHC Destination Options Header Support"
- ---help---
+ help
6LoWPAN IPv6 destination option generic header compression according
to RFC7400.
config 6LOWPAN_GHC_EXT_HDR_FRAG
tristate "GHC Fragmentation Options Header Support"
- ---help---
+ help
6LoWPAN IPv6 fragmentation option generic header compression
according to RFC7400.
config 6LOWPAN_GHC_EXT_HDR_ROUTE
tristate "GHC Routing Options Header Support"
- ---help---
+ help
6LoWPAN IPv6 routing option generic header compression according
to RFC7400.
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index 5510b4b90ff0..8bf7a1765b78 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -5,7 +5,7 @@
config VLAN_8021Q
tristate "802.1Q/802.1ad VLAN Support"
- ---help---
+ help
Select this and you will be able to create 802.1Q VLAN interfaces
on your Ethernet interfaces. 802.1Q VLAN supports almost
everything a regular Ethernet interface does, including
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f00bb57f0f60..c8d6a07e23c5 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -494,6 +494,7 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
* separate class since they always nest.
*/
static struct lock_class_key vlan_netdev_xmit_lock_key;
+static struct lock_class_key vlan_netdev_addr_lock_key;
static void vlan_dev_set_lockdep_one(struct net_device *dev,
struct netdev_queue *txq,
@@ -502,8 +503,11 @@ static void vlan_dev_set_lockdep_one(struct net_device *dev,
lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key);
}
-static void vlan_dev_set_lockdep_class(struct net_device *dev)
+static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
{
+ lockdep_set_class_and_subclass(&dev->addr_list_lock,
+ &vlan_netdev_addr_lock_key,
+ subclass);
netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL);
}
@@ -597,7 +601,7 @@ static int vlan_dev_init(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &vlan_type);
- vlan_dev_set_lockdep_class(dev);
+ vlan_dev_set_lockdep_class(dev, dev->lower_level);
vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
if (!vlan->vlan_pcpu_stats)
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 3963eb11c3fb..3debad93be1a 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -43,8 +43,8 @@
#include <net/9p/transport.h>
#define XEN_9PFS_NUM_RINGS 2
-#define XEN_9PFS_RING_ORDER 6
-#define XEN_9PFS_RING_SIZE XEN_FLEX_RING_SIZE(XEN_9PFS_RING_ORDER)
+#define XEN_9PFS_RING_ORDER 9
+#define XEN_9PFS_RING_SIZE(ring) XEN_FLEX_RING_SIZE(ring->intf->ring_order)
struct xen_9pfs_header {
uint32_t size;
@@ -132,8 +132,8 @@ static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size)
prod = ring->intf->out_prod;
virt_mb();
- return XEN_9PFS_RING_SIZE -
- xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) >= size;
+ return XEN_9PFS_RING_SIZE(ring) -
+ xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE(ring)) >= size;
}
static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
@@ -167,17 +167,18 @@ again:
prod = ring->intf->out_prod;
virt_mb();
- if (XEN_9PFS_RING_SIZE - xen_9pfs_queued(prod, cons,
- XEN_9PFS_RING_SIZE) < size) {
+ if (XEN_9PFS_RING_SIZE(ring) -
+ xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE(ring)) < size) {
spin_unlock_irqrestore(&ring->lock, flags);
goto again;
}
- masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
- masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
+ masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE(ring));
+ masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE(ring));
xen_9pfs_write_packet(ring->data.out, p9_req->tc.sdata, size,
- &masked_prod, masked_cons, XEN_9PFS_RING_SIZE);
+ &masked_prod, masked_cons,
+ XEN_9PFS_RING_SIZE(ring));
p9_req->status = REQ_STATUS_SENT;
virt_wmb(); /* write ring before updating pointer */
@@ -207,19 +208,19 @@ static void p9_xen_response(struct work_struct *work)
prod = ring->intf->in_prod;
virt_rmb();
- if (xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE) <
+ if (xen_9pfs_queued(prod, cons, XEN_9PFS_RING_SIZE(ring)) <
sizeof(h)) {
notify_remote_via_irq(ring->irq);
return;
}
- masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE);
- masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
+ masked_prod = xen_9pfs_mask(prod, XEN_9PFS_RING_SIZE(ring));
+ masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE(ring));
/* First, read just the header */
xen_9pfs_read_packet(&h, ring->data.in, sizeof(h),
masked_prod, &masked_cons,
- XEN_9PFS_RING_SIZE);
+ XEN_9PFS_RING_SIZE(ring));
req = p9_tag_lookup(priv->client, h.tag);
if (!req || req->status != REQ_STATUS_SENT) {
@@ -233,11 +234,11 @@ static void p9_xen_response(struct work_struct *work)
memcpy(&req->rc, &h, sizeof(h));
req->rc.offset = 0;
- masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE);
+ masked_cons = xen_9pfs_mask(cons, XEN_9PFS_RING_SIZE(ring));
/* Then, read the whole packet (including the header) */
xen_9pfs_read_packet(req->rc.sdata, ring->data.in, h.size,
masked_prod, &masked_cons,
- XEN_9PFS_RING_SIZE);
+ XEN_9PFS_RING_SIZE(ring));
virt_mb();
cons += h.size;
@@ -267,7 +268,7 @@ static irqreturn_t xen_9pfs_front_event_handler(int irq, void *r)
static struct p9_trans_module p9_xen_trans = {
.name = "xen",
- .maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT),
+ .maxsize = 1 << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT - 2),
.def = 1,
.create = p9_xen_create,
.close = p9_xen_close,
@@ -295,14 +296,16 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
if (priv->rings[i].irq > 0)
unbind_from_irqhandler(priv->rings[i].irq, priv->dev);
if (priv->rings[i].data.in) {
- for (j = 0; j < (1 << XEN_9PFS_RING_ORDER); j++) {
+ for (j = 0;
+ j < (1 << priv->rings[i].intf->ring_order);
+ j++) {
grant_ref_t ref;
ref = priv->rings[i].intf->ref[j];
gnttab_end_foreign_access(ref, 0, 0);
}
free_pages((unsigned long)priv->rings[i].data.in,
- XEN_9PFS_RING_ORDER -
+ priv->rings[i].intf->ring_order -
(PAGE_SHIFT - XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(priv->rings[i].ref, 0, 0);
@@ -323,7 +326,8 @@ static int xen_9pfs_front_remove(struct xenbus_device *dev)
}
static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
- struct xen_9pfs_dataring *ring)
+ struct xen_9pfs_dataring *ring,
+ unsigned int order)
{
int i = 0;
int ret = -ENOMEM;
@@ -342,21 +346,21 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
goto out;
ring->ref = ret;
bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- XEN_9PFS_RING_ORDER - (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ order - (PAGE_SHIFT - XEN_PAGE_SHIFT));
if (!bytes) {
ret = -ENOMEM;
goto out;
}
- for (; i < (1 << XEN_9PFS_RING_ORDER); i++) {
+ for (; i < (1 << order); i++) {
ret = gnttab_grant_foreign_access(
dev->otherend_id, virt_to_gfn(bytes) + i, 0);
if (ret < 0)
goto out;
ring->intf->ref[i] = ret;
}
- ring->intf->ring_order = XEN_9PFS_RING_ORDER;
+ ring->intf->ring_order = order;
ring->data.in = bytes;
- ring->data.out = bytes + XEN_9PFS_RING_SIZE;
+ ring->data.out = bytes + XEN_FLEX_RING_SIZE(order);
ret = xenbus_alloc_evtchn(dev, &ring->evtchn);
if (ret)
@@ -374,7 +378,7 @@ out:
for (i--; i >= 0; i--)
gnttab_end_foreign_access(ring->intf->ref[i], 0, 0);
free_pages((unsigned long)bytes,
- XEN_9PFS_RING_ORDER -
+ order - /* intf->ring_order is not set yet if granting failed */
(PAGE_SHIFT - XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(ring->ref, 0, 0);
@@ -404,8 +408,10 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
return -EINVAL;
max_ring_order = xenbus_read_unsigned(dev->otherend,
"max-ring-page-order", 0);
- if (max_ring_order < XEN_9PFS_RING_ORDER)
- return -EINVAL;
+ if (max_ring_order > XEN_9PFS_RING_ORDER)
+ max_ring_order = XEN_9PFS_RING_ORDER;
+ if (p9_xen_trans.maxsize > XEN_FLEX_RING_SIZE(max_ring_order))
+ p9_xen_trans.maxsize = XEN_FLEX_RING_SIZE(max_ring_order) / 2;
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
@@ -422,7 +428,8 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
for (i = 0; i < priv->num_rings; i++) {
priv->rings[i].priv = priv;
- ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i]);
+ ret = xen_9pfs_front_alloc_dataring(dev, &priv->rings[i],
+ max_ring_order);
if (ret < 0)
goto error;
}
diff --git a/net/Kconfig b/net/Kconfig
index 5c524c6ee75d..d1672280d6a4 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -8,7 +8,7 @@ menuconfig NET
select NLATTR
select GENERIC_NET_UTILS
select BPF
- ---help---
+ help
Unless you really know what you are doing, you should say Y here.
The reason is that some programs need kernel networking support even
when running on a stand-alone machine that isn't connected to any
@@ -70,7 +70,7 @@ source "net/xdp/Kconfig"
config INET
bool "TCP/IP networking"
- ---help---
+ help
These are the protocols used on the Internet and on most local
Ethernets. It is highly recommended to say Y here (this will enlarge
your kernel by about 400 KB), since some programs (e.g. the X window
@@ -121,7 +121,7 @@ config NETWORK_PHY_TIMESTAMPING
menuconfig NETFILTER
bool "Network packet filtering framework (Netfilter)"
- ---help---
+ help
Netfilter is a framework for filtering and mangling network packets
that pass through your Linux box.
@@ -192,7 +192,7 @@ config BRIDGE_NETFILTER
depends on NETFILTER_ADVANCED
select NETFILTER_FAMILY_BRIDGE
select SKB_EXTENSIONS
- ---help---
+ help
Enabling this option will let arptables resp. iptables see bridged
ARP resp. IP traffic. If you want a bridging firewall, you probably
want this option enabled.
@@ -268,7 +268,7 @@ config CGROUP_NET_PRIO
bool "Network priority cgroup"
depends on CGROUPS
select SOCK_CGROUP_DATA
- ---help---
+ help
Cgroup subsystem for use in assigning processes to network priorities on
a per-interface basis.
@@ -276,7 +276,7 @@ config CGROUP_NET_CLASSID
bool "Network classid cgroup"
depends on CGROUPS
select SOCK_CGROUP_DATA
- ---help---
+ help
Cgroup subsystem for use as general purpose socket classid marker that is
being used in cls_cgroup and for netfilter matching.
@@ -294,7 +294,7 @@ config BPF_JIT
bool "enable BPF Just In Time compiler"
depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT
depends on MODULES
- ---help---
+ help
Berkeley Packet Filter filtering capabilities are normally handled
by an interpreter. This option allows kernel to generate a native
code when filter is loaded in memory. This should speedup
@@ -312,7 +312,7 @@ config BPF_STREAM_PARSER
depends on CGROUP_BPF
select STREAM_PARSER
select NET_SOCK_MSG
- ---help---
+ help
Enabling this allows a stream parser to be used with
BPF_MAP_TYPE_SOCKMAP.
@@ -324,7 +324,7 @@ config NET_FLOW_LIMIT
bool
depends on RPS
default y
- ---help---
+ help
The network stack has to drop packets when a receive processing CPU's
backlog reaches netdev_max_backlog. If a few out of many active flows
generate the vast majority of load, drop their traffic earlier to
@@ -337,7 +337,7 @@ menu "Network testing"
config NET_PKTGEN
tristate "Packet Generator (USE WITH CAUTION)"
depends on INET && PROC_FS
- ---help---
+ help
This module will inject preconfigured packets, at a configurable
rate, out of a given interface. It is used for network interface
stress testing and performance analysis. If you don't understand
@@ -352,7 +352,7 @@ config NET_PKTGEN
config NET_DROP_MONITOR
tristate "Network packet drop alerting service"
depends on INET && TRACEPOINTS
- ---help---
+ help
This feature provides an alerting service to userspace in the
event that packets are discarded in the network stack. Alerts
are broadcast via netlink socket to any listening user space
@@ -398,7 +398,7 @@ source "net/ife/Kconfig"
config LWTUNNEL
bool "Network light weight tunnels"
- ---help---
+ help
This feature provides an infrastructure to support light weight
tunnels like mpls. There is no netdevice associated with a light
weight tunnel endpoint. Tunnel encapsulation parameters are stored
@@ -408,7 +408,7 @@ config LWTUNNEL_BPF
bool "Execute BPF program as route nexthop action"
depends on LWTUNNEL && INET
default y if LWTUNNEL=y
- ---help---
+ help
Allows to run BPF programs as a nexthop action following a route
lookup for incoming and outgoing packets.
diff --git a/net/atm/Kconfig b/net/atm/Kconfig
index e61dcc9f85b2..77343d57ff2a 100644
--- a/net/atm/Kconfig
+++ b/net/atm/Kconfig
@@ -5,7 +5,7 @@
config ATM
tristate "Asynchronous Transfer Mode (ATM)"
- ---help---
+ help
ATM is a high-speed networking technology for Local Area Networks
and Wide Area Networks. It uses a fixed packet size and is
connection oriented, allowing for the negotiation of minimum
diff --git a/net/atm/lec.c b/net/atm/lec.c
index ca37f5a71f5e..875fc0bc1780 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1536,10 +1536,8 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
struct lec_arp_table *to_return;
to_return = kzalloc(sizeof(struct lec_arp_table), GFP_ATOMIC);
- if (!to_return) {
- pr_info("LEC: Arp entry kmalloc failed\n");
+ if (!to_return)
return NULL;
- }
ether_addr_copy(to_return->mac_addr, mac_addr);
INIT_HLIST_NODE(&to_return->next);
timer_setup(&to_return->timer, lec_arp_expire_arp, 0);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0ddd80130ea3..f1f1c86f3419 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -745,6 +745,7 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
* separate class since they always nest.
*/
static struct lock_class_key batadv_netdev_xmit_lock_key;
+static struct lock_class_key batadv_netdev_addr_lock_key;
/**
* batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue
@@ -765,6 +766,7 @@ static void batadv_set_lockdep_class_one(struct net_device *dev,
*/
static void batadv_set_lockdep_class(struct net_device *dev)
{
+ lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key);
netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL);
}
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
index fed9290e3b41..84015ef3ee27 100644
--- a/net/bpfilter/Kconfig
+++ b/net/bpfilter/Kconfig
@@ -9,8 +9,12 @@ menuconfig BPFILTER
if BPFILTER
config BPFILTER_UMH
tristate "bpfilter kernel module with user mode helper"
- depends on CC_CAN_LINK
+ depends on CC_CAN_LINK_STATIC
default m
help
This builds bpfilter kernel module with embedded user mode helper
+
+ Note: your toolchain must support building static binaries, since
+ rootfs isn't mounted at the time when __init functions are called
+ and do_execv won't be able to find the elf interpreter.
endif
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index 36580301da70..f23b53294fba 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -3,17 +3,14 @@
# Makefile for the Linux BPFILTER layer.
#
-hostprogs := bpfilter_umh
+userprogs := bpfilter_umh
bpfilter_umh-objs := main.o
-KBUILD_HOSTCFLAGS += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi
-HOSTCC := $(CC)
+userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi
-ifeq ($(CONFIG_BPFILTER_UMH), y)
-# builtin bpfilter_umh should be compiled with -static
+# builtin bpfilter_umh should be linked with -static
# since rootfs isn't mounted at the time of __init
# function is called and do_execv won't find elf interpreter
-KBUILD_HOSTLDFLAGS += -static
-endif
+userldflags += -static
$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index 51a6414145d2..80879196560c 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -8,7 +8,7 @@ config BRIDGE
select LLC
select STP
depends on IPV6 || IPV6=n
- ---help---
+ help
If you say Y here, then your Linux box will be able to act as an
Ethernet bridge, which means that the different Ethernet segments it
is connected to will appear as one Ethernet to the participants.
@@ -39,7 +39,7 @@ config BRIDGE_IGMP_SNOOPING
depends on BRIDGE
depends on INET
default y
- ---help---
+ help
If you say Y here, then the Ethernet bridge will be able selectively
forward multicast traffic based on IGMP/MLD traffic received from
each port.
@@ -53,7 +53,7 @@ config BRIDGE_VLAN_FILTERING
depends on BRIDGE
depends on VLAN_8021Q
default n
- ---help---
+ help
If you say Y here, then the Ethernet bridge will be able selectively
receive and forward traffic based on VLAN information in the packet
any VLAN information configured on the bridge port or bridge device.
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8ec1362588af..8c7b78f8bc23 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -105,6 +105,13 @@ out:
return NETDEV_TX_OK;
}
+static struct lock_class_key bridge_netdev_addr_lock_key;
+
+static void br_set_lockdep_class(struct net_device *dev)
+{
+ lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
+}
+
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -143,6 +150,7 @@ static int br_dev_init(struct net_device *dev)
br_fdb_hash_fini(br);
}
+ br_set_lockdep_class(dev);
return err;
}
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
index b7532a79ca7a..87205251cc25 100644
--- a/net/caif/Kconfig
+++ b/net/caif/Kconfig
@@ -7,7 +7,7 @@ menuconfig CAIF
tristate "CAIF support"
select CRC_CCITT
default n
- ---help---
+ help
The "Communication CPU to Application CPU Interface" (CAIF) is a packet
based connection-oriented MUX protocol developed by ST-Ericsson for use
with its modems. It is accessed from user space as sockets (PF_CAIF).
@@ -26,7 +26,7 @@ config CAIF_DEBUG
bool "Enable Debug"
depends on CAIF
default n
- ---help---
+ help
Enable the inclusion of debug code in the CAIF stack.
Be aware that doing this will impact performance.
If unsure say N.
@@ -35,7 +35,7 @@ config CAIF_NETDEV
tristate "CAIF GPRS Network device"
depends on CAIF
default CAIF
- ---help---
+ help
Say Y if you will be using a CAIF based GPRS network device.
This can be either built-in or a loadable module.
If you select to build it as a built-in then the main CAIF device must
@@ -46,7 +46,7 @@ config CAIF_USB
tristate "CAIF USB support"
depends on CAIF
default n
- ---help---
+ help
Say Y if you are using CAIF over USB CDC NCM.
This can be either built-in or a loadable module.
If you select to build it as a built-in then the main CAIF device must
diff --git a/net/can/Kconfig b/net/can/Kconfig
index d77042752457..25436a715db3 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -6,7 +6,7 @@
menuconfig CAN
depends on NET
tristate "CAN bus subsystem support"
- ---help---
+ help
Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial
communications protocol. Development of the CAN bus started in
1983 at Robert Bosch GmbH, and the protocol was officially
@@ -23,7 +23,7 @@ if CAN
config CAN_RAW
tristate "Raw CAN Protocol (raw access with CAN-ID filtering)"
default y
- ---help---
+ help
The raw CAN protocol option offers access to the CAN bus via
the BSD socket API. You probably want to use the raw socket in
most cases where no higher level protocol is being used. The raw
@@ -33,7 +33,7 @@ config CAN_RAW
config CAN_BCM
tristate "Broadcast Manager CAN Protocol (with content filtering)"
default y
- ---help---
+ help
The Broadcast Manager offers content filtering, timeout monitoring,
sending of RTR frames, and cyclic CAN messages without permanent user
interaction. The BCM can be 'programmed' via the BSD socket API and
@@ -45,7 +45,7 @@ config CAN_BCM
config CAN_GW
tristate "CAN Gateway/Router (with netlink configuration)"
default y
- ---help---
+ help
The CAN Gateway/Router is used to route (and modify) CAN frames.
It is based on the PF_CAN core infrastructure for msg filtering and
msg sending and can optionally modify routed CAN frames on the fly.
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 66f22e8aa529..afe0e8184c23 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -176,6 +176,10 @@ int ceph_compare_options(struct ceph_options *new_opt,
}
}
+ ret = ceph_compare_crush_locs(&opt1->crush_locs, &opt2->crush_locs);
+ if (ret)
+ return ret;
+
/* any matching mon ip implies a match */
for (i = 0; i < opt1->num_mon; i++) {
if (ceph_monmap_contains(client->monc.monmap,
@@ -259,6 +263,8 @@ enum {
Opt_secret,
Opt_key,
Opt_ip,
+ Opt_crush_location,
+ Opt_read_from_replica,
/* string args above */
Opt_share,
Opt_crc,
@@ -268,11 +274,25 @@ enum {
Opt_abort_on_full,
};
+enum {
+ Opt_read_from_replica_no,
+ Opt_read_from_replica_balance,
+ Opt_read_from_replica_localize,
+};
+
+static const struct constant_table ceph_param_read_from_replica[] = {
+ {"no", Opt_read_from_replica_no},
+ {"balance", Opt_read_from_replica_balance},
+ {"localize", Opt_read_from_replica_localize},
+ {}
+};
+
static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_flag ("abort_on_full", Opt_abort_on_full),
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
fsparam_flag_no ("cephx_sign_messages", Opt_cephx_sign_messages),
fsparam_flag_no ("crc", Opt_crc),
+ fsparam_string ("crush_location", Opt_crush_location),
fsparam_string ("fsid", Opt_fsid),
fsparam_string ("ip", Opt_ip),
fsparam_string ("key", Opt_key),
@@ -283,6 +303,8 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout),
__fsparam (fs_param_is_s32, "osdtimeout", Opt_osdtimeout,
fs_param_deprecated, NULL),
+ fsparam_enum ("read_from_replica", Opt_read_from_replica,
+ ceph_param_read_from_replica),
fsparam_string ("secret", Opt_secret),
fsparam_flag_no ("share", Opt_share),
fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay),
@@ -297,6 +319,7 @@ struct ceph_options *ceph_alloc_options(void)
if (!opt)
return NULL;
+ opt->crush_locs = RB_ROOT;
opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr),
GFP_KERNEL);
if (!opt->mon_addr) {
@@ -319,6 +342,7 @@ void ceph_destroy_options(struct ceph_options *opt)
if (!opt)
return;
+ ceph_clear_crush_locs(&opt->crush_locs);
kfree(opt->name);
if (opt->key) {
ceph_crypto_key_destroy(opt->key);
@@ -453,6 +477,34 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
if (!opt->key)
return -ENOMEM;
return get_secret(opt->key, param->string, &log);
+ case Opt_crush_location:
+ ceph_clear_crush_locs(&opt->crush_locs);
+ err = ceph_parse_crush_location(param->string,
+ &opt->crush_locs);
+ if (err) {
+ error_plog(&log, "Failed to parse CRUSH location: %d",
+ err);
+ return err;
+ }
+ break;
+ case Opt_read_from_replica:
+ switch (result.uint_32) {
+ case Opt_read_from_replica_no:
+ opt->osd_req_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS |
+ CEPH_OSD_FLAG_LOCALIZE_READS);
+ break;
+ case Opt_read_from_replica_balance:
+ opt->osd_req_flags |= CEPH_OSD_FLAG_BALANCE_READS;
+ opt->osd_req_flags &= ~CEPH_OSD_FLAG_LOCALIZE_READS;
+ break;
+ case Opt_read_from_replica_localize:
+ opt->osd_req_flags |= CEPH_OSD_FLAG_LOCALIZE_READS;
+ opt->osd_req_flags &= ~CEPH_OSD_FLAG_BALANCE_READS;
+ break;
+ default:
+ BUG();
+ }
+ break;
case Opt_osdtimeout:
warn_plog(&log, "Ignoring osdtimeout");
@@ -535,6 +587,7 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
{
struct ceph_options *opt = client->options;
size_t pos = m->count;
+ struct rb_node *n;
if (opt->name) {
seq_puts(m, "name=");
@@ -544,6 +597,28 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
if (opt->key)
seq_puts(m, "secret=<hidden>,");
+ if (!RB_EMPTY_ROOT(&opt->crush_locs)) {
+ seq_puts(m, "crush_location=");
+ for (n = rb_first(&opt->crush_locs); ; ) {
+ struct crush_loc_node *loc =
+ rb_entry(n, struct crush_loc_node, cl_node);
+
+ seq_printf(m, "%s:%s", loc->cl_loc.cl_type_name,
+ loc->cl_loc.cl_name);
+ n = rb_next(n);
+ if (!n)
+ break;
+
+ seq_putc(m, '|');
+ }
+ seq_putc(m, ',');
+ }
+ if (opt->osd_req_flags & CEPH_OSD_FLAG_BALANCE_READS) {
+ seq_puts(m, "read_from_replica=balance,");
+ } else if (opt->osd_req_flags & CEPH_OSD_FLAG_LOCALIZE_READS) {
+ seq_puts(m, "read_from_replica=localize,");
+ }
+
if (opt->flags & CEPH_OPT_FSID)
seq_printf(m, "fsid=%pU,", &opt->fsid);
if (opt->flags & CEPH_OPT_NOSHARE)
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 3d70244bc1b6..254ded0b05f6 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -2,7 +2,6 @@
#ifdef __KERNEL__
# include <linux/slab.h>
# include <linux/crush/crush.h>
-void clear_choose_args(struct crush_map *c);
#else
# include "crush_compat.h"
# include "crush.h"
@@ -130,6 +129,8 @@ void crush_destroy(struct crush_map *map)
#ifndef __KERNEL__
kfree(map->choose_tries);
#else
+ clear_crush_names(&map->type_names);
+ clear_crush_names(&map->names);
clear_choose_args(map);
#endif
kfree(map);
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 1344f232ecc5..409d505ff320 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -81,11 +81,13 @@ static int osdmap_show(struct seq_file *s, void *p)
u32 state = map->osd_state[i];
char sb[64];
- seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\n",
+ seq_printf(s, "osd%d\t%s\t%3d%%\t(%s)\t%3d%%\t%2d\n",
i, ceph_pr_addr(addr),
((map->osd_weight[i]*100) >> 16),
ceph_osdmap_state_str(sb, sizeof(sb), state),
- ((ceph_get_primary_affinity(map, i)*100) >> 16));
+ ((ceph_get_primary_affinity(map, i)*100) >> 16),
+ ceph_get_crush_locality(map, i,
+ &client->options->crush_locs));
}
for (n = rb_first(&map->pg_temp); n; n = rb_next(n)) {
struct ceph_pg_mapping *pg =
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 1d4973f8cd7a..4fea3c33af2a 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -932,10 +932,14 @@ static void osd_req_op_watch_init(struct ceph_osd_request *req, int which,
op->watch.gen = 0;
}
+/*
+ * @flags: CEPH_OSD_OP_ALLOC_HINT_FLAG_*
+ */
void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
unsigned int which,
u64 expected_object_size,
- u64 expected_write_size)
+ u64 expected_write_size,
+ u32 flags)
{
struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
CEPH_OSD_OP_SETALLOCHINT,
@@ -943,6 +947,7 @@ void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
op->alloc_hint.expected_object_size = expected_object_size;
op->alloc_hint.expected_write_size = expected_write_size;
+ op->alloc_hint.flags = flags;
/*
* CEPH_OSD_OP_SETALLOCHINT op is advisory and therefore deemed
@@ -1018,6 +1023,7 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
cpu_to_le64(src->alloc_hint.expected_object_size);
dst->alloc_hint.expected_write_size =
cpu_to_le64(src->alloc_hint.expected_write_size);
+ dst->alloc_hint.flags = cpu_to_le32(src->alloc_hint.flags);
break;
case CEPH_OSD_OP_SETXATTR:
case CEPH_OSD_OP_CMPXATTR:
@@ -1497,6 +1503,45 @@ static bool target_should_be_paused(struct ceph_osd_client *osdc,
(osdc->osdmap->epoch < osdc->epoch_barrier);
}
+/*
+ * Picks a pseudo-random member of the acting set.  Returns its
+ * position in @acting->osds, not the OSD id.
+ */
+static int pick_random_replica(const struct ceph_osds *acting)
+{
+	int pos;
+
+	pos = prandom_u32() % acting->size;
+	dout("%s picked osd%d, primary osd%d\n", __func__,
+	     acting->osds[pos], acting->primary);
+
+	return pos;
+}
+
+/*
+ * Returns the position in @acting of the replica closest to the
+ * client, where closeness is what ceph_get_crush_locality() reports
+ * for the client's crush_location option.  A non-negative locality
+ * beats a negative one; among non-negative localities the smaller
+ * value wins.  Ties keep the earlier position, so position 0 (which
+ * the caller expects to be the primary) is preferred when nothing
+ * is strictly closer.
+ */
+static int pick_closest_replica(struct ceph_osd_client *osdc,
+				const struct ceph_osds *acting)
+{
+	struct rb_root *locs = &osdc->client->options->crush_locs;
+	int best_i = 0;
+	int best_locality;
+	int i;
+
+	/* position 0 is always the initial candidate */
+	best_locality = ceph_get_crush_locality(osdc->osdmap,
+						acting->osds[0], locs);
+
+	for (i = 1; i < acting->size; i++) {
+		int locality = ceph_get_crush_locality(osdc->osdmap,
+						       acting->osds[i], locs);
+
+		/* a non-local replica never displaces the candidate */
+		if (locality < 0)
+			continue;
+
+		if (best_locality < 0 || locality < best_locality) {
+			best_i = i;
+			best_locality = locality;
+		}
+	}
+
+	dout("%s picked osd%d with locality %d, primary osd%d\n", __func__,
+	     acting->osds[best_i], best_locality, acting->primary);
+	return best_i;
+}
+
enum calc_target_result {
CALC_TARGET_NO_ACTION = 0,
CALC_TARGET_NEED_RESEND,
@@ -1510,6 +1555,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
struct ceph_pg_pool_info *pi;
struct ceph_pg pgid, last_pgid;
struct ceph_osds up, acting;
+ bool is_read = t->flags & CEPH_OSD_FLAG_READ;
+ bool is_write = t->flags & CEPH_OSD_FLAG_WRITE;
bool force_resend = false;
bool unpaused = false;
bool legacy_change = false;
@@ -1540,9 +1587,9 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
ceph_oid_copy(&t->target_oid, &t->base_oid);
ceph_oloc_copy(&t->target_oloc, &t->base_oloc);
if ((t->flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
- if (t->flags & CEPH_OSD_FLAG_READ && pi->read_tier >= 0)
+ if (is_read && pi->read_tier >= 0)
t->target_oloc.pool = pi->read_tier;
- if (t->flags & CEPH_OSD_FLAG_WRITE && pi->write_tier >= 0)
+ if (is_write && pi->write_tier >= 0)
t->target_oloc.pool = pi->write_tier;
pi = ceph_pg_pool_by_id(osdc->osdmap, t->target_oloc.pool);
@@ -1581,7 +1628,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
unpaused = true;
}
legacy_change = ceph_pg_compare(&t->pgid, &pgid) ||
- ceph_osds_changed(&t->acting, &acting, any_change);
+ ceph_osds_changed(&t->acting, &acting,
+ t->used_replica || any_change);
if (t->pg_num)
split = ceph_pg_is_split(&last_pgid, t->pg_num, pi->pg_num);
@@ -1597,7 +1645,24 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
t->sort_bitwise = sort_bitwise;
t->recovery_deletes = recovery_deletes;
- t->osd = acting.primary;
+ if ((t->flags & (CEPH_OSD_FLAG_BALANCE_READS |
+ CEPH_OSD_FLAG_LOCALIZE_READS)) &&
+ !is_write && pi->type == CEPH_POOL_TYPE_REP &&
+ acting.size > 1) {
+ int pos;
+
+ WARN_ON(!is_read || acting.osds[0] != acting.primary);
+ if (t->flags & CEPH_OSD_FLAG_BALANCE_READS) {
+ pos = pick_random_replica(&acting);
+ } else {
+ pos = pick_closest_replica(osdc, &acting);
+ }
+ t->osd = acting.osds[pos];
+ t->used_replica = pos > 0;
+ } else {
+ t->osd = acting.primary;
+ t->used_replica = false;
+ }
}
if (unpaused || legacy_change || force_resend || split)
@@ -2366,13 +2431,17 @@ promote:
static void account_request(struct ceph_osd_request *req)
{
+ struct ceph_osd_client *osdc = req->r_osdc;
+
WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK));
WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE)));
req->r_flags |= CEPH_OSD_FLAG_ONDISK;
- atomic_inc(&req->r_osdc->num_requests);
+ req->r_flags |= osdc->client->options->osd_req_flags;
+ atomic_inc(&osdc->num_requests);
req->r_start_stamp = jiffies;
+ req->r_start_latency = ktime_get();
}
static void submit_request(struct ceph_osd_request *req, bool wrlocked)
@@ -2389,6 +2458,8 @@ static void finish_request(struct ceph_osd_request *req)
WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid));
dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
+ req->r_end_latency = ktime_get();
+
if (req->r_osd)
unlink_request(req->r_osd, req);
atomic_dec(&osdc->num_requests);
@@ -3657,6 +3728,26 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
goto out_unlock_osdc;
}
+ if (m.result == -EAGAIN) {
+ dout("req %p tid %llu EAGAIN\n", req, req->r_tid);
+ unlink_request(osd, req);
+ mutex_unlock(&osd->lock);
+
+ /*
+ * The object is missing on the replica or not (yet)
+ * readable. Clear pgid to force a resend to the primary
+ * via legacy_change.
+ */
+ req->r_t.pgid.pool = 0;
+ req->r_t.pgid.seed = 0;
+ WARN_ON(!req->r_t.used_replica);
+ req->r_flags &= ~(CEPH_OSD_FLAG_BALANCE_READS |
+ CEPH_OSD_FLAG_LOCALIZE_READS);
+ req->r_tid = 0;
+ __submit_request(req, false);
+ goto out_unlock_osdc;
+ }
+
if (m.num_ops != req->r_num_ops) {
pr_err("num_ops %d != %d for tid %llu\n", m.num_ops,
req->r_num_ops, req->r_tid);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 2a6e63a8edbe..96c25f5e064a 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -138,6 +138,79 @@ bad:
return -EINVAL;
}
+/*
+ * One (id, name) entry decoded from a CRUSH name map, kept in an
+ * rbtree keyed by cn_id.
+ */
+struct crush_name_node {
+ struct rb_node cn_node;
+ int cn_id;
+ /* NUL-terminated by decode_crush_names() */
+ char cn_name[];
+};
+
+/*
+ * Allocates a name node with room for @name_len bytes of name plus
+ * a terminating NUL.  Only the rb node is initialized; cn_id and
+ * cn_name are left for the caller to fill in.  Returns NULL if the
+ * allocation fails.
+ */
+static struct crush_name_node *alloc_crush_name(size_t name_len)
+{
+	struct crush_name_node *node;
+
+	node = kmalloc(sizeof(*node) + name_len + 1, GFP_NOIO);
+	if (node)
+		RB_CLEAR_NODE(&node->cn_node);
+
+	return node;
+}
+
+/*
+ * Frees a name node.  Warns if the node still appears to be linked
+ * into an rbtree.
+ */
+static void free_crush_name(struct crush_name_node *cn)
+{
+ WARN_ON(!RB_EMPTY_NODE(&cn->cn_node));
+
+ kfree(cn);
+}
+
+DEFINE_RB_FUNCS(crush_name, struct crush_name_node, cn_id, cn_node)
+
+/*
+ * Decodes a name map from *@p into @root: a 32-bit entry count
+ * followed by that many (32-bit id, 32-bit length, name bytes)
+ * entries.  *@p is advanced past everything consumed.
+ *
+ * Returns 0 on success, -EINVAL via the e_inval label passed to the
+ * decode helpers (presumably taken when the buffer is too short),
+ * -ENOMEM on allocation failure and -EEXIST if inserting an entry
+ * fails.  Entries inserted before a failure are left in @root for
+ * the caller to clear.
+ */
+static int decode_crush_names(void **p, void *end, struct rb_root *root)
+{
+ u32 n;
+
+ ceph_decode_32_safe(p, end, n, e_inval);
+ while (n--) {
+ struct crush_name_node *cn;
+ int id;
+ u32 name_len;
+
+ ceph_decode_32_safe(p, end, id, e_inval);
+ ceph_decode_32_safe(p, end, name_len, e_inval);
+ ceph_decode_need(p, end, name_len, e_inval);
+
+ cn = alloc_crush_name(name_len);
+ if (!cn)
+ return -ENOMEM;
+
+ cn->cn_id = id;
+ /* the name is not NUL-terminated in the buffer, terminate the copy */
+ memcpy(cn->cn_name, *p, name_len);
+ cn->cn_name[name_len] = '\0';
+ *p += name_len;
+
+ if (!__insert_crush_name(root, cn)) {
+ free_crush_name(cn);
+ return -EEXIST;
+ }
+ }
+
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/*
+ * Empties @root, erasing and freeing every name node in it.
+ */
+void clear_crush_names(struct rb_root *root)
+{
+	struct rb_node *first;
+
+	while ((first = rb_first(root)) != NULL) {
+		struct crush_name_node *cn;
+
+		cn = rb_entry(first, struct crush_name_node, cn_node);
+		erase_crush_name(root, cn);
+		free_crush_name(cn);
+	}
+}
+
static struct crush_choose_arg_map *alloc_choose_arg_map(void)
{
struct crush_choose_arg_map *arg_map;
@@ -354,6 +427,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
if (c == NULL)
return ERR_PTR(-ENOMEM);
+ c->type_names = RB_ROOT;
+ c->names = RB_ROOT;
c->choose_args = RB_ROOT;
/* set tunables to default values */
@@ -510,8 +585,14 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
}
}
- ceph_decode_skip_map(p, end, 32, string, bad); /* type_map */
- ceph_decode_skip_map(p, end, 32, string, bad); /* name_map */
+ err = decode_crush_names(p, end, &c->type_names);
+ if (err)
+ goto fail;
+
+ err = decode_crush_names(p, end, &c->names);
+ if (err)
+ goto fail;
+
ceph_decode_skip_map(p, end, 32, string, bad); /* rule_name_map */
/* tunables */
@@ -636,48 +717,11 @@ DEFINE_RB_FUNCS2(pg_mapping, struct ceph_pg_mapping, pgid, ceph_pg_compare,
/*
* rbtree of pg pool info
*/
-static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new)
-{
- struct rb_node **p = &root->rb_node;
- struct rb_node *parent = NULL;
- struct ceph_pg_pool_info *pi = NULL;
-
- while (*p) {
- parent = *p;
- pi = rb_entry(parent, struct ceph_pg_pool_info, node);
- if (new->id < pi->id)
- p = &(*p)->rb_left;
- else if (new->id > pi->id)
- p = &(*p)->rb_right;
- else
- return -EEXIST;
- }
-
- rb_link_node(&new->node, parent, p);
- rb_insert_color(&new->node, root);
- return 0;
-}
-
-static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id)
-{
- struct ceph_pg_pool_info *pi;
- struct rb_node *n = root->rb_node;
-
- while (n) {
- pi = rb_entry(n, struct ceph_pg_pool_info, node);
- if (id < pi->id)
- n = n->rb_left;
- else if (id > pi->id)
- n = n->rb_right;
- else
- return pi;
- }
- return NULL;
-}
+DEFINE_RB_FUNCS(pg_pool, struct ceph_pg_pool_info, id, node)
struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id)
{
- return __lookup_pg_pool(&map->pg_pools, id);
+ return lookup_pg_pool(&map->pg_pools, id);
}
const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
@@ -690,8 +734,7 @@ const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
if (WARN_ON_ONCE(id > (u64) INT_MAX))
return NULL;
- pi = __lookup_pg_pool(&map->pg_pools, (int) id);
-
+ pi = lookup_pg_pool(&map->pg_pools, id);
return pi ? pi->name : NULL;
}
EXPORT_SYMBOL(ceph_pg_pool_name_by_id);
@@ -714,14 +757,14 @@ u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id)
{
struct ceph_pg_pool_info *pi;
- pi = __lookup_pg_pool(&map->pg_pools, id);
+ pi = lookup_pg_pool(&map->pg_pools, id);
return pi ? pi->flags : 0;
}
EXPORT_SYMBOL(ceph_pg_pool_flags);
static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
{
- rb_erase(&pi->node, root);
+ erase_pg_pool(root, pi);
kfree(pi->name);
kfree(pi);
}
@@ -903,7 +946,7 @@ static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
ceph_decode_32_safe(p, end, len, bad);
dout(" pool %llu len %d\n", pool, len);
ceph_decode_need(p, end, len, bad);
- pi = __lookup_pg_pool(&map->pg_pools, pool);
+ pi = lookup_pg_pool(&map->pg_pools, pool);
if (pi) {
char *name = kstrndup(*p, len, GFP_NOFS);
@@ -1154,18 +1197,18 @@ static int __decode_pools(void **p, void *end, struct ceph_osdmap *map,
ceph_decode_64_safe(p, end, pool, e_inval);
- pi = __lookup_pg_pool(&map->pg_pools, pool);
+ pi = lookup_pg_pool(&map->pg_pools, pool);
if (!incremental || !pi) {
pi = kzalloc(sizeof(*pi), GFP_NOFS);
if (!pi)
return -ENOMEM;
+ RB_CLEAR_NODE(&pi->node);
pi->id = pool;
- ret = __insert_pg_pool(&map->pg_pools, pi);
- if (ret) {
+ if (!__insert_pg_pool(&map->pg_pools, pi)) {
kfree(pi);
- return ret;
+ return -EEXIST;
}
}
@@ -1829,7 +1872,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_pg_pool_info *pi;
ceph_decode_64_safe(p, end, pool, e_inval);
- pi = __lookup_pg_pool(&map->pg_pools, pool);
+ pi = lookup_pg_pool(&map->pg_pools, pool);
if (pi)
__remove_pg_pool(&map->pg_pools, pi);
}
@@ -2672,3 +2715,221 @@ int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
return acting.primary;
}
EXPORT_SYMBOL(ceph_pg_to_acting_primary);
+
+/*
+ * Allocates a location node with trailing space for a type name of
+ * @type_name_len bytes and a name of @name_len bytes, each followed
+ * by its own NUL terminator.  Only the rb node is initialized.
+ * Returns NULL if the allocation fails.
+ */
+static struct crush_loc_node *alloc_crush_loc(size_t type_name_len,
+					      size_t name_len)
+{
+	struct crush_loc_node *node;
+
+	/* + 2: one terminator per string */
+	node = kmalloc(sizeof(*node) + type_name_len + name_len + 2,
+		       GFP_NOIO);
+	if (node)
+		RB_CLEAR_NODE(&node->cl_node);
+
+	return node;
+}
+
+/*
+ * Frees a location node.  Warns if the node still appears to be
+ * linked into an rbtree.
+ */
+static void free_crush_loc(struct crush_loc_node *loc)
+{
+ WARN_ON(!RB_EMPTY_NODE(&loc->cl_node));
+
+ kfree(loc);
+}
+
+/*
+ * Orders locations by bucket type name first and by bucket name
+ * second, both compared with strcmp().
+ */
+static int crush_loc_compare(const struct crush_loc *loc1,
+			     const struct crush_loc *loc2)
+{
+	int ret;
+
+	ret = strcmp(loc1->cl_type_name, loc2->cl_type_name);
+	if (ret)
+		return ret;
+
+	return strcmp(loc1->cl_name, loc2->cl_name);
+}
+
+DEFINE_RB_FUNCS2(crush_loc, struct crush_loc_node, cl_loc, crush_loc_compare,
+ RB_BYPTR, const struct crush_loc *, cl_node)
+
+/*
+ * Parses a set of <bucket type name>':'<bucket name> pairs separated
+ * by '|', e.g. "rack:foo1|rack:foo2|datacenter:bar", and inserts one
+ * node per pair into @locs.  Only the first ':' in a pair acts as
+ * the separator.
+ *
+ * Returns 0 on success, -EINVAL if a pair has no ':' or has an empty
+ * type name or name, -ENOMEM on allocation failure and -EEXIST if
+ * inserting a pair fails.  Nodes inserted before a failure are left
+ * in @locs for the caller to clear.
+ *
+ * Note that @crush_location is modified by strsep().
+ */
+int ceph_parse_crush_location(char *crush_location, struct rb_root *locs)
+{
+ struct crush_loc_node *loc;
+ const char *type_name, *name, *colon;
+ size_t type_name_len, name_len;
+
+ dout("%s '%s'\n", __func__, crush_location);
+ while ((type_name = strsep(&crush_location, "|"))) {
+ colon = strchr(type_name, ':');
+ if (!colon)
+ return -EINVAL;
+
+ type_name_len = colon - type_name;
+ if (type_name_len == 0)
+ return -EINVAL;
+
+ name = colon + 1;
+ name_len = strlen(name);
+ if (name_len == 0)
+ return -EINVAL;
+
+ loc = alloc_crush_loc(type_name_len, name_len);
+ if (!loc)
+ return -ENOMEM;
+
+ /* both strings live back to back in cl_data, each NUL-terminated */
+ loc->cl_loc.cl_type_name = loc->cl_data;
+ memcpy(loc->cl_loc.cl_type_name, type_name, type_name_len);
+ loc->cl_loc.cl_type_name[type_name_len] = '\0';
+
+ loc->cl_loc.cl_name = loc->cl_data + type_name_len + 1;
+ memcpy(loc->cl_loc.cl_name, name, name_len);
+ loc->cl_loc.cl_name[name_len] = '\0';
+
+ if (!__insert_crush_loc(locs, loc)) {
+ free_crush_loc(loc);
+ return -EEXIST;
+ }
+
+ dout("%s type_name '%s' name '%s'\n", __func__,
+ loc->cl_loc.cl_type_name, loc->cl_loc.cl_name);
+ }
+
+ return 0;
+}
+
+/*
+ * Compares two location sets element by element in rbtree order.
+ * Returns the first non-zero crush_loc_compare() result.  If one set
+ * is a prefix of the other, the shorter set orders first (-1 if
+ * @locs1 is shorter, 1 if @locs2 is shorter).  Returns 0 if the sets
+ * are equal.
+ */
+int ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2)
+{
+	struct rb_node *a = rb_first(locs1);
+	struct rb_node *b = rb_first(locs2);
+
+	while (a && b) {
+		struct crush_loc_node *la =
+		    rb_entry(a, struct crush_loc_node, cl_node);
+		struct crush_loc_node *lb =
+		    rb_entry(b, struct crush_loc_node, cl_node);
+		int cmp = crush_loc_compare(&la->cl_loc, &lb->cl_loc);
+
+		if (cmp)
+			return cmp;
+
+		a = rb_next(a);
+		b = rb_next(b);
+	}
+
+	/* at least one side is exhausted at this point */
+	if (b)
+		return -1;
+	if (a)
+		return 1;
+	return 0;
+}
+
+/*
+ * Empties @locs, erasing and freeing every location node in it.
+ */
+void ceph_clear_crush_locs(struct rb_root *locs)
+{
+	struct rb_node *first;
+
+	while ((first = rb_first(locs)) != NULL) {
+		struct crush_loc_node *loc;
+
+		loc = rb_entry(first, struct crush_loc_node, cl_node);
+		erase_crush_loc(locs, loc);
+		free_crush_loc(loc);
+	}
+}
+
+/*
+ * Returns true if @name matches [a-zA-Z0-9-_.]+, i.e. it is
+ * non-empty and consists only of ASCII letters, digits, '-', '_'
+ * and '.'.
+ */
+static bool is_valid_crush_name(const char *name)
+{
+	const char *p = name;
+
+	for (;;) {
+		char ch = *p;
+		bool allowed = (ch >= 'a' && ch <= 'z') ||
+			       (ch >= 'A' && ch <= 'Z') ||
+			       (ch >= '0' && ch <= '9') ||
+			       ch == '-' || ch == '_' || ch == '.';
+
+		/* also rejects the empty string: NUL is not allowed */
+		if (!allowed)
+			return false;
+
+		if (*++p == '\0')
+			return true;
+	}
+}
+
+/*
+ * Gets the parent of an item.  Returns its id (<0 because the
+ * parent is always a bucket), type id (>0 for the same reason,
+ * via @parent_type_id) and location (via @parent_loc).  If no
+ * parent, returns 0 and leaves the output parameters untouched.
+ *
+ * Buckets that have no name, or whose name fails
+ * is_valid_crush_name(), are skipped.  So are buckets whose type
+ * has no entry in the type name map, since no location can be
+ * formed for them.
+ *
+ * Does a linear search, as there are no parent pointers of any
+ * kind.  Note that the result is ambiguous for items that occur
+ * multiple times in the map.
+ */
+static int get_immediate_parent(struct crush_map *c, int id,
+				u16 *parent_type_id,
+				struct crush_loc *parent_loc)
+{
+	struct crush_bucket *b;
+	struct crush_name_node *type_cn, *cn;
+	int i, j;
+
+	for (i = 0; i < c->max_buckets; i++) {
+		b = c->buckets[i];
+		if (!b)
+			continue;
+
+		/* ignore per-class shadow hierarchy */
+		cn = lookup_crush_name(&c->names, b->id);
+		if (!cn || !is_valid_crush_name(cn->cn_name))
+			continue;
+
+		for (j = 0; j < b->size; j++) {
+			if (b->items[j] != id)
+				continue;
+
+			/*
+			 * The type name map is decoded separately from
+			 * the buckets, so it may not cover b->type.
+			 * Don't hand out a bogus cl_type_name pointer
+			 * (it would end up in strcmp()); move on to
+			 * the next bucket instead.
+			 */
+			type_cn = lookup_crush_name(&c->type_names, b->type);
+			if (!type_cn)
+				break;
+
+			*parent_type_id = b->type;
+			parent_loc->cl_type_name = type_cn->cn_name;
+			parent_loc->cl_name = cn->cn_name;
+			return b->id;
+		}
+	}
+
+	return 0;  /* no parent */
+}
+
+/*
+ * Calculates the locality/distance from an item to a client
+ * location expressed in terms of CRUSH hierarchy as a set of
+ * (bucket type name, bucket name) pairs.  Walks up from @id one
+ * parent at a time and returns the type id of the first ancestor
+ * bucket whose location is present in @locs.  For standard bucket
+ * types (host = 1, rack = 3, datacenter = 8, zone = 9) a matching
+ * host is therefore closer than a matching rack, and a matching
+ * data center is closer than a matching zone.
+ *
+ * Specifying multiple locations (a "multipath" location) such
+ * as "rack:foo1|rack:foo2|datacenter:bar" is allowed -- @locs
+ * is a multimap.  The locality will be:
+ *
+ * - 3 for OSDs in racks foo1 and foo2
+ * - 8 for OSDs in data center bar
+ * - -1 for all other OSDs
+ *
+ * The lowest possible bucket type is 1, so the best locality
+ * for an OSD is 1 (i.e. a matching host).  Locality 0 would be
+ * the OSD itself.
+ */
+int ceph_get_crush_locality(struct ceph_osdmap *osdmap, int id,
+			    struct rb_root *locs)
+{
+	struct crush_loc parent_loc;
+	u16 parent_type;
+
+	/*
+	 * Instead of repeated get_immediate_parent() calls,
+	 * the location of @id could be obtained with a single
+	 * depth-first traversal.
+	 */
+	while ((id = get_immediate_parent(osdmap->crush, id, &parent_type,
+					  &parent_loc)) < 0) {
+		if (lookup_crush_loc(locs, &parent_loc))
+			return parent_type;
+	}
+
+	/* ran out of parents without a match */
+	return -1;  /* not local */
+}
diff --git a/net/core/dev.c b/net/core/dev.c
index 10684833f864..90b59fc50dc9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -79,6 +79,7 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
@@ -194,7 +195,7 @@ static DEFINE_SPINLOCK(napi_hash_lock);
static unsigned int napi_gen_id = NR_CPUS;
static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
-static seqcount_t devnet_rename_seq;
+static DECLARE_RWSEM(devnet_rename_sem);
static inline void dev_base_seq_inc(struct net *net)
{
@@ -438,6 +439,7 @@ static const char *const netdev_lock_name[] = {
"_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
+static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
@@ -459,11 +461,25 @@ static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
netdev_lock_name[i]);
}
+
+/*
+ * Assigns dev->addr_list_lock the lockdep class and name that
+ * correspond to the device type.
+ */
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+	int pos = netdev_lock_pos(dev->type);
+
+	lockdep_set_class_and_name(&dev->addr_list_lock,
+				   &netdev_addr_lock_key[pos],
+				   netdev_lock_name[pos]);
+}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
unsigned short dev_type)
{
}
+
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+}
#endif
/*******************************************************************************
@@ -998,33 +1014,28 @@ EXPORT_SYMBOL(dev_get_by_napi_id);
* @net: network namespace
* @name: a pointer to the buffer where the name will be stored.
* @ifindex: the ifindex of the interface to get the name from.
- *
- * The use of raw_seqcount_begin() and cond_resched() before
- * retrying is required as we want to give the writers a chance
- * to complete when CONFIG_PREEMPTION is not set.
*/
int netdev_get_name(struct net *net, char *name, int ifindex)
{
struct net_device *dev;
- unsigned int seq;
+ int ret;
-retry:
- seq = raw_seqcount_begin(&devnet_rename_seq);
+ down_read(&devnet_rename_sem);
rcu_read_lock();
+
dev = dev_get_by_index_rcu(net, ifindex);
if (!dev) {
- rcu_read_unlock();
- return -ENODEV;
+ ret = -ENODEV;
+ goto out;
}
strcpy(name, dev->name);
- rcu_read_unlock();
- if (read_seqcount_retry(&devnet_rename_seq, seq)) {
- cond_resched();
- goto retry;
- }
- return 0;
+ ret = 0;
+out:
+ rcu_read_unlock();
+ up_read(&devnet_rename_sem);
+ return ret;
}
/**
@@ -1296,10 +1307,10 @@ int dev_change_name(struct net_device *dev, const char *newname)
likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
return -EBUSY;
- write_seqcount_begin(&devnet_rename_seq);
+ down_write(&devnet_rename_sem);
if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
- write_seqcount_end(&devnet_rename_seq);
+ up_write(&devnet_rename_sem);
return 0;
}
@@ -1307,7 +1318,7 @@ int dev_change_name(struct net_device *dev, const char *newname)
err = dev_get_valid_name(net, dev, newname);
if (err < 0) {
- write_seqcount_end(&devnet_rename_seq);
+ up_write(&devnet_rename_sem);
return err;
}
@@ -1322,11 +1333,11 @@ rollback:
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
dev->name_assign_type = old_assign_type;
- write_seqcount_end(&devnet_rename_seq);
+ up_write(&devnet_rename_sem);
return ret;
}
- write_seqcount_end(&devnet_rename_seq);
+ up_write(&devnet_rename_sem);
netdev_adjacent_rename_links(dev, oldname);
@@ -1347,7 +1358,7 @@ rollback:
/* err >= 0 after dev_alloc_name() or stores the first errno */
if (err >= 0) {
err = ret;
- write_seqcount_begin(&devnet_rename_seq);
+ down_write(&devnet_rename_sem);
memcpy(dev->name, oldname, IFNAMSIZ);
memcpy(oldname, newname, IFNAMSIZ);
dev->name_assign_type = old_assign_type;
@@ -4181,10 +4192,12 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
local_bh_disable();
+ dev_xmit_recursion_inc();
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_drv_stopped(txq))
ret = netdev_start_xmit(skb, dev, txq, false);
HARD_TX_UNLOCK(dev, txq);
+ dev_xmit_recursion_dec();
local_bh_enable();
@@ -9377,15 +9390,6 @@ void netif_tx_stop_all_queues(struct net_device *dev)
}
EXPORT_SYMBOL(netif_tx_stop_all_queues);
-void netdev_update_lockdep_key(struct net_device *dev)
-{
- lockdep_unregister_key(&dev->addr_list_lock_key);
- lockdep_register_key(&dev->addr_list_lock_key);
-
- lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
-}
-EXPORT_SYMBOL(netdev_update_lockdep_key);
-
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -9424,7 +9428,7 @@ int register_netdevice(struct net_device *dev)
return ret;
spin_lock_init(&dev->addr_list_lock);
- lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
+ netdev_set_addr_lockdep_class(dev);
ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
@@ -9545,6 +9549,13 @@ int register_netdevice(struct net_device *dev)
rcu_barrier();
dev->reg_state = NETREG_UNREGISTERED;
+ /* We should put the kobject reference that is
+ * held in netdev_unregister_kobject(), otherwise
+ * the net device cannot be freed when the
+ * driver calls free_netdev(), because the
+ * kobject is still being held.
+ */
+ kobject_put(&dev->dev.kobj);
}
/*
* Prevent userspace races by waiting until the network
@@ -9943,8 +9954,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
- lockdep_register_key(&dev->addr_list_lock_key);
-
dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->upper_level = 1;
@@ -10032,8 +10041,6 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
- lockdep_unregister_key(&dev->addr_list_lock_key);
-
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 2f949b5a1eb9..6393ba930097 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -691,7 +691,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from)
return;
netif_addr_lock_bh(from);
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);
@@ -858,7 +858,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -888,7 +888,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -912,7 +912,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
return;
netif_addr_lock_bh(from);
- netif_addr_lock(to);
+ netif_addr_lock_nested(to);
__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);
diff --git a/net/core/filter.c b/net/core/filter.c
index d01a244b5087..73395384afe2 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1755,25 +1755,27 @@ BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
u32, offset, void *, to, u32, len, u32, start_header)
{
u8 *end = skb_tail_pointer(skb);
- u8 *net = skb_network_header(skb);
- u8 *mac = skb_mac_header(skb);
- u8 *ptr;
+ u8 *start, *ptr;
- if (unlikely(offset > 0xffff || len > (end - mac)))
+ if (unlikely(offset > 0xffff))
goto err_clear;
switch (start_header) {
case BPF_HDR_START_MAC:
- ptr = mac + offset;
+ if (unlikely(!skb_mac_header_was_set(skb)))
+ goto err_clear;
+ start = skb_mac_header(skb);
break;
case BPF_HDR_START_NET:
- ptr = net + offset;
+ start = skb_network_header(skb);
break;
default:
goto err_clear;
}
- if (likely(ptr >= mac && ptr + len <= end)) {
+ ptr = start + offset;
+
+ if (likely(ptr + len <= end)) {
memcpy(to, ptr, len);
return 0;
}
@@ -4340,8 +4342,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
}
break;
case SO_BINDTODEVICE:
- ret = -ENOPROTOOPT;
-#ifdef CONFIG_NETDEVICES
optlen = min_t(long, optlen, IFNAMSIZ - 1);
strncpy(devname, optval, optlen);
devname[optlen] = 0;
@@ -4360,7 +4360,6 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
dev_put(dev);
}
ret = sock_bindtoindex(sk, ifindex, false);
-#endif
break;
default:
ret = -EINVAL;
@@ -5050,7 +5049,7 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
int err;
struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
- if (!seg6_validate_srh(srh, len))
+ if (!seg6_validate_srh(srh, len, false))
return -EINVAL;
switch (type) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2269199c5891..9aedc15736ad 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2462,7 +2462,6 @@ static int do_set_master(struct net_device *dev, int ifindex,
err = ops->ndo_del_slave(upper_dev, dev);
if (err)
return err;
- netdev_update_lockdep_key(dev);
} else {
return -EOPNOTSUPP;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c4acf1f0220..94391da27754 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -718,7 +718,7 @@ bool sk_mc_loop(struct sock *sk)
return inet6_sk(sk)->mc_loop;
#endif
}
- WARN_ON(1);
+ WARN_ON_ONCE(1);
return true;
}
EXPORT_SYMBOL(sk_mc_loop);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 00a26cf2cfe9..4059f94e9bb5 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -424,10 +424,7 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
return 0;
}
-static bool sock_map_redirect_allowed(const struct sock *sk)
-{
- return sk->sk_state != TCP_LISTEN;
-}
+static bool sock_map_redirect_allowed(const struct sock *sk);
static int sock_map_update_common(struct bpf_map *map, u32 idx,
struct sock *sk, u64 flags)
@@ -508,6 +505,11 @@ static bool sk_is_udp(const struct sock *sk)
sk->sk_protocol == IPPROTO_UDP;
}
+/* Redirect is allowed only for TCP sockets that are not in the LISTEN state. */
+static bool sock_map_redirect_allowed(const struct sock *sk)
+{
+ return sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN;
+}
+
static bool sock_map_sk_is_suitable(const struct sock *sk)
{
return sk_is_tcp(sk) || sk_is_udp(sk);
@@ -989,11 +991,15 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
err = -EINVAL;
goto free_htab;
}
+ err = bpf_map_charge_init(&htab->map.memory, cost);
+ if (err)
+ goto free_htab;
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
sizeof(struct bpf_htab_bucket),
htab->map.numa_node);
if (!htab->buckets) {
+ bpf_map_charge_finish(&htab->map.memory);
err = -ENOMEM;
goto free_htab;
}
@@ -1013,6 +1019,7 @@ static void sock_hash_free(struct bpf_map *map)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
struct bpf_htab_bucket *bucket;
+ struct hlist_head unlink_list;
struct bpf_htab_elem *elem;
struct hlist_node *node;
int i;
@@ -1024,13 +1031,32 @@ static void sock_hash_free(struct bpf_map *map)
synchronize_rcu();
for (i = 0; i < htab->buckets_num; i++) {
bucket = sock_hash_select_bucket(htab, i);
- hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
- hlist_del_rcu(&elem->node);
+
+ /* We are racing with sock_hash_delete_from_link to
+ * enter the spin-lock critical section. Every socket on
+ * the list is still linked to sockhash. Since link
+ * exists, psock exists and holds a ref to socket. That
+ * lets us grab a socket ref too.
+ */
+ raw_spin_lock_bh(&bucket->lock);
+ hlist_for_each_entry(elem, &bucket->head, node)
+ sock_hold(elem->sk);
+ hlist_move_list(&bucket->head, &unlink_list);
+ raw_spin_unlock_bh(&bucket->lock);
+
+ /* Process removed entries out of atomic context to
+ * block for socket lock before deleting the psock's
+ * link to sockhash.
+ */
+ hlist_for_each_entry_safe(elem, node, &unlink_list, node) {
+ hlist_del(&elem->node);
lock_sock(elem->sk);
rcu_read_lock();
sock_map_unref(elem->sk, elem);
rcu_read_unlock();
release_sock(elem->sk);
+ sock_put(elem->sk);
+ sock_hash_free_elem(htab, elem);
}
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b109cc8a6dd8..f93f8ace6c56 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -128,7 +128,7 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
return -ENOMEM;
if (write) {
- ret = cpumask_parse_user(buffer, *lenp, mask);
+ ret = cpumask_parse(buffer, mask);
if (ret)
goto done;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 90f44f382115..3c45f99e26d5 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -462,6 +462,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
xdpf->len = totsize - metasize;
xdpf->headroom = 0;
xdpf->metasize = metasize;
+ xdpf->frame_sz = PAGE_SIZE;
xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
xsk_buff_free(xdp);
diff --git a/net/dcb/Kconfig b/net/dcb/Kconfig
index 917e6e7b1cac..efee8b9fe1d4 100644
--- a/net/dcb/Kconfig
+++ b/net/dcb/Kconfig
@@ -2,7 +2,7 @@
config DCB
bool "Data Center Bridging support"
default n
- ---help---
+ help
This enables support for configuring Data Center Bridging (DCB)
features on DCB capable Ethernet adapters via rtnetlink. Say 'Y'
if you have a DCB capable Ethernet adapter which supports this
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index f7c7495677b0..51ac2631fb48 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -2,7 +2,7 @@
menuconfig IP_DCCP
tristate "The DCCP Protocol"
depends on INET
- ---help---
+ help
Datagram Congestion Control Protocol (RFC 4340)
From http://www.ietf.org/rfc/rfc4340.txt:
@@ -32,7 +32,7 @@ menu "DCCP Kernel Hacking"
config IP_DCCP_DEBUG
bool "DCCP debug messages"
- ---help---
+ help
Only use this if you're hacking DCCP.
When compiling DCCP as a module, this debugging output can be toggled
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 4a358e6847a8..4d7771f36eff 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -3,7 +3,7 @@ menu "DCCP CCIDs Configuration"
config IP_DCCP_CCID2_DEBUG
bool "CCID-2 debugging messages"
- ---help---
+ help
Enable CCID-2 specific debugging messages.
The debugging output can additionally be toggled by setting the
@@ -14,7 +14,7 @@ config IP_DCCP_CCID2_DEBUG
config IP_DCCP_CCID3
bool "CCID-3 (TCP-Friendly)"
def_bool y if (IP_DCCP = y || IP_DCCP = m)
- ---help---
+ help
CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
rate-controlled congestion control mechanism. TFRC is designed to
be reasonably fair when competing for bandwidth with TCP-like flows,
@@ -39,7 +39,7 @@ config IP_DCCP_CCID3
config IP_DCCP_CCID3_DEBUG
bool "CCID-3 debugging messages"
depends on IP_DCCP_CCID3
- ---help---
+ help
Enable CCID-3 specific debugging messages.
The debugging output can additionally be toggled by setting the
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 4af8a98fe784..c13b6609474b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1139,14 +1139,14 @@ static int __init dccp_init(void)
inet_hashinfo_init(&dccp_hashinfo);
rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
if (rc)
- goto out_fail;
+ goto out_free_percpu;
rc = -ENOBUFS;
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
- goto out_free_percpu;
+ goto out_free_hashinfo2;
/*
* Size and allocate the main established and bind bucket
@@ -1242,6 +1242,8 @@ out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+out_free_hashinfo2:
+ inet_hashinfo2_free_mod(&dccp_hashinfo);
out_free_percpu:
percpu_counter_destroy(&dccp_orphan_count);
out_fail:
@@ -1265,6 +1267,7 @@ static void __exit dccp_fini(void)
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
dccp_ackvec_exit();
dccp_sysctl_exit();
+ inet_hashinfo2_free_mod(&dccp_hashinfo);
percpu_counter_destroy(&dccp_orphan_count);
}
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index 8f98fb2f2ec9..24336bdb1054 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -4,7 +4,7 @@
#
config DECNET
tristate "DECnet Support"
- ---help---
+ help
The DECnet networking protocol was used in many products made by
Digital (now Compaq). It provides reliable stream and sequenced
packet communications over which run a variety of services similar
@@ -29,7 +29,7 @@ config DECNET_ROUTER
bool "DECnet: router support"
depends on DECNET
select FIB_RULES
- ---help---
+ help
Add support for turning your DECnet Endnode into a level 1 or 2
router. This is an experimental, but functional option. If you
do say Y here, then make sure that you also say Y to "Kernel/User
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 739613070d07..d5bc6ac599ef 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -13,7 +13,7 @@ menuconfig NET_DSA
select NET_SWITCHDEV
select PHYLINK
select NET_DEVLINK
- ---help---
+ help
Say Y if you want to enable support for the hardware switches supported
by the Distributed Switch Architecture.
diff --git a/net/dsa/master.c b/net/dsa/master.c
index a621367c6e8c..480a61460c23 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -327,6 +327,8 @@ static void dsa_master_reset_mtu(struct net_device *dev)
rtnl_unlock();
}
+static struct lock_class_key dsa_master_addr_list_lock_key;
+
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
int ret;
@@ -345,6 +347,8 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
wmb();
dev->dsa_ptr = cpu_dp;
+ lockdep_set_class(&dev->addr_list_lock,
+ &dsa_master_addr_list_lock_key);
ret = dsa_master_ethtool_setup(dev);
if (ret)
return ret;
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 423e640e3876..47f63526818e 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -43,6 +43,7 @@ const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
[NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
[NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
[NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
+ [NETIF_F_GSO_FRAGLIST_BIT] = "tx-gso-list",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index 677068deb68c..5eaf173eaaca 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -140,8 +140,7 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info)
ret = __ethtool_get_link_ksettings(dev, &ksettings);
if (ret < 0) {
- if (info)
- GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
+ GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
goto out_ops;
}
lsettings = &ksettings.base;
diff --git a/net/hsr/Kconfig b/net/hsr/Kconfig
index 9c58f8763997..8095b034e76e 100644
--- a/net/hsr/Kconfig
+++ b/net/hsr/Kconfig
@@ -5,7 +5,7 @@
config HSR
tristate "High-availability Seamless Redundancy (HSR)"
- ---help---
+ help
If you say Y here, then your Linux box will be able to act as a
DANH ("Doubly attached node implementing HSR"). For this to work,
your Linux box needs (at least) two physical Ethernet interfaces,
diff --git a/net/ieee802154/6lowpan/Kconfig b/net/ieee802154/6lowpan/Kconfig
index d1b4655a6d43..e808e4db2678 100644
--- a/net/ieee802154/6lowpan/Kconfig
+++ b/net/ieee802154/6lowpan/Kconfig
@@ -2,5 +2,5 @@
config IEEE802154_6LOWPAN
tristate "6lowpan support over IEEE 802.15.4"
depends on 6LOWPAN
- ---help---
+ help
IPv6 compression over IEEE 802.15.4.
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
index 5dbbc2ca95b4..bcb05ba97686 100644
--- a/net/ieee802154/Kconfig
+++ b/net/ieee802154/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
menuconfig IEEE802154
tristate "IEEE Std 802.15.4 Low-Rate Wireless Personal Area Networks support"
- ---help---
+ help
IEEE Std 802.15.4 defines a low data rate, low power and low
complexity short range wireless personal area networks. It was
designed to organise networks of sensors, switches, etc automation
@@ -15,13 +15,13 @@ if IEEE802154
config IEEE802154_NL802154_EXPERIMENTAL
bool "IEEE 802.15.4 experimental netlink support"
- ---help---
+ help
Adds experimental netlink support for nl802154.
config IEEE802154_SOCKET
tristate "IEEE 802.15.4 socket interface"
default y
- ---help---
+ help
Socket interface for IEEE 802.15.4. Contains DGRAM sockets interface
for 802.15.4 dataframes. Also RAW socket interface to build MAC
header from userspace.
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index dc9dfaef77e5..e64e59b536d3 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -14,7 +14,7 @@ config IP_MULTICAST
config IP_ADVANCED_ROUTER
bool "IP: advanced router"
- ---help---
+ help
If you intend to run your Linux box mostly as a router, i.e. as a
computer that forwards and redistributes network packets, say Y; you
will then be presented with several options that allow more precise
@@ -56,7 +56,7 @@ config IP_ADVANCED_ROUTER
config IP_FIB_TRIE_STATS
bool "FIB TRIE statistics"
depends on IP_ADVANCED_ROUTER
- ---help---
+ help
Keep track of statistics on structure of FIB TRIE table.
Useful for testing and measuring TRIE performance.
@@ -64,7 +64,7 @@ config IP_MULTIPLE_TABLES
bool "IP: policy routing"
depends on IP_ADVANCED_ROUTER
select FIB_RULES
- ---help---
+ help
Normally, a router decides what to do with a received packet based
solely on the packet's final destination address. If you say Y here,
the Linux router will also be able to take the packet's source
@@ -117,7 +117,7 @@ config IP_PNP
config IP_PNP_DHCP
bool "IP: DHCP support"
depends on IP_PNP
- ---help---
+ help
If you want your Linux box to mount its whole root file system (the
one containing the directory /) from some other computer over the
net via NFS and you want the IP address of your computer to be
@@ -134,7 +134,7 @@ config IP_PNP_DHCP
config IP_PNP_BOOTP
bool "IP: BOOTP support"
depends on IP_PNP
- ---help---
+ help
If you want your Linux box to mount its whole root file system (the
one containing the directory /) from some other computer over the
net via NFS and you want the IP address of your computer to be
@@ -163,7 +163,7 @@ config NET_IPIP
tristate "IP: tunneling"
select INET_TUNNEL
select NET_IP_TUNNEL
- ---help---
+ help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
encapsulating protocol. This particular tunneling driver implements
@@ -267,7 +267,7 @@ config IP_PIMSM_V2
config SYN_COOKIES
bool "IP: TCP syncookie support"
- ---help---
+ help
Normal TCP/IP networking is open to an attack known as "SYN
flooding". This denial-of-service attack prevents legitimate remote
users from being able to connect to your computer during an ongoing
@@ -307,7 +307,7 @@ config NET_IPVTI
select INET_TUNNEL
select NET_IP_TUNNEL
select XFRM
- ---help---
+ help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
encapsulating protocol. This can be used with xfrm mode tunnel to give
@@ -323,7 +323,7 @@ config NET_FOU
tristate "IP: Foo (IP protocols) over UDP"
select XFRM
select NET_UDP_TUNNEL
- ---help---
+ help
Foo over UDP allows any IP protocol to be directly encapsulated
over UDP include tunnels (IPIP, GRE, SIT). By encapsulating in UDP
network mechanisms and optimizations for UDP (such as ECMP
@@ -333,7 +333,7 @@ config NET_FOU_IP_TUNNELS
bool "IP: FOU encapsulation of IP tunnels"
depends on NET_IPIP || NET_IPGRE || IPV6_SIT
select NET_FOU
- ---help---
+ help
Allow configuration of FOU or GUE encapsulation for IP tunnels.
When this option is enabled IP tunnels can be configured to use
FOU or GUE encapsulation.
@@ -341,7 +341,7 @@ config NET_FOU_IP_TUNNELS
config INET_AH
tristate "IP: AH transformation"
select XFRM_AH
- ---help---
+ help
Support for IPsec AH (Authentication Header).
AH can be used with various authentication algorithms. Besides
@@ -356,7 +356,7 @@ config INET_AH
config INET_ESP
tristate "IP: ESP transformation"
select XFRM_ESP
- ---help---
+ help
Support for IPsec ESP (Encapsulating Security Payload).
ESP can be used with various encryption and authentication algorithms.
@@ -373,7 +373,7 @@ config INET_ESP_OFFLOAD
depends on INET_ESP
select XFRM_OFFLOAD
default n
- ---help---
+ help
Support for ESP transformation offload. This makes sense
only if this system really does IPsec and want to do it
with high throughput. A typical desktop system does not
@@ -397,7 +397,7 @@ config INET_IPCOMP
tristate "IP: IPComp transformation"
select INET_XFRM_TUNNEL
select XFRM_IPCOMP
- ---help---
+ help
Support for IP Payload Compression Protocol (IPComp) (RFC3173),
typically needed for IPsec.
@@ -415,7 +415,7 @@ config INET_TUNNEL
config INET_DIAG
tristate "INET: socket monitoring interface"
default y
- ---help---
+ help
Support for INET (TCP, DCCP, etc) socket monitoring interface used by
native Linux tools such as ss. ss is included in iproute2, currently
downloadable at:
@@ -432,7 +432,7 @@ config INET_UDP_DIAG
tristate "UDP: socket monitoring interface"
depends on INET_DIAG && (IPV6 || IPV6=n)
default n
- ---help---
+ help
Support for UDP socket monitoring interface used by the ss tool.
If unsure, say Y.
@@ -440,7 +440,7 @@ config INET_RAW_DIAG
tristate "RAW: socket monitoring interface"
depends on INET_DIAG && (IPV6 || IPV6=n)
default n
- ---help---
+ help
Support for RAW socket monitoring interface used by the ss tool.
If unsure, say Y.
@@ -448,7 +448,7 @@ config INET_DIAG_DESTROY
bool "INET: allow privileged process to administratively close sockets"
depends on INET_DIAG
default n
- ---help---
+ help
Provides a SOCK_DESTROY operation that allows privileged processes
(e.g., a connection manager or a network administration tool such as
ss) to close sockets opened by other processes. Closing a socket in
@@ -459,7 +459,7 @@ config INET_DIAG_DESTROY
menuconfig TCP_CONG_ADVANCED
bool "TCP: advanced congestion control"
- ---help---
+ help
Support for selection of various TCP congestion control
modules.
@@ -473,7 +473,7 @@ if TCP_CONG_ADVANCED
config TCP_CONG_BIC
tristate "Binary Increase Congestion (BIC) control"
default m
- ---help---
+ help
BIC-TCP is a sender-side only change that ensures a linear RTT
fairness under large windows while offering both scalability and
bounded TCP-friendliness. The protocol combines two schemes
@@ -487,7 +487,7 @@ config TCP_CONG_BIC
config TCP_CONG_CUBIC
tristate "CUBIC TCP"
default y
- ---help---
+ help
This is version 2.0 of BIC-TCP which uses a cubic growth function
among other techniques.
See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
@@ -495,7 +495,7 @@ config TCP_CONG_CUBIC
config TCP_CONG_WESTWOOD
tristate "TCP Westwood+"
default m
- ---help---
+ help
TCP Westwood+ is a sender-side only modification of the TCP Reno
protocol stack that optimizes the performance of TCP congestion
control. It is based on end-to-end bandwidth estimation to set
@@ -509,7 +509,7 @@ config TCP_CONG_WESTWOOD
config TCP_CONG_HTCP
tristate "H-TCP"
default m
- ---help---
+ help
H-TCP is a send-side only modifications of the TCP Reno
protocol stack that optimizes the performance of TCP
congestion control for high speed network links. It uses a
@@ -520,7 +520,7 @@ config TCP_CONG_HTCP
config TCP_CONG_HSTCP
tristate "High Speed TCP"
default n
- ---help---
+ help
Sally Floyd's High Speed TCP (RFC 3649) congestion control.
A modification to TCP's congestion control mechanism for use
with large congestion windows. A table indicates how much to
@@ -530,7 +530,7 @@ config TCP_CONG_HSTCP
config TCP_CONG_HYBLA
tristate "TCP-Hybla congestion control algorithm"
default n
- ---help---
+ help
TCP-Hybla is a sender-side only change that eliminates penalization of
long-RTT, large-bandwidth connections, like when satellite legs are
involved, especially when sharing a common bottleneck with normal
@@ -539,7 +539,7 @@ config TCP_CONG_HYBLA
config TCP_CONG_VEGAS
tristate "TCP Vegas"
default n
- ---help---
+ help
TCP Vegas is a sender-side only change to TCP that anticipates
the onset of congestion by estimating the bandwidth. TCP Vegas
adjusts the sending rate by modifying the congestion
@@ -549,7 +549,7 @@ config TCP_CONG_VEGAS
config TCP_CONG_NV
tristate "TCP NV"
default n
- ---help---
+ help
TCP NV is a follow up to TCP Vegas. It has been modified to deal with
10G networks, measurement noise introduced by LRO, GRO and interrupt
coalescence. In addition, it will decrease its cwnd multiplicatively
@@ -565,7 +565,7 @@ config TCP_CONG_NV
config TCP_CONG_SCALABLE
tristate "Scalable TCP"
default n
- ---help---
+ help
Scalable TCP is a sender-side only change to TCP which uses a
MIMD congestion control algorithm which has some nice scaling
properties, though is known to have fairness issues.
@@ -574,7 +574,7 @@ config TCP_CONG_SCALABLE
config TCP_CONG_LP
tristate "TCP Low Priority"
default n
- ---help---
+ help
TCP Low Priority (TCP-LP), a distributed algorithm whose goal is
to utilize only the excess network bandwidth as compared to the
``fair share`` of bandwidth as targeted by TCP.
@@ -583,7 +583,7 @@ config TCP_CONG_LP
config TCP_CONG_VENO
tristate "TCP Veno"
default n
- ---help---
+ help
TCP Veno is a sender-side only enhancement of TCP to obtain better
throughput over wireless networks. TCP Veno makes use of state
distinguishing to circumvent the difficult judgment of the packet loss
@@ -595,7 +595,7 @@ config TCP_CONG_YEAH
tristate "YeAH TCP"
select TCP_CONG_VEGAS
default n
- ---help---
+ help
YeAH-TCP is a sender-side high-speed enabled TCP congestion control
algorithm, which uses a mixed loss/delay approach to compute the
congestion window. It's design goals target high efficiency,
@@ -608,7 +608,7 @@ config TCP_CONG_YEAH
config TCP_CONG_ILLINOIS
tristate "TCP Illinois"
default n
- ---help---
+ help
TCP-Illinois is a sender-side modification of TCP Reno for
high speed long delay links. It uses round-trip-time to
adjust the alpha and beta parameters to achieve a higher average
@@ -620,7 +620,7 @@ config TCP_CONG_ILLINOIS
config TCP_CONG_DCTCP
tristate "DataCenter TCP (DCTCP)"
default n
- ---help---
+ help
DCTCP leverages Explicit Congestion Notification (ECN) in the network to
provide multi-bit feedback to the end hosts. It is designed to provide:
@@ -641,7 +641,7 @@ config TCP_CONG_DCTCP
config TCP_CONG_CDG
tristate "CAIA Delay-Gradient (CDG)"
default n
- ---help---
+ help
CAIA Delay-Gradient (CDG) is a TCP congestion control that modifies
the TCP sender in order to:
@@ -657,7 +657,7 @@ config TCP_CONG_CDG
config TCP_CONG_BBR
tristate "BBR TCP"
default n
- ---help---
+ help
BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to
maximize network utilization and minimize queues. It builds an explicit
@@ -736,7 +736,7 @@ config TCP_MD5SIG
bool "TCP: MD5 Signature Option support (RFC2385)"
select CRYPTO
select CRYPTO_MD5
- ---help---
+ help
RFC2385 specifies a method of giving MD5 protection to TCP sessions.
Its main (only?) use is to protect BGP sessions between core routers
on the Internet.
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e53871e4a097..1f75dc686b6b 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1109,7 +1109,7 @@ static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
if (fl4.flowi4_scope < RT_SCOPE_LINK)
fl4.flowi4_scope = RT_SCOPE_LINK;
- if (table)
+ if (table && table != RT_TABLE_MAIN)
tbl = fib_get_table(net, table);
if (tbl)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f40b1b72f979..afaf582a5aa9 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -902,6 +902,7 @@ void inet_csk_prepare_forced_close(struct sock *sk)
bh_unlock_sock(sk);
sock_put(sk);
inet_csk_prepare_for_destroy_sock(sk);
+ inet_sk(sk)->inet_num = 0;
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index f4f1d11eab50..0c1f36404471 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -85,9 +85,10 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
__be32 remote, __be32 local,
__be32 key)
{
- unsigned int hash;
struct ip_tunnel *t, *cand = NULL;
struct hlist_head *head;
+ struct net_device *ndev;
+ unsigned int hash;
hash = ip_tunnel_hash(key, remote);
head = &itn->tunnels[hash];
@@ -162,8 +163,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (t && t->dev->flags & IFF_UP)
return t;
- if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
- return netdev_priv(itn->fb_tunnel_dev);
+ ndev = READ_ONCE(itn->fb_tunnel_dev);
+ if (ndev && ndev->flags & IFF_UP)
+ return netdev_priv(ndev);
return NULL;
}
@@ -1259,9 +1261,9 @@ void ip_tunnel_uninit(struct net_device *dev)
struct ip_tunnel_net *itn;
itn = net_generic(net, tunnel->ip_tnl_net_id);
- /* fb_tunnel_dev will be unregisted in net-exit call. */
- if (itn->fb_tunnel_dev != dev)
- ip_tunnel_del(itn, netdev_priv(dev));
+ ip_tunnel_del(itn, netdev_priv(dev));
+ if (itn->fb_tunnel_dev == dev)
+ WRITE_ONCE(itn->fb_tunnel_dev, NULL);
dst_cache_reset(&tunnel->dst_cache);
}
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f17b402111ce..a2f4f894be2b 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -94,7 +94,7 @@ config NF_NAT_SNMP_BASIC
depends on NETFILTER_ADVANCED
default NF_NAT && NF_CONNTRACK_SNMP
select ASN1
- ---help---
+ help
This module implements an Application Layer Gateway (ALG) for
SNMP payloads. In conjunction with NAT, it allows a network
@@ -146,7 +146,7 @@ config IP_NF_MATCH_ECN
tristate '"ecn" match support'
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MATCH_ECN
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MATCH_ECN.
@@ -155,7 +155,7 @@ config IP_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
depends on IP_NF_MANGLE || IP_NF_RAW
- ---help---
+ help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -166,7 +166,7 @@ config IP_NF_MATCH_TTL
tristate '"ttl" match support'
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MATCH_HL
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MATCH_HL.
@@ -234,7 +234,7 @@ config IP_NF_TARGET_NETMAP
tristate "NETMAP target support"
depends on NETFILTER_ADVANCED
select NETFILTER_XT_TARGET_NETMAP
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_TARGET_NETMAP.
@@ -243,7 +243,7 @@ config IP_NF_TARGET_REDIRECT
tristate "REDIRECT target support"
depends on NETFILTER_ADVANCED
select NETFILTER_XT_TARGET_REDIRECT
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_TARGET_REDIRECT.
@@ -279,7 +279,7 @@ config IP_NF_TARGET_ECN
tristate "ECN target support"
depends on IP_NF_MANGLE
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds a `ECN' target, which can be used in the iptables mangle
table.
@@ -294,7 +294,7 @@ config IP_NF_TARGET_TTL
tristate '"TTL" target support'
depends on NETFILTER_ADVANCED && IP_NF_MANGLE
select NETFILTER_XT_TARGET_HL
- ---help---
+ help
This is a backwards-compatible option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_TARGET_HL.
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 400a9f89ebdb..cc8049b100b2 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -247,12 +247,11 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nla_put_u32(skb, NHA_ID, nh->id))
goto nla_put_failure;
- if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB))
- goto nla_put_failure;
-
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
+ goto nla_put_failure;
if (nla_put_nh_group(skb, nhg))
goto nla_put_failure;
goto out;
@@ -264,7 +263,10 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
if (nla_put_flag(skb, NHA_BLACKHOLE))
goto nla_put_failure;
goto out;
- } else if (!nh->is_fdb_nh) {
+ } else if (nhi->fdb_nh) {
+ if (nla_put_flag(skb, NHA_FDB))
+ goto nla_put_failure;
+ } else {
const struct net_device *dev;
dev = nhi->fib_nhc.nhc_dev;
@@ -385,7 +387,7 @@ errout:
}
static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
- struct netlink_ext_ack *extack)
+ bool *is_fdb, struct netlink_ext_ack *extack)
{
if (nh->is_group) {
struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
@@ -398,6 +400,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
"Multipath group can not be a nexthop within a group");
return false;
}
+ *is_fdb = nhg->fdb_nh;
} else {
struct nh_info *nhi = rtnl_dereference(nh->nh_info);
@@ -406,6 +409,7 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
"Blackhole nexthop can not be used in a group with more than 1 path");
return false;
}
+ *is_fdb = nhi->fdb_nh;
}
return true;
@@ -416,12 +420,13 @@ static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
{
struct nh_info *nhi;
- if (!nh->is_fdb_nh) {
+ nhi = rtnl_dereference(nh->nh_info);
+
+ if (!nhi->fdb_nh) {
NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
return -EINVAL;
}
- nhi = rtnl_dereference(nh->nh_info);
if (*nh_family == AF_UNSPEC) {
*nh_family = nhi->family;
} else if (*nh_family != nhi->family) {
@@ -473,19 +478,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
nhg = nla_data(tb[NHA_GROUP]);
for (i = 0; i < len; ++i) {
struct nexthop *nh;
+ bool is_fdb_nh;
nh = nexthop_find_by_id(net, nhg[i].id);
if (!nh) {
NL_SET_ERR_MSG(extack, "Invalid nexthop id");
return -EINVAL;
}
- if (!valid_group_nh(nh, len, extack))
+ if (!valid_group_nh(nh, len, &is_fdb_nh, extack))
return -EINVAL;
if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
return -EINVAL;
- if (!nhg_fdb && nh->is_fdb_nh) {
+ if (!nhg_fdb && is_fdb_nh) {
NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
return -EINVAL;
}
@@ -553,13 +559,13 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
if (hash > atomic_read(&nhge->upper_bound))
continue;
- if (nhge->nh->is_fdb_nh)
+ nhi = rcu_dereference(nhge->nh->nh_info);
+ if (nhi->fdb_nh)
return nhge->nh;
/* nexthops always check if it is good and does
* not rely on a sysctl for this behavior
*/
- nhi = rcu_dereference(nhge->nh->nh_info);
switch (nhi->family) {
case AF_INET:
if (ipv4_good_nh(&nhi->fib_nh))
@@ -624,11 +630,7 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
struct nh_info *nhi;
-
- if (nh->is_fdb_nh) {
- NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
- return -EINVAL;
- }
+ bool is_fdb_nh;
/* fib6_src is unique to a fib6_info and limits the ability to cache
* routes in fib6_nh within a nexthop that is potentially shared
@@ -645,10 +647,17 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
nhg = rtnl_dereference(nh->nh_grp);
if (nhg->has_v4)
goto no_v4_nh;
+ is_fdb_nh = nhg->fdb_nh;
} else {
nhi = rtnl_dereference(nh->nh_info);
if (nhi->family == AF_INET)
goto no_v4_nh;
+ is_fdb_nh = nhi->fdb_nh;
+ }
+
+ if (is_fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+ return -EINVAL;
}
return 0;
@@ -677,12 +686,9 @@ static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
return fib6_check_nexthop(new, NULL, extack);
}
-static int nexthop_check_scope(struct nexthop *nh, u8 scope,
+static int nexthop_check_scope(struct nh_info *nhi, u8 scope,
struct netlink_ext_ack *extack)
{
- struct nh_info *nhi;
-
- nhi = rtnl_dereference(nh->nh_info);
if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
NL_SET_ERR_MSG(extack,
"Route with host scope can not have a gateway");
@@ -704,29 +710,38 @@ static int nexthop_check_scope(struct nexthop *nh, u8 scope,
int fib_check_nexthop(struct nexthop *nh, u8 scope,
struct netlink_ext_ack *extack)
{
+ struct nh_info *nhi;
int err = 0;
- if (nh->is_fdb_nh) {
- NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
- err = -EINVAL;
- goto out;
- }
-
if (nh->is_group) {
struct nh_group *nhg;
+ nhg = rtnl_dereference(nh->nh_grp);
+ if (nhg->fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+ err = -EINVAL;
+ goto out;
+ }
+
if (scope == RT_SCOPE_HOST) {
NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
err = -EINVAL;
goto out;
}
- nhg = rtnl_dereference(nh->nh_grp);
/* all nexthops in a group have the same scope */
- err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack);
+ nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info);
+ err = nexthop_check_scope(nhi, scope, extack);
} else {
- err = nexthop_check_scope(nh, scope, extack);
+ nhi = rtnl_dereference(nh->nh_info);
+ if (nhi->fdb_nh) {
+ NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+ err = -EINVAL;
+ goto out;
+ }
+ err = nexthop_check_scope(nhi, scope, extack);
}
+
out:
return err;
}
@@ -787,6 +802,7 @@ static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
newg->has_v4 = nhg->has_v4;
newg->mpath = nhg->mpath;
+ newg->fdb_nh = nhg->fdb_nh;
newg->num_nh = nhg->num_nh;
/* copy old entries to new except the one getting removed */
@@ -1216,7 +1232,7 @@ static struct nexthop *nexthop_create_group(struct net *net,
}
if (cfg->nh_fdb)
- nh->is_fdb_nh = 1;
+ nhg->fdb_nh = 1;
rcu_assign_pointer(nh->nh_grp, nhg);
@@ -1255,7 +1271,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
goto out;
}
- if (nh->is_fdb_nh)
+ if (nhi->fdb_nh)
goto out;
/* sets nh_dev if successful */
@@ -1326,7 +1342,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
if (cfg->nh_fdb)
- nh->is_fdb_nh = 1;
+ nhi->fdb_nh = 1;
if (cfg->nh_blackhole) {
nhi->reject_nh = 1;
@@ -1349,7 +1365,7 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
}
/* add the entry to the device based hash */
- if (!nh->is_fdb_nh)
+ if (!nhi->fdb_nh)
nexthop_devhash_add(net, nhi);
rcu_assign_pointer(nh->nh_info, nhi);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 15d47d5e7951..810cc164f795 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1734,7 +1734,7 @@ int tcp_mmap(struct file *file, struct socket *sock,
return -EPERM;
vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
- /* Instruct vm_insert_page() to not down_read(mmap_sem) */
+ /* Instruct vm_insert_page() to not mmap_read_lock(mm) */
vma->vm_flags |= VM_MIXEDMAP;
vma->vm_ops = &tcp_vm_ops;
@@ -1742,14 +1742,48 @@ int tcp_mmap(struct file *file, struct socket *sock,
}
EXPORT_SYMBOL(tcp_mmap);
+static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma,
+ struct page **pages,
+ unsigned long pages_to_map,
+ unsigned long *insert_addr,
+ u32 *length_with_pending,
+ u32 *seq,
+ struct tcp_zerocopy_receive *zc)
+{
+ unsigned long pages_remaining = pages_to_map;
+ int bytes_mapped;
+ int ret;
+
+ ret = vm_insert_pages(vma, *insert_addr, pages, &pages_remaining);
+ bytes_mapped = PAGE_SIZE * (pages_to_map - pages_remaining);
+ /* Even if vm_insert_pages fails, it may have partially succeeded in
+ * mapping (some but not all of the pages).
+ */
+ *seq += bytes_mapped;
+ *insert_addr += bytes_mapped;
+ if (ret) {
+ /* But if vm_insert_pages did fail, we have to unroll some state
+ * we speculatively touched before.
+ */
+ const int bytes_not_mapped = PAGE_SIZE * pages_remaining;
+ *length_with_pending -= bytes_not_mapped;
+ zc->recv_skip_hint += bytes_not_mapped;
+ }
+ return ret;
+}
+
static int tcp_zerocopy_receive(struct sock *sk,
struct tcp_zerocopy_receive *zc)
{
unsigned long address = (unsigned long)zc->address;
u32 length = 0, seq, offset, zap_len;
+ #define PAGE_BATCH_SIZE 8
+ struct page *pages[PAGE_BATCH_SIZE];
const skb_frag_t *frags = NULL;
struct vm_area_struct *vma;
struct sk_buff *skb = NULL;
+ unsigned long pg_idx = 0;
+ unsigned long curr_addr;
struct tcp_sock *tp;
int inq;
int ret;
@@ -1762,16 +1796,17 @@ static int tcp_zerocopy_receive(struct sock *sk,
sock_rps_record_flow(sk);
- down_read(&current->mm->mmap_sem);
+ tp = tcp_sk(sk);
+
+ mmap_read_lock(current->mm);
vma = find_vma(current->mm, address);
if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) {
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
return -EINVAL;
}
zc->length = min_t(unsigned long, zc->length, vma->vm_end - address);
- tp = tcp_sk(sk);
seq = tp->copied_seq;
inq = tcp_inq(sk);
zc->length = min_t(u32, zc->length, inq);
@@ -1783,8 +1818,20 @@ static int tcp_zerocopy_receive(struct sock *sk,
zc->recv_skip_hint = zc->length;
}
ret = 0;
+ curr_addr = address;
while (length + PAGE_SIZE <= zc->length) {
if (zc->recv_skip_hint < PAGE_SIZE) {
+ /* If we're here, finish the current batch. */
+ if (pg_idx) {
+ ret = tcp_zerocopy_vm_insert_batch(vma, pages,
+ pg_idx,
+ &curr_addr,
+ &length,
+ &seq, zc);
+ if (ret)
+ goto out;
+ pg_idx = 0;
+ }
if (skb) {
if (zc->recv_skip_hint > 0)
break;
@@ -1793,7 +1840,6 @@ static int tcp_zerocopy_receive(struct sock *sk,
} else {
skb = tcp_recv_skb(sk, seq, &offset);
}
-
zc->recv_skip_hint = skb->len - offset;
offset -= skb_headlen(skb);
if ((int)offset < 0 || skb_has_frag_list(skb))
@@ -1817,17 +1863,27 @@ static int tcp_zerocopy_receive(struct sock *sk,
zc->recv_skip_hint -= remaining;
break;
}
- ret = vm_insert_page(vma, address + length,
- skb_frag_page(frags));
- if (ret)
- break;
+ pages[pg_idx] = skb_frag_page(frags);
+ pg_idx++;
length += PAGE_SIZE;
- seq += PAGE_SIZE;
zc->recv_skip_hint -= PAGE_SIZE;
frags++;
+ if (pg_idx == PAGE_BATCH_SIZE) {
+ ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
+ &curr_addr, &length,
+ &seq, zc);
+ if (ret)
+ goto out;
+ pg_idx = 0;
+ }
+ }
+ if (pg_idx) {
+ ret = tcp_zerocopy_vm_insert_batch(vma, pages, pg_idx,
+ &curr_addr, &length, &seq,
+ zc);
}
out:
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (length) {
WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 629aaa9a1eb9..7aa68f4aae6c 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -64,6 +64,9 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
} while (i != msg_rx->sg.end);
if (unlikely(peek)) {
+ if (msg_rx == list_last_entry(&psock->ingress_msg,
+ struct sk_msg, list))
+ break;
msg_rx = list_next_entry(msg_rx, list);
continue;
}
@@ -242,6 +245,9 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
DEFINE_WAIT_FUNC(wait, woken_wake_function);
int ret = 0;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return 1;
+
if (!timeo)
return ret;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 83330a6cb242..12fda8f27b08 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4605,7 +4605,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
skb, &fragstolen)) {
coalesce_done:
- tcp_grow_window(sk, skb);
+ /* For non sack flows, do not grow window to force DUPACK
+ * and trigger fast retransmit.
+ */
+ if (tcp_is_sack(tp))
+ tcp_grow_window(sk, skb);
kfree_skb_partial(skb, fragstolen);
skb = NULL;
goto add_sack;
@@ -4689,7 +4693,11 @@ add_sack:
tcp_sack_new_ofo_skb(sk, seq, end_seq);
end:
if (skb) {
- tcp_grow_window(sk, skb);
+ /* For non sack flows, do not grow window to force DUPACK
+ * and trigger fast retransmit.
+ */
+ if (tcp_is_sack(tp))
+ tcp_grow_window(sk, skb);
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 414a68b16869..f4f19e89af5e 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -7,7 +7,7 @@
menuconfig IPV6
tristate "The IPv6 protocol"
default y
- ---help---
+ help
Support for IP version 6 (IPv6).
For general information about IPv6, see
@@ -23,7 +23,7 @@ if IPV6
config IPV6_ROUTER_PREF
bool "IPv6: Router Preference (RFC 4191) support"
- ---help---
+ help
Router Preference is an optional extension to the Router
Advertisement message which improves the ability of hosts
to pick an appropriate router, especially when the hosts
@@ -34,14 +34,14 @@ config IPV6_ROUTER_PREF
config IPV6_ROUTE_INFO
bool "IPv6: Route Information (RFC 4191) support"
depends on IPV6_ROUTER_PREF
- ---help---
+ help
Support of Route Information.
If unsure, say N.
config IPV6_OPTIMISTIC_DAD
bool "IPv6: Enable RFC 4429 Optimistic DAD"
- ---help---
+ help
Support for optimistic Duplicate Address Detection. It allows for
autoconfigured addresses to be used more quickly.
@@ -50,7 +50,7 @@ config IPV6_OPTIMISTIC_DAD
config INET6_AH
tristate "IPv6: AH transformation"
select XFRM_AH
- ---help---
+ help
Support for IPsec AH (Authentication Header).
AH can be used with various authentication algorithms. Besides
@@ -65,7 +65,7 @@ config INET6_AH
config INET6_ESP
tristate "IPv6: ESP transformation"
select XFRM_ESP
- ---help---
+ help
Support for IPsec ESP (Encapsulating Security Payload).
ESP can be used with various encryption and authentication algorithms.
@@ -82,7 +82,7 @@ config INET6_ESP_OFFLOAD
depends on INET6_ESP
select XFRM_OFFLOAD
default n
- ---help---
+ help
Support for ESP transformation offload. This makes sense
only if this system really does IPsec and want to do it
with high throughput. A typical desktop system does not
@@ -106,7 +106,7 @@ config INET6_IPCOMP
tristate "IPv6: IPComp transformation"
select INET6_XFRM_TUNNEL
select XFRM_IPCOMP
- ---help---
+ help
Support for IP Payload Compression Protocol (IPComp) (RFC3173),
typically needed for IPsec.
@@ -115,7 +115,7 @@ config INET6_IPCOMP
config IPV6_MIP6
tristate "IPv6: Mobility"
select XFRM
- ---help---
+ help
Support for IPv6 Mobility described in RFC 3775.
If unsure, say N.
@@ -125,7 +125,7 @@ config IPV6_ILA
depends on NETFILTER
select DST_CACHE
select LWTUNNEL
- ---help---
+ help
Support for IPv6 Identifier Locator Addressing (ILA).
ILA is a mechanism to do network virtualization without
@@ -155,7 +155,7 @@ tristate "Virtual (secure) IPv6: tunneling"
select IPV6_TUNNEL
select NET_IP_TUNNEL
select XFRM
- ---help---
+ help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
encapsulating protocol. This can be used with xfrm mode tunnel to give
@@ -168,7 +168,7 @@ config IPV6_SIT
select NET_IP_TUNNEL
select IPV6_NDISC_NODETYPE
default y
- ---help---
+ help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
encapsulating protocol. This driver implements encapsulation of IPv6
@@ -181,7 +181,7 @@ config IPV6_SIT_6RD
bool "IPv6: IPv6 Rapid Deployment (6RD)"
depends on IPV6_SIT
default n
- ---help---
+ help
IPv6 Rapid Deployment (6rd; draft-ietf-softwire-ipv6-6rd) builds upon
mechanisms of 6to4 (RFC3056) to enable a service provider to rapidly
deploy IPv6 unicast service to IPv4 sites to which it provides
@@ -204,7 +204,7 @@ config IPV6_TUNNEL
select INET6_TUNNEL
select DST_CACHE
select GRO_CELLS
- ---help---
+ help
Support for IPv6-in-IPv6 and IPv4-in-IPv6 tunnels described in
RFC 2473.
@@ -215,7 +215,7 @@ config IPV6_GRE
select IPV6_TUNNEL
select NET_IP_TUNNEL
depends on NET_IPGRE_DEMUX
- ---help---
+ help
Tunneling means encapsulating data of one protocol type within
another protocol and sending it over a channel that understands the
encapsulating protocol. This particular tunneling driver implements
@@ -240,13 +240,13 @@ config IPV6_FOU_TUNNEL
config IPV6_MULTIPLE_TABLES
bool "IPv6: Multiple Routing Tables"
select FIB_RULES
- ---help---
+ help
Support multiple routing tables.
config IPV6_SUBTREES
bool "IPv6: source address based routing"
depends on IPV6_MULTIPLE_TABLES
- ---help---
+ help
Enable routing by source address or prefix.
The destination address is still the primary routing key, so mixing
@@ -261,7 +261,7 @@ config IPV6_MROUTE
bool "IPv6: multicast routing"
depends on IPV6
select IP_MROUTE_COMMON
- ---help---
+ help
Support for IPv6 multicast forwarding.
If unsure, say N.
@@ -282,7 +282,7 @@ config IPV6_MROUTE_MULTIPLE_TABLES
config IPV6_PIMSM_V2
bool "IPv6: PIM-SM version 2 support"
depends on IPV6_MROUTE
- ---help---
+ help
Support for IPv6 PIM multicast routing protocol PIM-SMv2.
If unsure, say N.
@@ -292,7 +292,7 @@ config IPV6_SEG6_LWTUNNEL
select LWTUNNEL
select DST_CACHE
select IPV6_MULTIPLE_TABLES
- ---help---
+ help
Support for encapsulation of packets within an outer IPv6
header and a Segment Routing Header using the lightweight
tunnels mechanism. Also enable support for advanced local
@@ -306,7 +306,7 @@ config IPV6_SEG6_HMAC
select CRYPTO_HMAC
select CRYPTO_SHA1
select CRYPTO_SHA256
- ---help---
+ help
Support for HMAC signature generation and verification
of SR-enabled packets.
@@ -321,7 +321,7 @@ config IPV6_RPL_LWTUNNEL
bool "IPv6: RPL Source Routing Header support"
depends on IPV6
select LWTUNNEL
- ---help---
+ help
Support for RFC6554 RPL Source Routing Header using the lightweight
tunnels mechanism.
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index d64b83e85642..ce4fbba4acce 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -779,7 +779,7 @@ static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
{
struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
- state->pid_ns = proc_pid_ns(file_inode(seq->file));
+ state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);
rcu_read_lock_bh();
return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 781ca8c07a0d..6532bde82b40 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -127,6 +127,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
gre_proto == htons(ETH_P_ERSPAN2)) ?
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
+ struct net_device *ndev;
for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
if (!ipv6_addr_equal(local, &t->parms.laddr) ||
@@ -238,9 +239,9 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
if (t && t->dev->flags & IFF_UP)
return t;
- dev = ign->fb_tunnel_dev;
- if (dev && dev->flags & IFF_UP)
- return netdev_priv(dev);
+ ndev = READ_ONCE(ign->fb_tunnel_dev);
+ if (ndev && ndev->flags & IFF_UP)
+ return netdev_priv(ndev);
return NULL;
}
@@ -413,6 +414,8 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
+ if (ign->fb_tunnel_dev == dev)
+ WRITE_ONCE(ign->fb_tunnel_dev, NULL);
dst_cache_reset(&t->dst_cache);
dev_put(dev);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 2c843ff5e3a9..20576e87a5f7 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -493,7 +493,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)
opt->srcrt;
- if (!seg6_validate_srh(srh, optlen))
+ if (!seg6_validate_srh(srh, optlen, false))
goto sticky_done;
break;
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7e12d2114158..8cd2782a31e4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2615,6 +2615,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev)
idev->mc_list = i->next;
write_unlock_bh(&idev->lock);
+ ip6_mc_clear_src(i);
ma_put(i);
write_lock_bh(&idev->lock);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 0594131fa46d..262bb51a2d99 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -127,7 +127,7 @@ config IP6_NF_MATCH_HL
tristate '"hl" hoplimit match support'
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MATCH_HL
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MATCH_HL.
@@ -153,7 +153,7 @@ config IP6_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
depends on NETFILTER_ADVANCED
depends on IP6_NF_MANGLE || IP6_NF_RAW
- ---help---
+ help
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -183,7 +183,7 @@ config IP6_NF_TARGET_HL
tristate '"HL" hoplimit target support'
depends on NETFILTER_ADVANCED && IP6_NF_MANGLE
select NETFILTER_XT_TARGET_HL
- ---help---
+ help
This is a backwards-compatible option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_TARGET_HL.
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 37b434293bda..d2f8138e5a73 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -25,7 +25,7 @@
#include <net/seg6_hmac.h>
#endif
-bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
+bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced)
{
unsigned int tlv_offset;
int max_last_entry;
@@ -37,13 +37,17 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len)
if (((srh->hdrlen + 1) << 3) != len)
return false;
- max_last_entry = (srh->hdrlen / 2) - 1;
-
- if (srh->first_segment > max_last_entry)
+ if (!reduced && srh->segments_left > srh->first_segment) {
return false;
+ } else {
+ max_last_entry = (srh->hdrlen / 2) - 1;
- if (srh->segments_left > srh->first_segment + 1)
- return false;
+ if (srh->first_segment > max_last_entry)
+ return false;
+
+ if (srh->segments_left > srh->first_segment + 1)
+ return false;
+ }
tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index c7cbfeae94f5..e0e9f48ab14f 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -426,7 +426,7 @@ static int seg6_build_state(struct net *net, struct nlattr *nla,
}
/* verify that SRH is consistent */
- if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo)))
+ if (!seg6_validate_srh(tuninfo->srh, tuninfo_len - sizeof(*tuninfo), false))
return -EINVAL;
newts = lwtunnel_state_alloc(tuninfo_len + sizeof(*slwt));
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 52493423f329..eba23279912d 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -87,7 +87,7 @@ static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
*/
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
- if (!seg6_validate_srh(srh, len))
+ if (!seg6_validate_srh(srh, len, true))
return NULL;
return srh;
@@ -495,7 +495,7 @@ bool seg6_bpf_has_valid_srh(struct sk_buff *skb)
return false;
srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
- if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3))
+ if (!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3, true))
return false;
srh_state->valid = true;
@@ -670,7 +670,7 @@ static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
if (len < sizeof(*srh) + sizeof(struct in6_addr))
return -EINVAL;
- if (!seg6_validate_srh(srh, len))
+ if (!seg6_validate_srh(srh, len, false))
return -EINVAL;
slwt->srh = kmemdup(srh, len, GFP_KERNEL);
diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig
index bf7e970fad65..16f39f2565d9 100644
--- a/net/kcm/Kconfig
+++ b/net/kcm/Kconfig
@@ -5,7 +5,7 @@ config AF_KCM
depends on INET
select BPF_SYSCALL
select STREAM_PARSER
- ---help---
+ help
KCM (Kernel Connection Multiplexor) sockets provide a method
for multiplexing messages of a message based application
protocol over kernel connectons (e.g. TCP connections).
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index 655e0646895b..b7856748e960 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -8,7 +8,7 @@ menuconfig L2TP
depends on (IPV6 || IPV6=n)
depends on INET
select NET_UDP_TUNNEL
- ---help---
+ help
Layer Two Tunneling Protocol
From RFC 2661 <http://www.ietf.org/rfc/rfc2661.txt>.
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig
index de186dff8f63..2b2861e1fb7d 100644
--- a/net/l3mdev/Kconfig
+++ b/net/l3mdev/Kconfig
@@ -6,6 +6,6 @@
config NET_L3_MASTER_DEV
bool "L3 Master device support"
depends on INET || IPV6
- ---help---
+ help
This module provides glue between core networking code and device
drivers to support L3 master devices like VRF.
diff --git a/net/lapb/Kconfig b/net/lapb/Kconfig
index 5b50e8d64f26..da87b47f0dff 100644
--- a/net/lapb/Kconfig
+++ b/net/lapb/Kconfig
@@ -5,7 +5,7 @@
config LAPB
tristate "LAPB Data Link Driver"
- ---help---
+ help
Link Access Procedure, Balanced (LAPB) is the data link layer (i.e.
the lower) part of the X.25 protocol. It offers a reliable
connection service to exchange data frames with one other host, and
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 0c93b1b7a826..cd9a9bd242ba 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -9,7 +9,7 @@ config MAC80211
select CRYPTO_GCM
select CRYPTO_CMAC
select CRC32
- ---help---
+ help
This option enables the hardware independent IEEE 802.11
networking stack.
@@ -25,14 +25,14 @@ config MAC80211_RC_MINSTREL
bool "Minstrel" if EXPERT
select MAC80211_HAS_RC
default y
- ---help---
+ help
This option enables the 'minstrel' TX rate control algorithm
choice
prompt "Default rate control algorithm"
depends on MAC80211_HAS_RC
default MAC80211_RC_DEFAULT_MINSTREL
- ---help---
+ help
This option selects the default rate control algorithm
mac80211 will use. Note that this default can still be
overridden through the ieee80211_default_rc_algo module
@@ -41,7 +41,7 @@ choice
config MAC80211_RC_DEFAULT_MINSTREL
bool "Minstrel"
depends on MAC80211_RC_MINSTREL
- ---help---
+ help
Select Minstrel as the default rate control algorithm.
@@ -60,7 +60,7 @@ comment "Some wireless drivers require a rate control algorithm"
config MAC80211_MESH
bool "Enable mac80211 mesh networking support"
depends on MAC80211
- ---help---
+ help
Select this option to enable 802.11 mesh operation in mac80211
drivers that support it. 802.11 mesh connects multiple stations
over (possibly multi-hop) wireless links to form a single logical
@@ -71,14 +71,14 @@ config MAC80211_LEDS
depends on MAC80211
depends on LEDS_CLASS
select LEDS_TRIGGERS
- ---help---
+ help
This option enables a few LED triggers for different
packet receive/transmit events.
config MAC80211_DEBUGFS
bool "Export mac80211 internals in DebugFS"
depends on MAC80211 && DEBUG_FS
- ---help---
+ help
Select this to see extensive information about
the internal state of mac80211 in debugfs.
@@ -87,7 +87,7 @@ config MAC80211_DEBUGFS
config MAC80211_MESSAGE_TRACING
bool "Trace all mac80211 debug messages"
depends on MAC80211
- ---help---
+ help
Select this option to have mac80211 register the
mac80211_msg trace subsystem with tracepoints to
collect all debugging messages, independent of
@@ -100,13 +100,13 @@ config MAC80211_MESSAGE_TRACING
menuconfig MAC80211_DEBUG_MENU
bool "Select mac80211 debugging features"
depends on MAC80211
- ---help---
+ help
This option collects various mac80211 debug settings.
config MAC80211_NOINLINE
bool "Do not inline TX/RX handlers"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
This option affects code generation in mac80211, when
selected some functions are marked "noinline" to allow
easier debugging of problems in the transmit and receive
@@ -122,7 +122,7 @@ config MAC80211_NOINLINE
config MAC80211_VERBOSE_DEBUG
bool "Verbose debugging output"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out
many debugging messages. It should not be selected
on production systems as some of the messages are
@@ -133,7 +133,7 @@ config MAC80211_VERBOSE_DEBUG
config MAC80211_MLME_DEBUG
bool "Verbose managed MLME output"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out
debugging messages for the managed-mode MLME. It
should not be selected on production systems as some
@@ -144,7 +144,7 @@ config MAC80211_MLME_DEBUG
config MAC80211_STA_DEBUG
bool "Verbose station debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out
debugging messages for station addition/removal.
@@ -153,7 +153,7 @@ config MAC80211_STA_DEBUG
config MAC80211_HT_DEBUG
bool "Verbose HT debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
This option enables 802.11n High Throughput features
debug tracing output.
@@ -165,7 +165,7 @@ config MAC80211_HT_DEBUG
config MAC80211_OCB_DEBUG
bool "Verbose OCB debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out
very verbose OCB debugging messages. It should not
be selected on production systems as those messages
@@ -176,7 +176,7 @@ config MAC80211_OCB_DEBUG
config MAC80211_IBSS_DEBUG
bool "Verbose IBSS debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out
very verbose IBSS debugging messages. It should not
be selected on production systems as those messages
@@ -187,7 +187,7 @@ config MAC80211_IBSS_DEBUG
config MAC80211_PS_DEBUG
bool "Verbose powersave mode debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out very
verbose power save mode debugging messages (when mac80211
is an AP and has power saving stations.)
@@ -200,7 +200,7 @@ config MAC80211_MPL_DEBUG
bool "Verbose mesh peer link debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very
verbose mesh peer link debugging messages (when mac80211
is taking part in a mesh network).
@@ -213,7 +213,7 @@ config MAC80211_MPATH_DEBUG
bool "Verbose mesh path debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very
verbose mesh path selection debugging messages (when mac80211
is taking part in a mesh network).
@@ -226,7 +226,7 @@ config MAC80211_MHWMP_DEBUG
bool "Verbose mesh HWMP routing debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very
verbose mesh routing (HWMP) debugging messages (when mac80211
is taking part in a mesh network).
@@ -239,7 +239,7 @@ config MAC80211_MESH_SYNC_DEBUG
bool "Verbose mesh synchronization debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very verbose mesh
synchronization debugging messages (when mac80211 is taking part in a
mesh network).
@@ -250,7 +250,7 @@ config MAC80211_MESH_CSA_DEBUG
bool "Verbose mesh channel switch debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very verbose mesh
channel switch debugging messages (when mac80211 is taking part in a
mesh network).
@@ -261,7 +261,7 @@ config MAC80211_MESH_PS_DEBUG
bool "Verbose mesh powersave debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_MESH
- ---help---
+ help
Selecting this option causes mac80211 to print out very verbose mesh
powersave debugging messages (when mac80211 is taking part in a
mesh network).
@@ -271,7 +271,7 @@ config MAC80211_MESH_PS_DEBUG
config MAC80211_TDLS_DEBUG
bool "Verbose TDLS debugging"
depends on MAC80211_DEBUG_MENU
- ---help---
+ help
Selecting this option causes mac80211 to print out very
verbose TDLS selection debugging messages (when mac80211
is a TDLS STA).
@@ -284,7 +284,7 @@ config MAC80211_DEBUG_COUNTERS
bool "Extra statistics for TX/RX debugging"
depends on MAC80211_DEBUG_MENU
depends on MAC80211_DEBUGFS
- ---help---
+ help
Selecting this option causes mac80211 to keep additional
and very verbose statistics about TX and RX handler use
as well as a few selected dot11 counters. These will be
@@ -298,7 +298,7 @@ config MAC80211_DEBUG_COUNTERS
config MAC80211_STA_HASH_MAX_SIZE
int "Station hash table maximum size" if MAC80211_DEBUG_MENU
default 0
- ---help---
+ help
Setting this option to a low value (e.g. 4) allows testing the
hash table with collisions relatively deterministically (just
connect more stations than the number selected here.)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5820ef02a587..b2a9d47cf86d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -167,6 +167,8 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
ret = IEEE80211_STA_DISABLE_HT |
IEEE80211_STA_DISABLE_VHT |
IEEE80211_STA_DISABLE_HE;
+ else
+ ret = 0;
vht_chandef = *chandef;
goto out;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 21854a61a2b7..a88ab6fb16f2 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4694,7 +4694,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
* rate_idx is MCS index, which can be [0-76]
* as documented on:
*
- * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
+ * https://wireless.wiki.kernel.org/en/developers/Documentation/ieee80211/802.11n
*
* Anything else would be some sort of driver or
* hardware error. The driver should catch hardware
diff --git a/net/mac802154/Kconfig b/net/mac802154/Kconfig
index 742624e4f7bb..901167b1e6f5 100644
--- a/net/mac802154/Kconfig
+++ b/net/mac802154/Kconfig
@@ -8,7 +8,7 @@ config MAC802154
select CRYPTO_CCM
select CRYPTO_CTR
select CRYPTO_AES
- ---help---
+ help
This option enables the hardware independent IEEE 802.15.4
networking stack for SoftMAC devices (the ones implementing
only PHY level of IEEE 802.15.4 standard).
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index d1ad69b7942a..d672ab72ab12 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -6,7 +6,7 @@
menuconfig MPLS
bool "MultiProtocol Label Switching"
default n
- ---help---
+ help
MultiProtocol Label Switching routes packets through logical
circuits. Originally conceived as a way of routing packets at
hardware speeds (before hardware was capable of routing ipv4 packets),
@@ -27,13 +27,13 @@ config MPLS_ROUTING
tristate "MPLS: routing support"
depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n
depends on PROC_SYSCTL
- ---help---
+ help
Add support for forwarding of mpls packets.
config MPLS_IPTUNNEL
tristate "MPLS: IP over MPLS tunnel support"
depends on LWTUNNEL && MPLS_ROUTING
- ---help---
+ help
mpls ip tunnel support.
endif # MPLS
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 01f1f4cf4902..490b92534afc 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -273,6 +273,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE)
break;
+ ptr++;
+
mp_opt->rm_addr = 1;
mp_opt->rm_id = *ptr++;
pr_debug("RM_ADDR: id=%d", mp_opt->rm_id);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 14b253d10ccf..3980fbb6f31e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -374,6 +374,27 @@ void mptcp_subflow_eof(struct sock *sk)
sock_hold(sk);
}
+static void mptcp_check_for_eof(struct mptcp_sock *msk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ int receivers = 0;
+
+ mptcp_for_each_subflow(msk, subflow)
+ receivers += !subflow->rx_eof;
+
+ if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+ /* hopefully temporary hack: once no subflow is left with rx_eof
+ * clear, propagate the receive shutdown status to the msk
+ */
+ sk->sk_shutdown |= RCV_SHUTDOWN;
+
+ smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
+ set_bit(MPTCP_DATA_READY, &msk->flags);
+ sk->sk_data_ready(sk);
+ }
+}
+
static void mptcp_stop_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1011,6 +1032,9 @@ fallback:
break;
}
+ if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
+ mptcp_check_for_eof(msk);
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
@@ -1148,27 +1172,6 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
return 0;
}
-static void mptcp_check_for_eof(struct mptcp_sock *msk)
-{
- struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- int receivers = 0;
-
- mptcp_for_each_subflow(msk, subflow)
- receivers += !subflow->rx_eof;
-
- if (!receivers && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
- /* hopefully temporary hack: propagate shutdown status
- * to msk, when all subflows agree on it
- */
- sk->sk_shutdown |= RCV_SHUTDOWN;
-
- smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- set_bit(MPTCP_DATA_READY, &msk->flags);
- sk->sk_data_ready(sk);
- }
-}
-
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 809687d3f410..c6eeaf3e8dcb 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -135,8 +135,6 @@ static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field)
((nib & 0xF) << 8) | field);
}
-#define MPTCP_PM_MAX_ADDR 4
-
struct mptcp_addr_info {
sa_family_t family;
__be16 port;
@@ -234,10 +232,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- if (list_empty(&msk->rtx_queue))
- return NULL;
-
- return list_first_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
+ return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
}
struct mptcp_subflow_request_sock {
@@ -254,6 +249,7 @@ struct mptcp_subflow_request_sock {
u64 thmac;
u32 local_nonce;
u32 remote_nonce;
+ struct mptcp_sock *msk;
};
static inline struct mptcp_subflow_request_sock *
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 493b98a0825c..3838a0b3a21f 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -69,6 +69,9 @@ static void subflow_req_destructor(struct request_sock *req)
pr_debug("subflow_req=%p", subflow_req);
+ if (subflow_req->msk)
+ sock_put((struct sock *)subflow_req->msk);
+
if (subflow_req->mp_capable)
mptcp_token_destroy_request(subflow_req->token);
tcp_request_sock_ops.destructor(req);
@@ -86,8 +89,8 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
}
/* validate received token and create truncated hmac and nonce for SYN-ACK */
-static bool subflow_token_join_request(struct request_sock *req,
- const struct sk_buff *skb)
+static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
+ const struct sk_buff *skb)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
u8 hmac[SHA256_DIGEST_SIZE];
@@ -97,13 +100,13 @@ static bool subflow_token_join_request(struct request_sock *req,
msk = mptcp_token_get_sock(subflow_req->token);
if (!msk) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
- return false;
+ return NULL;
}
local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
if (local_id < 0) {
sock_put((struct sock *)msk);
- return false;
+ return NULL;
}
subflow_req->local_id = local_id;
@@ -114,9 +117,7 @@ static bool subflow_token_join_request(struct request_sock *req,
subflow_req->remote_nonce, hmac);
subflow_req->thmac = get_unaligned_be64(hmac);
-
- sock_put((struct sock *)msk);
- return true;
+ return msk;
}
static void subflow_init_req(struct request_sock *req,
@@ -133,6 +134,7 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
+ subflow_req->msk = NULL;
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -166,12 +168,9 @@ static void subflow_init_req(struct request_sock *req,
subflow_req->remote_id = mp_opt.join_id;
subflow_req->token = mp_opt.token;
subflow_req->remote_nonce = mp_opt.nonce;
- pr_debug("token=%u, remote_nonce=%u", subflow_req->token,
- subflow_req->remote_nonce);
- if (!subflow_token_join_request(req, skb)) {
- subflow_req->mp_join = 0;
- // @@ need to trigger RST
- }
+ subflow_req->msk = subflow_token_join_request(req, skb);
+ pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
+ subflow_req->remote_nonce, subflow_req->msk);
}
}
@@ -354,10 +353,9 @@ static bool subflow_hmac_valid(const struct request_sock *req,
const struct mptcp_subflow_request_sock *subflow_req;
u8 hmac[SHA256_DIGEST_SIZE];
struct mptcp_sock *msk;
- bool ret;
subflow_req = mptcp_subflow_rsk(req);
- msk = mptcp_token_get_sock(subflow_req->token);
+ msk = subflow_req->msk;
if (!msk)
return false;
@@ -365,12 +363,7 @@ static bool subflow_hmac_valid(const struct request_sock *req,
subflow_req->remote_nonce,
subflow_req->local_nonce, hmac);
- ret = true;
- if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN))
- ret = false;
-
- sock_put((struct sock *)msk);
- return ret;
+ return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}
static void mptcp_sock_destruct(struct sock *sk)
@@ -393,6 +386,7 @@ static void mptcp_sock_destruct(struct sock *sk)
sock_orphan(sk);
}
+ mptcp_token_destroy(mptcp_sk(sk)->token);
inet_sock_destruct(sk);
}
@@ -437,22 +431,25 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
struct mptcp_subflow_request_sock *subflow_req;
struct mptcp_options_received mp_opt;
- bool fallback_is_fatal = false;
+ bool fallback, fallback_is_fatal;
struct sock *new_msk = NULL;
- bool fallback = false;
struct sock *child;
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
- /* we need later a valid 'mp_capable' value even when options are not
- * parsed
+ /* After child creation we must look for 'mp_capable' even when options
+ * are not parsed
*/
mp_opt.mp_capable = 0;
- if (tcp_rsk(req)->is_mptcp == 0)
+
+ /* hopefully temporary handling for MP_JOIN+syncookie */
+ subflow_req = mptcp_subflow_rsk(req);
+ fallback_is_fatal = subflow_req->mp_join;
+ fallback = !tcp_rsk(req)->is_mptcp;
+ if (fallback)
goto create_child;
/* if the sk is MP_CAPABLE, we try to fetch the client key */
- subflow_req = mptcp_subflow_rsk(req);
if (subflow_req->mp_capable) {
if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
/* here we can receive and accept an in-window,
@@ -473,12 +470,11 @@ create_msk:
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
- fallback_is_fatal = true;
mptcp_get_options(skb, &mp_opt);
if (!mp_opt.mp_join ||
!subflow_hmac_valid(req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
- return NULL;
+ fallback = true;
}
}
@@ -521,10 +517,12 @@ create_child:
} else if (ctx->mp_join) {
struct mptcp_sock *owner;
- owner = mptcp_token_get_sock(ctx->token);
+ owner = subflow_req->msk;
if (!owner)
goto dispose_child;
+ /* move the msk reference ownership to the subflow */
+ subflow_req->msk = NULL;
ctx->conn = (struct sock *)owner;
if (!mptcp_finish_join(child))
goto dispose_child;
@@ -1052,8 +1050,10 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
err = tcp_set_ulp(sf->sk, "mptcp");
release_sock(sf->sk);
- if (err)
+ if (err) {
+ sock_release(sf);
return err;
+ }
/* the newly created socket really belongs to the owning MPTCP master
* socket, even if for additional subflows the allocation is performed
diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig
index 2f1e5756c03a..93309081f5a4 100644
--- a/net/ncsi/Kconfig
+++ b/net/ncsi/Kconfig
@@ -6,7 +6,7 @@
config NET_NCSI
bool "NCSI interface support"
depends on INET
- ---help---
+ help
This module provides NCSI (Network Controller Sideband Interface)
support. Enable this only if your system connects to a network
device via NCSI and the ethernet driver you're using supports
@@ -14,6 +14,6 @@ config NET_NCSI
config NCSI_OEM_CMD_GET_MAC
bool "Get NCSI OEM MAC Address"
depends on NET_NCSI
- ---help---
+ help
This allows getting the MAC address from NCSI firmware and setting it back to
the controller.
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 3a3915d2e1ea..0ffe2b8723c4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -120,7 +120,7 @@ config NF_CONNTRACK_PROCFS
bool "Supply CT list in procfs (OBSOLETE)"
default y
depends on PROC_FS
- ---help---
+ help
This option enables the list of known conntrack entries
to be shown in procfs under net/netfilter/nf_conntrack. This
is considered obsolete in favor of using the conntrack(8)
@@ -717,7 +717,7 @@ comment "Xtables combined modules"
config NETFILTER_XT_MARK
tristate 'nfmark target and match support'
default m if NETFILTER_ADVANCED=n
- ---help---
+ help
This option adds the "MARK" target and "mark" match.
Netfilter mark matching allows you to match packets based on the
@@ -733,7 +733,7 @@ config NETFILTER_XT_CONNMARK
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_CONNTRACK_MARK
- ---help---
+ help
This option adds the "CONNMARK" target and "connmark" match.
Netfilter allows you to store a mark value per connection (a.k.a.
@@ -760,7 +760,7 @@ config NETFILTER_XT_TARGET_AUDIT
tristate "AUDIT target support"
depends on AUDIT
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds an 'AUDIT' target, which can be used to create
audit records for packets dropped/accepted.
@@ -770,7 +770,7 @@ config NETFILTER_XT_TARGET_CHECKSUM
tristate "CHECKSUM target support"
depends on IP_NF_MANGLE || IP6_NF_MANGLE
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds a `CHECKSUM' target, which can be used in the iptables mangle
table to work around buggy DHCP clients in virtualized environments.
@@ -799,7 +799,7 @@ config NETFILTER_XT_TARGET_CONNMARK
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NETFILTER_XT_CONNMARK
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
@@ -848,7 +848,7 @@ config NETFILTER_XT_TARGET_HL
tristate '"HL" hoplimit target support'
depends on IP_NF_MANGLE || IP6_NF_MANGLE
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds the "HL" (for IPv6) and "TTL" (for IPv4)
targets, which enable the user to change the
hoplimit/time-to-live value of the IP header.
@@ -863,7 +863,7 @@ config NETFILTER_XT_TARGET_HMARK
tristate '"HMARK" target support'
depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds the "HMARK" target.
The target allows you to create rules in the "raw" and "mangle" tables
@@ -925,7 +925,7 @@ config NETFILTER_XT_TARGET_MARK
tristate '"MARK" target support'
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MARK
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
@@ -933,7 +933,7 @@ config NETFILTER_XT_TARGET_MARK
config NETFILTER_XT_NAT
tristate '"SNAT and DNAT" targets support'
depends on NF_NAT
- ---help---
+ help
This option enables the SNAT and DNAT targets.
To compile it as a module, choose M here. If unsure, say N.
@@ -941,7 +941,7 @@ config NETFILTER_XT_NAT
config NETFILTER_XT_TARGET_NETMAP
tristate '"NETMAP" target support'
depends on NF_NAT
- ---help---
+ help
NETMAP is an implementation of static 1:1 NAT mapping of network
addresses. It maps the network address part, while keeping the host
address part intact.
@@ -991,7 +991,7 @@ config NETFILTER_XT_TARGET_REDIRECT
tristate "REDIRECT target support"
depends on NF_NAT
select NF_NAT_REDIRECT
- ---help---
+ help
REDIRECT is a special case of NAT: all incoming connections are
mapped onto the incoming interface's address, causing the packets to
come to the local machine instead of passing through. This is
@@ -1021,7 +1021,7 @@ config NETFILTER_XT_TARGET_TEE
depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES
select NF_DUP_IPV4
select NF_DUP_IPV6 if IP6_NF_IPTABLES
- ---help---
+ help
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
@@ -1073,7 +1073,7 @@ config NETFILTER_XT_TARGET_TCPMSS
tristate '"TCPMSS" target support'
depends on IPV6 || IPV6=n
default m if NETFILTER_ADVANCED=n
- ---help---
+ help
This option adds a `TCPMSS' target, which allows you to alter the
MSS value of TCP SYN packets, to control the maximum size for that
connection (usually limiting it to your outgoing interface's MTU
@@ -1111,7 +1111,7 @@ comment "Xtables matches"
config NETFILTER_XT_MATCH_ADDRTYPE
tristate '"addrtype" address type match support'
default m if NETFILTER_ADVANCED=n
- ---help---
+ help
This option allows you to match what routing thinks of an address,
eg. UNICAST, LOCAL, BROADCAST, ...
@@ -1132,7 +1132,7 @@ config NETFILTER_XT_MATCH_CGROUP
depends on NETFILTER_ADVANCED
depends on CGROUPS
select CGROUP_NET_CLASSID
- ---help---
+ help
Socket/process control group matching allows you to match locally
generated packets based on which net_cls control group processes
belong to.
@@ -1141,7 +1141,7 @@ config NETFILTER_XT_MATCH_CLUSTER
tristate '"cluster" match support'
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option allows you to build work-load-sharing clusters of
network servers/stateful firewalls without having a dedicated
load-balancing router/server/switch. Basically, this match returns
@@ -1179,7 +1179,7 @@ config NETFILTER_XT_MATCH_CONNLABEL
select NF_CONNTRACK_LABELS
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
- ---help---
+ help
This match allows you to test and assign userspace-defined label names
to a connection. The kernel only stores bit values - mapping
names to bits is done by userspace.
@@ -1192,7 +1192,7 @@ config NETFILTER_XT_MATCH_CONNLIMIT
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NETFILTER_CONNCOUNT
- ---help---
+ help
This match allows you to match against the number of parallel
connections to a server per client IP address (or address block).
@@ -1201,7 +1201,7 @@ config NETFILTER_XT_MATCH_CONNMARK
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NETFILTER_XT_CONNMARK
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
@@ -1267,7 +1267,7 @@ config NETFILTER_XT_MATCH_DSCP
config NETFILTER_XT_MATCH_ECN
tristate '"ecn" match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds an "ECN" match, which allows you to match against
the IPv4 and TCP header ECN fields.
@@ -1310,7 +1310,7 @@ config NETFILTER_XT_MATCH_HELPER
config NETFILTER_XT_MATCH_HL
tristate '"hl" hoplimit/TTL match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
HL matching allows you to match packets based on the hoplimit
in the IPv6 header, or the time-to-live field in the IPv4
header of the packet.
@@ -1327,7 +1327,7 @@ config NETFILTER_XT_MATCH_IPCOMP
config NETFILTER_XT_MATCH_IPRANGE
tristate '"iprange" address range match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds an "iprange" match, which allows you to match based on
an IP address range. (Normal iptables only matches on single addresses
with an optional mask.)
@@ -1348,7 +1348,7 @@ config NETFILTER_XT_MATCH_L2TP
tristate '"l2tp" match support'
depends on NETFILTER_ADVANCED
default L2TP
- ---help---
+ help
This option adds an "L2TP" match, which allows you to match against
L2TP protocol header fields.
@@ -1386,7 +1386,7 @@ config NETFILTER_XT_MATCH_MARK
tristate '"mark" match support'
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MARK
- ---help---
+ help
This is a backwards-compat option for the user's convenience
(e.g. when running oldconfig). It selects
CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
@@ -1428,7 +1428,7 @@ config NETFILTER_XT_MATCH_OSF
config NETFILTER_XT_MATCH_OWNER
tristate '"owner" match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
Socket owner matching allows you to match locally-generated packets
based on who created the socket: the user or group. It is also
possible to check whether a socket actually exists.
@@ -1503,7 +1503,7 @@ config NETFILTER_XT_MATCH_REALM
config NETFILTER_XT_MATCH_RECENT
tristate '"recent" match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
This match is used for creating one or many lists of recently
used addresses and then matching against that/those list(s).
@@ -1586,7 +1586,7 @@ config NETFILTER_XT_MATCH_TCPMSS
config NETFILTER_XT_MATCH_TIME
tristate '"time" match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
This option adds a "time" match, which allows you to match based on
the packet arrival time (at the machine which netfilter is running
on) or departure time/date (for locally generated packets).
@@ -1600,7 +1600,7 @@ config NETFILTER_XT_MATCH_TIME
config NETFILTER_XT_MATCH_U32
tristate '"u32" match support'
depends on NETFILTER_ADVANCED
- ---help---
+ help
u32 allows you to extract quantities of up to 4 bytes from a packet,
AND them with specified masks, shift them by specified amounts and
test whether the results are in any of a set of specified ranges.
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 78f046ec506f..3ac7c8c1548d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -376,7 +376,7 @@ static bool nf_remove_net_hook(struct nf_hook_entries *old,
if (orig_ops[i] != unreg)
continue;
WRITE_ONCE(old->hooks[i].hook, accept_all);
- WRITE_ONCE(orig_ops[i], &dummy_ops);
+ WRITE_ONCE(orig_ops[i], (void *)&dummy_ops);
return true;
}
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 5b672e05d758..2c1593089ede 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -6,7 +6,7 @@ menuconfig IP_VS
tristate "IP virtual server support"
depends on NET && INET && NETFILTER
depends on (NF_CONNTRACK || NF_CONNTRACK=n)
- ---help---
+ help
IP Virtual Server support will let you build a high-performance
virtual server based on cluster of two or more real servers. This
option must be enabled for at least one of the clustered computers
@@ -31,14 +31,14 @@ config IP_VS_IPV6
depends on IPV6 = y || IP_VS = IPV6
select IP6_NF_IPTABLES
select NF_DEFRAG_IPV6
- ---help---
+ help
Add IPv6 support to IPVS.
Say Y if unsure.
config IP_VS_DEBUG
bool "IP virtual server debugging"
- ---help---
+ help
Say Y here if you want to get additional messages useful in
debugging the IP virtual server code. You can change the debug
level in /proc/sys/net/ipv4/vs/debug_level
@@ -47,7 +47,7 @@ config IP_VS_TAB_BITS
int "IPVS connection table size (the Nth power of 2)"
range 8 20
default 12
- ---help---
+ help
The IPVS connection hash table uses the chaining scheme to handle
hash collisions. Using a big IPVS connection hash table will greatly
reduce conflicts when there are hundreds of thousands of connections
@@ -78,13 +78,13 @@ comment "IPVS transport protocol load balancing support"
config IP_VS_PROTO_TCP
bool "TCP load balancing support"
- ---help---
+ help
This option enables support for load balancing TCP transport
protocol. Say Y if unsure.
config IP_VS_PROTO_UDP
bool "UDP load balancing support"
- ---help---
+ help
This option enables support for load balancing UDP transport
protocol. Say Y if unsure.
@@ -93,20 +93,20 @@ config IP_VS_PROTO_AH_ESP
config IP_VS_PROTO_ESP
bool "ESP load balancing support"
- ---help---
+ help
This option enables support for load balancing ESP (Encapsulation
Security Payload) transport protocol. Say Y if unsure.
config IP_VS_PROTO_AH
bool "AH load balancing support"
- ---help---
+ help
This option enables support for load balancing AH (Authentication
Header) transport protocol. Say Y if unsure.
config IP_VS_PROTO_SCTP
bool "SCTP load balancing support"
select LIBCRC32C
- ---help---
+ help
This option enables support for load balancing SCTP transport
protocol. Say Y if unsure.
@@ -114,7 +114,7 @@ comment "IPVS scheduler"
config IP_VS_RR
tristate "round-robin scheduling"
- ---help---
+ help
The round-robin scheduling algorithm simply directs network
connections to different real servers in a round-robin manner.
@@ -123,7 +123,7 @@ config IP_VS_RR
config IP_VS_WRR
tristate "weighted round-robin scheduling"
- ---help---
+ help
The weighted round-robin scheduling algorithm directs network
connections to different real servers based on server weights
in a round-robin manner. Servers with higher weights receive
@@ -136,7 +136,7 @@ config IP_VS_WRR
config IP_VS_LC
tristate "least-connection scheduling"
- ---help---
+ help
The least-connection scheduling algorithm directs network
connections to the server with the least number of active
connections.
@@ -146,7 +146,7 @@ config IP_VS_LC
config IP_VS_WLC
tristate "weighted least-connection scheduling"
- ---help---
+ help
The weighted least-connection scheduling algorithm directs network
connections to the server with the least active connections
normalized by the server weight.
@@ -156,7 +156,7 @@ config IP_VS_WLC
config IP_VS_FO
tristate "weighted failover scheduling"
- ---help---
+ help
The weighted failover scheduling algorithm directs network
connections to the server with the highest weight that is
currently available.
@@ -166,7 +166,7 @@ config IP_VS_FO
config IP_VS_OVF
tristate "weighted overflow scheduling"
- ---help---
+ help
The weighted overflow scheduling algorithm directs network
connections to the server with the highest weight that is
currently available and overflows to the next when active
@@ -177,7 +177,7 @@ config IP_VS_OVF
config IP_VS_LBLC
tristate "locality-based least-connection scheduling"
- ---help---
+ help
The locality-based least-connection scheduling algorithm is for
destination IP load balancing. It is usually used in cache cluster.
This algorithm usually directs packet destined for an IP address to
@@ -191,7 +191,7 @@ config IP_VS_LBLC
config IP_VS_LBLCR
tristate "locality-based least-connection with replication scheduling"
- ---help---
+ help
The locality-based least-connection with replication scheduling
algorithm is also for destination IP load balancing. It is
usually used in cache cluster. It differs from the LBLC scheduling
@@ -209,7 +209,7 @@ config IP_VS_LBLCR
config IP_VS_DH
tristate "destination hashing scheduling"
- ---help---
+ help
The destination hashing scheduling algorithm assigns network
connections to the servers through looking up a statically assigned
hash table by their destination IP addresses.
@@ -219,7 +219,7 @@ config IP_VS_DH
config IP_VS_SH
tristate "source hashing scheduling"
- ---help---
+ help
The source hashing scheduling algorithm assigns network
connections to the servers through looking up a statically assigned
hash table by their source IP addresses.
@@ -229,7 +229,7 @@ config IP_VS_SH
config IP_VS_MH
tristate "maglev hashing scheduling"
- ---help---
+ help
The maglev consistent hashing scheduling algorithm provides
Google's Maglev hashing algorithm as an IPVS scheduler. It assigns
network connections to the servers through looking up a statically
@@ -248,7 +248,7 @@ config IP_VS_MH
config IP_VS_SED
tristate "shortest expected delay scheduling"
- ---help---
+ help
The shortest expected delay scheduling algorithm assigns network
connections to the server with the shortest expected delay. The
expected delay that the job will experience is (Ci + 1) / Ui if
@@ -261,7 +261,7 @@ config IP_VS_SED
config IP_VS_NQ
tristate "never queue scheduling"
- ---help---
+ help
The never queue scheduling algorithm adopts a two-speed model.
When there is an idle server available, the job will be sent to
the idle server, instead of waiting for a fast one. When there
@@ -278,7 +278,7 @@ config IP_VS_SH_TAB_BITS
int "IPVS source hashing table size (the Nth power of 2)"
range 4 20
default 8
- ---help---
+ help
The source hashing scheduler maps source IPs to destinations
stored in a hash table. This table is tiled by each destination
until all slots in the table are filled. When using weights to
@@ -293,7 +293,7 @@ config IP_VS_MH_TAB_INDEX
int "IPVS maglev hashing table index of size (the prime numbers)"
range 8 17
default 12
- ---help---
+ help
The maglev hashing scheduler maps source IPs to destinations
stored in a hash table. This table is assigned by a preference
list of the positions to each destination until all slots in
@@ -312,7 +312,7 @@ config IP_VS_FTP
depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT && \
NF_CONNTRACK_FTP
select IP_VS_NFCT
- ---help---
+ help
FTP is a protocol that transfers IP address and/or port number in
the payload. In the virtual server via Network Address Translation,
the IP address and port number of real servers cannot be sent to
@@ -326,7 +326,7 @@ config IP_VS_FTP
config IP_VS_NFCT
bool "Netfilter connection tracking"
depends on NF_CONNTRACK
- ---help---
+ help
The Netfilter connection tracking support allows the IPVS
connection state to be exported to the Netfilter framework
for filtering purposes.
@@ -335,7 +335,7 @@ config IP_VS_PE_SIP
tristate "SIP persistence engine"
depends on IP_VS_PROTO_UDP
depends on NF_CONNTRACK_SIP
- ---help---
+ help
Allow persistence based on the SIP Call-ID
endif # IP_VS
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d7bd8b1f27d5..832eabecfbdd 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -939,7 +939,8 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
filter->mark.mask = 0xffffffff;
}
} else if (cda[CTA_MARK_MASK]) {
- return ERR_PTR(-EINVAL);
+ err = -EINVAL;
+ goto err_filter;
}
#endif
if (!cda[CTA_FILTER])
@@ -947,15 +948,17 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
if (err < 0)
- return ERR_PTR(err);
+ goto err_filter;
err = ctnetlink_parse_filter(cda[CTA_FILTER], filter);
if (err < 0)
- return ERR_PTR(err);
+ goto err_filter;
if (filter->orig_flags) {
- if (!cda[CTA_TUPLE_ORIG])
- return ERR_PTR(-EINVAL);
+ if (!cda[CTA_TUPLE_ORIG]) {
+ err = -EINVAL;
+ goto err_filter;
+ }
err = ctnetlink_parse_tuple_filter(cda, &filter->orig,
CTA_TUPLE_ORIG,
@@ -963,23 +966,32 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
&filter->zone,
filter->orig_flags);
if (err < 0)
- return ERR_PTR(err);
+ goto err_filter;
}
if (filter->reply_flags) {
- if (!cda[CTA_TUPLE_REPLY])
- return ERR_PTR(-EINVAL);
+ if (!cda[CTA_TUPLE_REPLY]) {
+ err = -EINVAL;
+ goto err_filter;
+ }
err = ctnetlink_parse_tuple_filter(cda, &filter->reply,
CTA_TUPLE_REPLY,
filter->family,
&filter->zone,
filter->orig_flags);
- if (err < 0)
- return ERR_PTR(err);
+ if (err < 0) {
+ err = -EINVAL;
+ goto err_filter;
+ }
}
return filter;
+
+err_filter:
+ kfree(filter);
+
+ return ERR_PTR(err);
}
static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda)
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 6a3034f84ab6..afa85171df38 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -387,51 +387,6 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
-int nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
- flow_setup_cb_t *cb, void *cb_priv)
-{
- struct flow_block *block = &flow_table->flow_block;
- struct flow_block_cb *block_cb;
- int err = 0;
-
- down_write(&flow_table->flow_block_lock);
- block_cb = flow_block_cb_lookup(block, cb, cb_priv);
- if (block_cb) {
- err = -EEXIST;
- goto unlock;
- }
-
- block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
- if (IS_ERR(block_cb)) {
- err = PTR_ERR(block_cb);
- goto unlock;
- }
-
- list_add_tail(&block_cb->list, &block->cb_list);
-
-unlock:
- up_write(&flow_table->flow_block_lock);
- return err;
-}
-EXPORT_SYMBOL_GPL(nf_flow_table_offload_add_cb);
-
-void nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
- flow_setup_cb_t *cb, void *cb_priv)
-{
- struct flow_block *block = &flow_table->flow_block;
- struct flow_block_cb *block_cb;
-
- down_write(&flow_table->flow_block_lock);
- block_cb = flow_block_cb_lookup(block, cb, cb_priv);
- if (block_cb) {
- list_del(&block_cb->list);
- flow_block_cb_free(block_cb);
- } else {
- WARN_ON(true);
- }
- up_write(&flow_table->flow_block_lock);
-}
-EXPORT_SYMBOL_GPL(nf_flow_table_offload_del_cb);
static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
__be16 port, __be16 new_port)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 073aa1051d43..7647ecfa0d40 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6550,12 +6550,22 @@ err1:
return err;
}
+static void nft_flowtable_hook_release(struct nft_flowtable_hook *flowtable_hook)
+{
+ struct nft_hook *this, *next;
+
+ list_for_each_entry_safe(this, next, &flowtable_hook->list, list) {
+ list_del(&this->list);
+ kfree(this);
+ }
+}
+
static int nft_delflowtable_hook(struct nft_ctx *ctx,
struct nft_flowtable *flowtable)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_flowtable_hook flowtable_hook;
- struct nft_hook *this, *next, *hook;
+ struct nft_hook *this, *hook;
struct nft_trans *trans;
int err;
@@ -6564,33 +6574,40 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
if (err < 0)
return err;
- list_for_each_entry_safe(this, next, &flowtable_hook.list, list) {
+ list_for_each_entry(this, &flowtable_hook.list, list) {
hook = nft_hook_list_find(&flowtable->hook_list, this);
if (!hook) {
err = -ENOENT;
goto err_flowtable_del_hook;
}
hook->inactive = true;
- list_del(&this->list);
- kfree(this);
}
trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
sizeof(struct nft_trans_flowtable));
- if (!trans)
- return -ENOMEM;
+ if (!trans) {
+ err = -ENOMEM;
+ goto err_flowtable_del_hook;
+ }
nft_trans_flowtable(trans) = flowtable;
nft_trans_flowtable_update(trans) = true;
INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+ nft_flowtable_hook_release(&flowtable_hook);
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
err_flowtable_del_hook:
- list_for_each_entry(hook, &flowtable_hook.list, list)
+ list_for_each_entry(this, &flowtable_hook.list, list) {
+ hook = nft_hook_list_find(&flowtable->hook_list, this);
+ if (!hook)
+ break;
+
hook->inactive = false;
+ }
+ nft_flowtable_hook_release(&flowtable_hook);
return err;
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 8b5acc6910fd..8c04388296b0 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1242,7 +1242,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
}
- if (!*this_cpu_ptr(m->scratch) || bsize_max > m->bsize_max) {
+ if (!*get_cpu_ptr(m->scratch) || bsize_max > m->bsize_max) {
+ put_cpu_ptr(m->scratch);
+
err = pipapo_realloc_scratch(m, bsize_max);
if (err)
return err;
@@ -1250,6 +1252,8 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
this_cpu_write(nft_pipapo_scratch_index, false);
m->bsize_max = bsize_max;
+ } else {
+ put_cpu_ptr(m->scratch);
}
*ext2 = &e->ext;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 62f416bc0579..b6aad3fc46c3 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -271,12 +271,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
if (nft_rbtree_interval_start(new)) {
if (nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext, genmask))
+ nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext))
overlap = false;
} else {
overlap = nft_rbtree_interval_end(rbe) &&
nft_set_elem_active(&rbe->ext,
- genmask);
+ genmask) &&
+ !nft_set_elem_expired(&rbe->ext);
}
} else if (d > 0) {
p = &parent->rb_right;
@@ -284,9 +286,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
if (nft_rbtree_interval_end(new)) {
overlap = nft_rbtree_interval_end(rbe) &&
nft_set_elem_active(&rbe->ext,
- genmask);
+ genmask) &&
+ !nft_set_elem_expired(&rbe->ext);
} else if (nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext, genmask)) {
+ nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext)) {
overlap = true;
}
} else {
@@ -294,15 +298,18 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_start(new)) {
p = &parent->rb_left;
- if (nft_set_elem_active(&rbe->ext, genmask))
+ if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext))
overlap = false;
} else if (nft_rbtree_interval_start(rbe) &&
nft_rbtree_interval_end(new)) {
p = &parent->rb_right;
- if (nft_set_elem_active(&rbe->ext, genmask))
+ if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext))
overlap = false;
- } else if (nft_set_elem_active(&rbe->ext, genmask)) {
+ } else if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_set_elem_expired(&rbe->ext)) {
*ext = &rbe->ext;
return -EEXIST;
} else {
diff --git a/net/netlabel/Kconfig b/net/netlabel/Kconfig
index 07b03c306f28..4383ac29693e 100644
--- a/net/netlabel/Kconfig
+++ b/net/netlabel/Kconfig
@@ -8,7 +8,7 @@ config NETLABEL
depends on SECURITY
select CRC_CCITT if IPV6
default n
- ---help---
+ help
NetLabel provides support for explicit network packet labeling
protocols such as CIPSO and RIPSO. For more information see
Documentation/netlabel as well as the NetLabel SourceForge project
diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
index 20f967974da0..1039d4f2ce11 100644
--- a/net/netlink/Kconfig
+++ b/net/netlink/Kconfig
@@ -6,6 +6,6 @@
config NETLINK_DIAG
tristate "NETLINK: socket monitoring interface"
default n
- ---help---
+ help
Support for NETLINK socket monitoring interface used by the ss tool.
If unsure, say Y.
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 2f049692e012..55ee680e9db1 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -474,8 +474,7 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
struct netlink_ext_ack *extack,
const struct genl_ops *ops,
int hdrlen,
- enum genl_validate_flags no_strict_flag,
- bool parallel)
+ enum genl_validate_flags no_strict_flag)
{
enum netlink_validation validate = ops->validate & no_strict_flag ?
NL_VALIDATE_LIBERAL :
@@ -486,7 +485,7 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
if (!family->maxattr)
return NULL;
- if (parallel) {
+ if (family->parallel_ops) {
attrbuf = kmalloc_array(family->maxattr + 1,
sizeof(struct nlattr *), GFP_KERNEL);
if (!attrbuf)
@@ -498,7 +497,7 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
family->policy, validate, extack);
if (err) {
- if (parallel)
+ if (family->parallel_ops)
kfree(attrbuf);
return ERR_PTR(err);
}
@@ -506,22 +505,63 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family,
}
static void genl_family_rcv_msg_attrs_free(const struct genl_family *family,
- struct nlattr **attrbuf,
- bool parallel)
+ struct nlattr **attrbuf)
{
- if (parallel)
+ if (family->parallel_ops)
kfree(attrbuf);
}
-static int genl_lock_start(struct netlink_callback *cb)
+struct genl_start_context {
+ const struct genl_family *family;
+ struct nlmsghdr *nlh;
+ struct netlink_ext_ack *extack;
+ const struct genl_ops *ops;
+ int hdrlen;
+};
+
+static int genl_start(struct netlink_callback *cb)
{
- const struct genl_ops *ops = genl_dumpit_info(cb)->ops;
+ struct genl_start_context *ctx = cb->data;
+ const struct genl_ops *ops = ctx->ops;
+ struct genl_dumpit_info *info;
+ struct nlattr **attrs = NULL;
int rc = 0;
+ if (ops->validate & GENL_DONT_VALIDATE_DUMP)
+ goto no_attrs;
+
+ if (ctx->nlh->nlmsg_len < nlmsg_msg_size(ctx->hdrlen))
+ return -EINVAL;
+
+ attrs = genl_family_rcv_msg_attrs_parse(ctx->family, ctx->nlh, ctx->extack,
+ ops, ctx->hdrlen,
+ GENL_DONT_VALIDATE_DUMP_STRICT);
+ if (IS_ERR(attrs))
+ return PTR_ERR(attrs);
+
+no_attrs:
+ info = genl_dumpit_info_alloc();
+ if (!info) {
+ genl_family_rcv_msg_attrs_free(ctx->family, attrs);
+ return -ENOMEM;
+ }
+ info->family = ctx->family;
+ info->ops = ops;
+ info->attrs = attrs;
+
+ cb->data = info;
if (ops->start) {
- genl_lock();
+ if (!ctx->family->parallel_ops)
+ genl_lock();
rc = ops->start(cb);
- genl_unlock();
+ if (!ctx->family->parallel_ops)
+ genl_unlock();
+ }
+
+ if (rc) {
+ genl_family_rcv_msg_attrs_free(info->family, info->attrs);
+ genl_dumpit_info_free(info);
+ cb->data = NULL;
}
return rc;
}
@@ -548,7 +588,7 @@ static int genl_lock_done(struct netlink_callback *cb)
rc = ops->done(cb);
genl_unlock();
}
- genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
+ genl_family_rcv_msg_attrs_free(info->family, info->attrs);
genl_dumpit_info_free(info);
return rc;
}
@@ -561,7 +601,7 @@ static int genl_parallel_done(struct netlink_callback *cb)
if (ops->done)
rc = ops->done(cb);
- genl_family_rcv_msg_attrs_free(info->family, info->attrs, true);
+ genl_family_rcv_msg_attrs_free(info->family, info->attrs);
genl_dumpit_info_free(info);
return rc;
}
@@ -573,43 +613,23 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
const struct genl_ops *ops,
int hdrlen, struct net *net)
{
- struct genl_dumpit_info *info;
- struct nlattr **attrs = NULL;
+ struct genl_start_context ctx;
int err;
if (!ops->dumpit)
return -EOPNOTSUPP;
- if (ops->validate & GENL_DONT_VALIDATE_DUMP)
- goto no_attrs;
-
- if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
- return -EINVAL;
-
- attrs = genl_family_rcv_msg_attrs_parse(family, nlh, extack,
- ops, hdrlen,
- GENL_DONT_VALIDATE_DUMP_STRICT,
- true);
- if (IS_ERR(attrs))
- return PTR_ERR(attrs);
-
-no_attrs:
- /* Allocate dumpit info. It is going to be freed by done() callback. */
- info = genl_dumpit_info_alloc();
- if (!info) {
- genl_family_rcv_msg_attrs_free(family, attrs, true);
- return -ENOMEM;
- }
-
- info->family = family;
- info->ops = ops;
- info->attrs = attrs;
+ ctx.family = family;
+ ctx.nlh = nlh;
+ ctx.extack = extack;
+ ctx.ops = ops;
+ ctx.hdrlen = hdrlen;
if (!family->parallel_ops) {
struct netlink_dump_control c = {
.module = family->module,
- .data = info,
- .start = genl_lock_start,
+ .data = &ctx,
+ .start = genl_start,
.dump = genl_lock_dumpit,
.done = genl_lock_done,
};
@@ -617,12 +637,11 @@ no_attrs:
genl_unlock();
err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
genl_lock();
-
} else {
struct netlink_dump_control c = {
.module = family->module,
- .data = info,
- .start = ops->start,
+ .data = &ctx,
+ .start = genl_start,
.dump = ops->dumpit,
.done = genl_parallel_done,
};
@@ -649,8 +668,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family,
attrbuf = genl_family_rcv_msg_attrs_parse(family, nlh, extack,
ops, hdrlen,
- GENL_DONT_VALIDATE_STRICT,
- family->parallel_ops);
+ GENL_DONT_VALIDATE_STRICT);
if (IS_ERR(attrbuf))
return PTR_ERR(attrbuf);
@@ -676,7 +694,7 @@ static int genl_family_rcv_msg_doit(const struct genl_family *family,
family->post_doit(ops, skb, &info);
out:
- genl_family_rcv_msg_attrs_free(family, attrbuf, family->parallel_ops);
+ genl_family_rcv_msg_attrs_free(family, attrbuf);
return err;
}
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index eccc7d366e17..f90ef6934b8f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -70,6 +70,7 @@ static const struct proto_ops nr_proto_ops;
* separate class since they always nest.
*/
static struct lock_class_key nr_netdev_xmit_lock_key;
+static struct lock_class_key nr_netdev_addr_lock_key;
static void nr_set_lockdep_one(struct net_device *dev,
struct netdev_queue *txq,
@@ -80,6 +81,7 @@ static void nr_set_lockdep_one(struct net_device *dev,
static void nr_set_lockdep_key(struct net_device *dev)
{
+ lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key);
netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL);
}
diff --git a/net/nfc/hci/Kconfig b/net/nfc/hci/Kconfig
index 4822d6f46947..9500b8a27475 100644
--- a/net/nfc/hci/Kconfig
+++ b/net/nfc/hci/Kconfig
@@ -13,6 +13,6 @@ config NFC_SHDLC
select CRC_CCITT
bool "SHDLC link layer for HCI based NFC drivers"
default n
- ---help---
+ help
Say yes if you use an NFC HCI driver that requires SHDLC link layer.
If unsure, say N here.
diff --git a/net/nsh/Kconfig b/net/nsh/Kconfig
index 19af948ab6f0..84f2a2823417 100644
--- a/net/nsh/Kconfig
+++ b/net/nsh/Kconfig
@@ -2,7 +2,7 @@
menuconfig NET_NSH
tristate "Network Service Header (NSH) protocol"
default n
- ---help---
+ help
Network Service Header is an implementation of Service Function
Chaining (RFC 7665). The current implementation in Linux supports
only MD type 1 and only with the openvswitch module.
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 22d7d5604b4c..15bd287f5cbd 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -15,7 +15,7 @@ config OPENVSWITCH
select NET_MPLS_GSO
select DST_CACHE
select NET_NSH
- ---help---
+ help
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments. In addition to supporting a variety of features
expected in a traditional hardware switch, it enables fine-grained
@@ -43,7 +43,7 @@ config OPENVSWITCH_GRE
depends on OPENVSWITCH
depends on NET_IPGRE
default OPENVSWITCH
- ---help---
+ help
If you say Y here, then the Open vSwitch will be able create GRE
vport.
@@ -56,7 +56,7 @@ config OPENVSWITCH_VXLAN
depends on OPENVSWITCH
depends on VXLAN
default OPENVSWITCH
- ---help---
+ help
If you say Y here, then the Open vSwitch will be able create vxlan vport.
Say N to exclude this support and reduce the binary size.
@@ -68,7 +68,7 @@ config OPENVSWITCH_GENEVE
depends on OPENVSWITCH
depends on GENEVE
default OPENVSWITCH
- ---help---
+ help
If you say Y here, then the Open vSwitch will be able create geneve vport.
Say N to exclude this support and reduce the binary size.
diff --git a/net/packet/Kconfig b/net/packet/Kconfig
index b4abad135294..2997382d597c 100644
--- a/net/packet/Kconfig
+++ b/net/packet/Kconfig
@@ -5,7 +5,7 @@
config PACKET
tristate "Packet socket"
- ---help---
+ help
The Packet protocol is used by applications which communicate
directly with network devices without an intermediate network
protocol implemented in the kernel, e.g. tcpdump. If you want them
@@ -20,6 +20,6 @@ config PACKET_DIAG
tristate "Packet: sockets monitoring interface"
depends on PACKET
default n
- ---help---
+ help
Support for PF_PACKET sockets monitoring interface used by the ss tool.
If unsure, say Y.
diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
index f362ca316015..b4020b84760f 100644
--- a/net/qrtr/Kconfig
+++ b/net/qrtr/Kconfig
@@ -4,7 +4,7 @@
config QRTR
tristate "Qualcomm IPC Router support"
- ---help---
+ help
Say Y if you intend to use Qualcomm IPC router protocol. The
protocol is used to communicate with services provided by other
hardware blocks in the system.
@@ -17,13 +17,13 @@ if QRTR
config QRTR_SMD
tristate "SMD IPC Router channels"
depends on RPMSG || (COMPILE_TEST && RPMSG=n)
- ---help---
+ help
Say Y here to support SMD based ipcrouter channels. SMD is the
most common transport for IPC Router.
config QRTR_TUN
tristate "TUN device for Qualcomm IPC Router"
- ---help---
+ help
Say Y here to expose a character device that allows user space to
implement endpoints of QRTR, for purpose of tunneling data to other
hosts or testing purposes.
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index c64e154bc18f..75cd696963b2 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -3,14 +3,14 @@
config RDS
tristate "The Reliable Datagram Sockets Protocol"
depends on INET
- ---help---
+ help
The RDS (Reliable Datagram Sockets) protocol provides reliable,
sequenced delivery of datagrams over Infiniband or TCP.
config RDS_RDMA
tristate "RDS over Infiniband"
depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
- ---help---
+ help
Allow RDS to use Infiniband as a transport.
This transport supports RDMA operations.
@@ -18,7 +18,7 @@ config RDS_TCP
tristate "RDS over TCP"
depends on RDS
depends on IPV6 || !IPV6
- ---help---
+ help
Allow RDS to use TCP as a transport.
This transport does not support RDMA operations.
diff --git a/net/rds/Makefile b/net/rds/Makefile
index e647f9de104a..8fdc118e2927 100644
--- a/net/rds/Makefile
+++ b/net/rds/Makefile
@@ -7,7 +7,7 @@ rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \
obj-$(CONFIG_RDS_RDMA) += rds_rdma.o
rds_rdma-y := rdma_transport.o \
ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \
- ib_sysctl.o ib_rdma.o ib_fmr.o ib_frmr.o
+ ib_sysctl.o ib_rdma.o ib_frmr.o
obj-$(CONFIG_RDS_TCP) += rds_tcp.o
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a792d8a3872a..deecbdcdae84 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -127,19 +127,23 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
queue_work(rds_wq, &rds_ibdev->free_work);
}
-static void rds_ib_add_one(struct ib_device *device)
+static int rds_ib_add_one(struct ib_device *device)
{
struct rds_ib_device *rds_ibdev;
- bool has_fr, has_fmr;
+ int ret;
/* Only handle IB (no iWARP) devices */
if (device->node_type != RDMA_NODE_IB_CA)
- return;
+ return -EOPNOTSUPP;
+
+ /* Device must support FRWR */
+ if (!(device->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
+ return -EOPNOTSUPP;
rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
ibdev_to_node(device));
if (!rds_ibdev)
- return;
+ return -ENOMEM;
spin_lock_init(&rds_ibdev->spinlock);
refcount_set(&rds_ibdev->refcount, 1);
@@ -151,11 +155,6 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->max_wrs = device->attrs.max_qp_wr;
rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE);
- has_fr = (device->attrs.device_cap_flags &
- IB_DEVICE_MEM_MGT_EXTENSIONS);
- has_fmr = (device->ops.alloc_fmr && device->ops.dealloc_fmr &&
- device->ops.map_phys_fmr && device->ops.unmap_fmr);
- rds_ibdev->use_fastreg = (has_fr && !has_fmr);
rds_ibdev->odp_capable =
!!(device->attrs.device_cap_flags &
IB_DEVICE_ON_DEMAND_PAGING) &&
@@ -164,7 +163,6 @@ static void rds_ib_add_one(struct ib_device *device)
!!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &
IB_ODP_SUPPORT_READ);
- rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
rds_ibdev->max_1m_mrs = device->attrs.max_mr ?
min_t(unsigned int, (device->attrs.max_mr / 2),
rds_ib_mr_1m_pool_size) : rds_ib_mr_1m_pool_size;
@@ -182,12 +180,14 @@ static void rds_ib_add_one(struct ib_device *device)
if (!rds_ibdev->vector_load) {
pr_err("RDS/IB: %s failed to allocate vector memory\n",
__func__);
+ ret = -ENOMEM;
goto put_dev;
}
rds_ibdev->dev = device;
rds_ibdev->pd = ib_alloc_pd(device, 0);
if (IS_ERR(rds_ibdev->pd)) {
+ ret = PTR_ERR(rds_ibdev->pd);
rds_ibdev->pd = NULL;
goto put_dev;
}
@@ -195,12 +195,15 @@ static void rds_ib_add_one(struct ib_device *device)
device->dma_device,
sizeof(struct rds_header),
L1_CACHE_BYTES, 0);
- if (!rds_ibdev->rid_hdrs_pool)
+ if (!rds_ibdev->rid_hdrs_pool) {
+ ret = -ENOMEM;
goto put_dev;
+ }
rds_ibdev->mr_1m_pool =
rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
if (IS_ERR(rds_ibdev->mr_1m_pool)) {
+ ret = PTR_ERR(rds_ibdev->mr_1m_pool);
rds_ibdev->mr_1m_pool = NULL;
goto put_dev;
}
@@ -208,18 +211,16 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->mr_8k_pool =
rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL);
if (IS_ERR(rds_ibdev->mr_8k_pool)) {
+ ret = PTR_ERR(rds_ibdev->mr_8k_pool);
rds_ibdev->mr_8k_pool = NULL;
goto put_dev;
}
- rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_mrs = %d, max_8k_mrs = %d\n",
- device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
- rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_mrs,
- rds_ibdev->max_8k_mrs);
+ rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, max_1m_mrs = %d, max_8k_mrs = %d\n",
+ device->attrs.max_mr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
+ rds_ibdev->max_1m_mrs, rds_ibdev->max_8k_mrs);
- pr_info("RDS/IB: %s: %s supported and preferred\n",
- device->name,
- rds_ibdev->use_fastreg ? "FRMR" : "FMR");
+ pr_info("RDS/IB: %s: added\n", device->name);
down_write(&rds_ib_devices_lock);
list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
@@ -227,12 +228,13 @@ static void rds_ib_add_one(struct ib_device *device)
refcount_inc(&rds_ibdev->refcount);
ib_set_client_data(device, &rds_ib_client, rds_ibdev);
- refcount_inc(&rds_ibdev->refcount);
rds_ib_nodev_connect();
+ return 0;
put_dev:
rds_ib_dev_put(rds_ibdev);
+ return ret;
}
/*
@@ -274,9 +276,6 @@ static void rds_ib_remove_one(struct ib_device *device, void *client_data)
{
struct rds_ib_device *rds_ibdev = client_data;
- if (!rds_ibdev)
- return;
-
rds_ib_dev_shutdown(rds_ibdev);
/* stop connection attempts from getting a reference to this device. */
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 0296f1f7acda..8dfff43cf07f 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -247,13 +247,11 @@ struct rds_ib_device {
struct ib_device *dev;
struct ib_pd *pd;
struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */
- u8 use_fastreg:1;
u8 odp_capable:1;
unsigned int max_mrs;
struct rds_ib_mr_pool *mr_1m_pool;
struct rds_ib_mr_pool *mr_8k_pool;
- unsigned int fmr_max_remaps;
unsigned int max_8k_mrs;
unsigned int max_1m_mrs;
int max_sge;
@@ -266,7 +264,13 @@ struct rds_ib_device {
int *vector_load;
};
-#define ibdev_to_node(ibdev) dev_to_node((ibdev)->dev.parent)
+static inline int ibdev_to_node(struct ib_device *ibdev)
+{
+ struct device *parent;
+
+ parent = ibdev->dev.parent;
+ return parent ? dev_to_node(parent) : NUMA_NO_NODE;
+}
#define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
/* bits for i_ack_flags */
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index c71f4328d138..c3319ff3ee11 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -37,6 +37,7 @@
#include <linux/vmalloc.h>
#include <linux/ratelimit.h>
#include <net/addrconf.h>
+#include <rdma/ib_cm.h>
#include "rds_single_path.h"
#include "rds.h"
@@ -526,10 +527,10 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
return -EOPNOTSUPP;
/* The fr_queue_space is currently set to 512, to add extra space on
- * completion queue and send queue. This extra space is used for FRMR
+ * completion queue and send queue. This extra space is used for FRWR
* registration and invalidation work requests
*/
- fr_queue_space = (rds_ibdev->use_fastreg ? RDS_IB_DEFAULT_FR_WR : 0);
+ fr_queue_space = RDS_IB_DEFAULT_FR_WR;
/* add the conn now so that connection establishment has the dev */
rds_ib_add_conn(rds_ibdev, conn);
@@ -927,7 +928,8 @@ out:
if (conn)
mutex_unlock(&conn->c_cm_lock);
if (err)
- rdma_reject(cm_id, &err, sizeof(int));
+ rdma_reject(cm_id, &err, sizeof(int),
+ IB_CM_REJ_CONSUMER_DEFINED);
return destroy;
}
diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c
deleted file mode 100644
index 93c0437e6a5f..000000000000
--- a/net/rds/ib_fmr.c
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Copyright (c) 2016 Oracle. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ib_mr.h"
-
-struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev, int npages)
-{
- struct rds_ib_mr_pool *pool;
- struct rds_ib_mr *ibmr = NULL;
- struct rds_ib_fmr *fmr;
- int err = 0;
-
- if (npages <= RDS_MR_8K_MSG_SIZE)
- pool = rds_ibdev->mr_8k_pool;
- else
- pool = rds_ibdev->mr_1m_pool;
-
- if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
- queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
-
- /* Switch pools if one of the pool is reaching upper limit */
- if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
- if (pool->pool_type == RDS_IB_MR_8K_POOL)
- pool = rds_ibdev->mr_1m_pool;
- else
- pool = rds_ibdev->mr_8k_pool;
- }
-
- ibmr = rds_ib_try_reuse_ibmr(pool);
- if (ibmr)
- return ibmr;
-
- ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
- rdsibdev_to_node(rds_ibdev));
- if (!ibmr) {
- err = -ENOMEM;
- goto out_no_cigar;
- }
-
- fmr = &ibmr->u.fmr;
- fmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
- (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_READ |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_ATOMIC),
- &pool->fmr_attr);
- if (IS_ERR(fmr->fmr)) {
- err = PTR_ERR(fmr->fmr);
- fmr->fmr = NULL;
- pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, err);
- goto out_no_cigar;
- }
-
- ibmr->pool = pool;
- if (pool->pool_type == RDS_IB_MR_8K_POOL)
- rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
- else
- rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
-
- return ibmr;
-
-out_no_cigar:
- kfree(ibmr);
- atomic_dec(&pool->item_count);
-
- return ERR_PTR(err);
-}
-
-static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev,
- struct rds_ib_mr *ibmr, struct scatterlist *sg,
- unsigned int nents)
-{
- struct ib_device *dev = rds_ibdev->dev;
- struct rds_ib_fmr *fmr = &ibmr->u.fmr;
- struct scatterlist *scat = sg;
- u64 io_addr = 0;
- u64 *dma_pages;
- u32 len;
- int page_cnt, sg_dma_len;
- int i, j;
- int ret;
-
- sg_dma_len = ib_dma_map_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
- if (unlikely(!sg_dma_len)) {
- pr_warn("RDS/IB: %s failed!\n", __func__);
- return -EBUSY;
- }
-
- len = 0;
- page_cnt = 0;
-
- for (i = 0; i < sg_dma_len; ++i) {
- unsigned int dma_len = sg_dma_len(&scat[i]);
- u64 dma_addr = sg_dma_address(&scat[i]);
-
- if (dma_addr & ~PAGE_MASK) {
- if (i > 0) {
- ib_dma_unmap_sg(dev, sg, nents,
- DMA_BIDIRECTIONAL);
- return -EINVAL;
- } else {
- ++page_cnt;
- }
- }
- if ((dma_addr + dma_len) & ~PAGE_MASK) {
- if (i < sg_dma_len - 1) {
- ib_dma_unmap_sg(dev, sg, nents,
- DMA_BIDIRECTIONAL);
- return -EINVAL;
- } else {
- ++page_cnt;
- }
- }
-
- len += dma_len;
- }
-
- page_cnt += len >> PAGE_SHIFT;
- if (page_cnt > ibmr->pool->fmr_attr.max_pages) {
- ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
- return -EINVAL;
- }
-
- dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC,
- rdsibdev_to_node(rds_ibdev));
- if (!dma_pages) {
- ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
- return -ENOMEM;
- }
-
- page_cnt = 0;
- for (i = 0; i < sg_dma_len; ++i) {
- unsigned int dma_len = sg_dma_len(&scat[i]);
- u64 dma_addr = sg_dma_address(&scat[i]);
-
- for (j = 0; j < dma_len; j += PAGE_SIZE)
- dma_pages[page_cnt++] =
- (dma_addr & PAGE_MASK) + j;
- }
-
- ret = ib_map_phys_fmr(fmr->fmr, dma_pages, page_cnt, io_addr);
- if (ret) {
- ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL);
- goto out;
- }
-
- /* Success - we successfully remapped the MR, so we can
- * safely tear down the old mapping.
- */
- rds_ib_teardown_mr(ibmr);
-
- ibmr->sg = scat;
- ibmr->sg_len = nents;
- ibmr->sg_dma_len = sg_dma_len;
- ibmr->remap_count++;
-
- if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
- rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
- else
- rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
- ret = 0;
-
-out:
- kfree(dma_pages);
-
- return ret;
-}
-
-struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *rds_ibdev,
- struct scatterlist *sg,
- unsigned long nents,
- u32 *key)
-{
- struct rds_ib_mr *ibmr = NULL;
- struct rds_ib_fmr *fmr;
- int ret;
-
- ibmr = rds_ib_alloc_fmr(rds_ibdev, nents);
- if (IS_ERR(ibmr))
- return ibmr;
-
- ibmr->device = rds_ibdev;
- fmr = &ibmr->u.fmr;
- ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
- if (ret == 0)
- *key = fmr->fmr->rkey;
- else
- rds_ib_free_mr(ibmr, 0);
-
- return ibmr;
-}
-
-void rds_ib_unreg_fmr(struct list_head *list, unsigned int *nfreed,
- unsigned long *unpinned, unsigned int goal)
-{
- struct rds_ib_mr *ibmr, *next;
- struct rds_ib_fmr *fmr;
- LIST_HEAD(fmr_list);
- int ret = 0;
- unsigned int freed = *nfreed;
-
- /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
- list_for_each_entry(ibmr, list, unmap_list) {
- fmr = &ibmr->u.fmr;
- list_add(&fmr->fmr->list, &fmr_list);
- }
-
- ret = ib_unmap_fmr(&fmr_list);
- if (ret)
- pr_warn("RDS/IB: FMR invalidation failed (err=%d)\n", ret);
-
- /* Now we can destroy the DMA mapping and unpin any pages */
- list_for_each_entry_safe(ibmr, next, list, unmap_list) {
- fmr = &ibmr->u.fmr;
- *unpinned += ibmr->sg_len;
- __rds_ib_teardown_mr(ibmr);
- if (freed < goal ||
- ibmr->remap_count >= ibmr->pool->fmr_attr.max_maps) {
- if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
- rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
- else
- rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
- list_del(&ibmr->unmap_list);
- ib_dealloc_fmr(fmr->fmr);
- kfree(ibmr);
- freed++;
- }
- }
- *nfreed = freed;
-}
-
-void rds_ib_free_fmr_list(struct rds_ib_mr *ibmr)
-{
- struct rds_ib_mr_pool *pool = ibmr->pool;
-
- if (ibmr->remap_count >= pool->fmr_attr.max_maps)
- llist_add(&ibmr->llnode, &pool->drop_list);
- else
- llist_add(&ibmr->llnode, &pool->free_list);
-}
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 06ecf9d2d4bf..9b6ffff72f2d 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -76,7 +76,7 @@ static struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
frmr = &ibmr->u.frmr;
frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
- pool->fmr_attr.max_pages);
+ pool->max_pages);
if (IS_ERR(frmr->mr)) {
pr_warn("RDS/IB: %s failed to allocate MR", __func__);
err = PTR_ERR(frmr->mr);
@@ -240,7 +240,7 @@ static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
}
frmr->dma_npages += len >> PAGE_SHIFT;
- if (frmr->dma_npages > ibmr->pool->fmr_attr.max_pages) {
+ if (frmr->dma_npages > ibmr->pool->max_pages) {
ret = -EMSGSIZE;
goto out_unmap;
}
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 0c8252d7fe2b..ea5e9aee4959 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -43,10 +43,6 @@
#define RDS_MR_8K_SCALE (256 / (RDS_MR_8K_MSG_SIZE + 1))
#define RDS_MR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2))
-struct rds_ib_fmr {
- struct ib_fmr *fmr;
-};
-
enum rds_ib_fr_state {
FRMR_IS_FREE, /* mr invalidated & ready for use */
FRMR_IS_INUSE, /* mr is in use or used & can be invalidated */
@@ -84,7 +80,6 @@ struct rds_ib_mr {
u8 odp:1;
union {
- struct rds_ib_fmr fmr;
struct rds_ib_frmr frmr;
struct ib_mr *mr;
} u;
@@ -109,8 +104,7 @@ struct rds_ib_mr_pool {
unsigned long max_items;
unsigned long max_items_soft;
unsigned long max_free_pinned;
- struct ib_fmr_attr fmr_attr;
- bool use_fastreg;
+ unsigned int max_pages;
};
extern struct workqueue_struct *rds_ib_mr_wq;
@@ -136,15 +130,9 @@ u32 rds_ib_get_lkey(void *trans_private);
void __rds_ib_teardown_mr(struct rds_ib_mr *);
void rds_ib_teardown_mr(struct rds_ib_mr *);
-struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *, int);
struct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *);
int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *, int, struct rds_ib_mr **);
-struct rds_ib_mr *rds_ib_reg_fmr(struct rds_ib_device *, struct scatterlist *,
- unsigned long, u32 *);
struct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *);
-void rds_ib_unreg_fmr(struct list_head *, unsigned int *,
- unsigned long *, unsigned int);
-void rds_ib_free_fmr_list(struct rds_ib_mr *);
struct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
struct rds_ib_connection *ic,
struct scatterlist *sg,
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index b34b24e237f8..8f070ee7e742 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -181,7 +181,7 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co
struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
iinfo->rdma_mr_max = pool_1m->max_items;
- iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages;
+ iinfo->rdma_mr_size = pool_1m->max_pages;
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -191,7 +191,7 @@ void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
iinfo6->rdma_mr_max = pool_1m->max_items;
- iinfo6->rdma_mr_size = pool_1m->fmr_attr.max_pages;
+ iinfo6->rdma_mr_size = pool_1m->max_pages;
}
#endif
@@ -406,10 +406,7 @@ int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
if (list_empty(&unmap_list))
goto out;
- if (pool->use_fastreg)
- rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal);
- else
- rds_ib_unreg_fmr(&unmap_list, &nfreed, &unpinned, free_goal);
+ rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal);
if (!list_empty(&unmap_list)) {
unsigned long flags;
@@ -503,10 +500,7 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
}
/* Return it to the pool's free list */
- if (rds_ibdev->use_fastreg)
- rds_ib_free_frmr_list(ibmr);
- else
- rds_ib_free_fmr_list(ibmr);
+ rds_ib_free_frmr_list(ibmr);
atomic_add(ibmr->sg_len, &pool->free_pinned);
atomic_inc(&pool->dirty_count);
@@ -622,10 +616,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
goto out;
}
- if (rds_ibdev->use_fastreg)
- ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret);
- else
- ibmr = rds_ib_reg_fmr(rds_ibdev, sg, nents, key_ret);
+ ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret);
if (IS_ERR(ibmr)) {
ret = PTR_ERR(ibmr);
pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret);
@@ -669,19 +660,16 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
if (pool_type == RDS_IB_MR_1M_POOL) {
/* +1 allows for unaligned MRs */
- pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1;
+ pool->max_pages = RDS_MR_1M_MSG_SIZE + 1;
pool->max_items = rds_ibdev->max_1m_mrs;
} else {
/* pool_type == RDS_IB_MR_8K_POOL */
- pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1;
+ pool->max_pages = RDS_MR_8K_MSG_SIZE + 1;
pool->max_items = rds_ibdev->max_8k_mrs;
}
- pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
- pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
- pool->fmr_attr.page_shift = PAGE_SHIFT;
+ pool->max_free_pinned = pool->max_items * pool->max_pages / 4;
pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4;
- pool->use_fastreg = rds_ibdev->use_fastreg;
return pool;
}
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index e7a872207b46..ce85656ac9c1 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -71,6 +71,7 @@ ax25_address rose_callsign;
* separate class since they always nest.
*/
static struct lock_class_key rose_netdev_xmit_lock_key;
+static struct lock_class_key rose_netdev_addr_lock_key;
static void rose_set_lockdep_one(struct net_device *dev,
struct netdev_queue *txq,
@@ -81,6 +82,7 @@ static void rose_set_lockdep_one(struct net_device *dev,
static void rose_set_lockdep_key(struct net_device *dev)
{
+ lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key);
netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL);
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 9fe264bec70c..9a2139ebd67d 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -810,100 +810,6 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call)
}
/*
- * Transition a call to the complete state.
- */
-static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call,
- enum rxrpc_call_completion compl,
- u32 abort_code,
- int error)
-{
- if (call->state < RXRPC_CALL_COMPLETE) {
- call->abort_code = abort_code;
- call->error = error;
- call->completion = compl,
- call->state = RXRPC_CALL_COMPLETE;
- trace_rxrpc_call_complete(call);
- wake_up(&call->waitq);
- return true;
- }
- return false;
-}
-
-static inline bool rxrpc_set_call_completion(struct rxrpc_call *call,
- enum rxrpc_call_completion compl,
- u32 abort_code,
- int error)
-{
- bool ret;
-
- write_lock_bh(&call->state_lock);
- ret = __rxrpc_set_call_completion(call, compl, abort_code, error);
- write_unlock_bh(&call->state_lock);
- return ret;
-}
-
-/*
- * Record that a call successfully completed.
- */
-static inline bool __rxrpc_call_completed(struct rxrpc_call *call)
-{
- return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
-}
-
-static inline bool rxrpc_call_completed(struct rxrpc_call *call)
-{
- bool ret;
-
- write_lock_bh(&call->state_lock);
- ret = __rxrpc_call_completed(call);
- write_unlock_bh(&call->state_lock);
- return ret;
-}
-
-/*
- * Record that a call is locally aborted.
- */
-static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
- rxrpc_seq_t seq,
- u32 abort_code, int error)
-{
- trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
- abort_code, error);
- return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
- abort_code, error);
-}
-
-static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
- rxrpc_seq_t seq, u32 abort_code, int error)
-{
- bool ret;
-
- write_lock_bh(&call->state_lock);
- ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
- write_unlock_bh(&call->state_lock);
- return ret;
-}
-
-/*
- * Abort a call due to a protocol error.
- */
-static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
- struct sk_buff *skb,
- const char *eproto_why,
- const char *why,
- u32 abort_code)
-{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why);
- return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO);
-}
-
-#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \
- __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \
- (abort_why), (abort_code))
-
-/*
* conn_client.c
*/
extern unsigned int rxrpc_max_client_connections;
@@ -1101,9 +1007,34 @@ extern const struct seq_operations rxrpc_peer_seq_ops;
* recvmsg.c
*/
void rxrpc_notify_socket(struct rxrpc_call *);
+bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int);
+bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int);
+bool __rxrpc_call_completed(struct rxrpc_call *);
+bool rxrpc_call_completed(struct rxrpc_call *);
+bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
+bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
/*
+ * Abort a call due to a protocol error.
+ */
+static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
+ struct sk_buff *skb,
+ const char *eproto_why,
+ const char *why,
+ u32 abort_code)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why);
+ return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO);
+}
+
+#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \
+ __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \
+ (abort_why), (abort_code))
+
+/*
* rtt.c
*/
void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 2a65ac41055f..aa1c8eee6557 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -248,7 +248,18 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
if (anno_type != RXRPC_TX_ANNO_RETRANS)
continue;
+ /* We need to reset the retransmission state, but we need to do
+ * so before we drop the lock as a new ACK/NAK may come in and
+ * confuse things
+ */
+ annotation &= ~RXRPC_TX_ANNO_MASK;
+ annotation |= RXRPC_TX_ANNO_RESENT;
+ call->rxtx_annotations[ix] = annotation;
+
skb = call->rxtx_buffer[ix];
+ if (!skb)
+ continue;
+
rxrpc_get_skb(skb, rxrpc_skb_got);
spin_unlock_bh(&call->lock);
@@ -262,24 +273,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
rxrpc_free_skb(skb, rxrpc_skb_freed);
spin_lock_bh(&call->lock);
-
- /* We need to clear the retransmit state, but there are two
- * things we need to be aware of: A new ACK/NAK might have been
- * received and the packet might have been hard-ACK'd (in which
- * case it will no longer be in the buffer).
- */
- if (after(seq, call->tx_hard_ack)) {
- annotation = call->rxtx_annotations[ix];
- anno_type = annotation & RXRPC_TX_ANNO_MASK;
- if (anno_type == RXRPC_TX_ANNO_RETRANS ||
- anno_type == RXRPC_TX_ANNO_NAK) {
- annotation &= ~RXRPC_TX_ANNO_MASK;
- annotation |= RXRPC_TX_ANNO_UNACK;
- }
- annotation |= RXRPC_TX_ANNO_RESENT;
- call->rxtx_annotations[ix] = annotation;
- }
-
if (after(call->tx_hard_ack, seq))
seq = call->tx_hard_ack;
}
@@ -320,7 +313,6 @@ recheck_state:
if (call->state == RXRPC_CALL_COMPLETE) {
del_timer_sync(&call->timer);
- rxrpc_notify_socket(call);
goto out_put;
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 06fcff2ebbba..447f55ca6886 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -173,10 +173,9 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn,
else
trace_rxrpc_rx_abort(call, serial,
conn->abort_code);
- if (rxrpc_set_call_completion(call, compl,
- conn->abort_code,
- conn->error))
- rxrpc_notify_socket(call);
+ rxrpc_set_call_completion(call, compl,
+ conn->abort_code,
+ conn->error);
}
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 3be4177baf70..299ac98e9754 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -275,7 +275,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
case RXRPC_CALL_SERVER_AWAIT_ACK:
__rxrpc_call_completed(call);
- rxrpc_notify_socket(call);
state = call->state;
break;
@@ -1013,9 +1012,8 @@ static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
_proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
- if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- abort_code, -ECONNABORTED))
- rxrpc_notify_socket(call);
+ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ abort_code, -ECONNABORTED);
}
/*
@@ -1102,7 +1100,6 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx,
spin_lock(&rx->incoming_lock);
__rxrpc_disconnect_call(conn, call);
spin_unlock(&rx->incoming_lock);
- rxrpc_notify_socket(call);
}
/*
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index b1449d971883..a852f46d5234 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -271,6 +271,9 @@ static void rxrpc_store_error(struct rxrpc_peer *peer,
break;
case SO_EE_ORIGIN_ICMP6:
+ if (err == EACCES)
+ err = EHOSTUNREACH;
+ /* Fall through */
default:
_proto("Rx Received error report { orig=%u }", ee->ee_origin);
break;
@@ -289,9 +292,7 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error,
hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) {
rxrpc_see_call(call);
- if (call->state < RXRPC_CALL_COMPLETE &&
- rxrpc_set_call_completion(call, compl, 0, -error))
- rxrpc_notify_socket(call);
+ rxrpc_set_call_completion(call, compl, 0, -error);
}
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 8b179e3c802a..543afd9bd664 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -68,7 +68,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
"Proto Local "
" Remote "
" SvID ConnID CallID End Use State Abort "
- " UserID TxSeq TW RxSeq RW RxSerial RxTimo\n");
+ " DebugId TxSeq TW RxSeq RW RxSerial RxTimo\n");
return 0;
}
@@ -100,7 +100,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
rx_hard_ack = READ_ONCE(call->rx_hard_ack);
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
- " %-8.8s %08x %lx %08x %02x %08x %02x %08x %06lx\n",
+ " %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n",
lbuff,
rbuff,
call->service_id,
@@ -110,7 +110,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
atomic_read(&call->usage),
rxrpc_call_states[call->state],
call->abort_code,
- call->user_call_ID,
+ call->debug_id,
tx_hard_ack, READ_ONCE(call->tx_top) - tx_hard_ack,
rx_hard_ack, READ_ONCE(call->rx_top) - rx_hard_ack,
call->rx_serial,
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 8578c39ec839..2989742a4aa1 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -59,6 +59,85 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
}
/*
+ * Transition a call to the complete state.
+ */
+bool __rxrpc_set_call_completion(struct rxrpc_call *call,
+				 enum rxrpc_call_completion compl,
+				 u32 abort_code,
+				 int error)
+{
+	/* A call can only be completed once; a second attempt changes
+	 * nothing and is reported to the caller as false.
+	 */
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		return false;
+
+	/* Record the outcome before flipping the state. */
+	call->abort_code = abort_code;
+	call->error = error;
+	call->completion = compl;
+	call->state = RXRPC_CALL_COMPLETE;
+	trace_rxrpc_call_complete(call);
+
+	/* Completion is announced from here: wake anyone sleeping on the
+	 * call's waitqueue and notify the socket.
+	 */
+	wake_up(&call->waitq);
+	rxrpc_notify_socket(call);
+	return true;
+}
+
+bool rxrpc_set_call_completion(struct rxrpc_call *call,
+			       enum rxrpc_call_completion compl,
+			       u32 abort_code,
+			       int error)
+{
+	bool completed;
+
+	/* Unlocked peek: skip taking the lock if the call has already
+	 * reached the complete state.
+	 */
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		return false;
+
+	/* Do the transition under the state lock; the helper rechecks the
+	 * state itself.
+	 */
+	write_lock_bh(&call->state_lock);
+	completed = __rxrpc_set_call_completion(call, compl, abort_code, error);
+	write_unlock_bh(&call->state_lock);
+
+	return completed;
+}
+
+/*
+ * Record that a call successfully completed.
+ */
+bool __rxrpc_call_completed(struct rxrpc_call *call)
+{
+	/* Successful completion: no abort code and no error. */
+	return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED,
+					   0, 0);
+}
+
+bool rxrpc_call_completed(struct rxrpc_call *call)
+{
+	bool completed;
+
+	/* Nothing to do if the call is already complete. */
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		return false;
+
+	/* Mark the call as succeeded while holding the state lock. */
+	write_lock_bh(&call->state_lock);
+	completed = __rxrpc_call_completed(call);
+	write_unlock_bh(&call->state_lock);
+
+	return completed;
+}
+
+/*
+ * Record that a call is locally aborted.
+ */
+bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
+			rxrpc_seq_t seq, u32 abort_code, int error)
+{
+	/* The abort is traced unconditionally, before it is known whether
+	 * this attempt is the one that completes the call.
+	 */
+	trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
+			  abort_code, error);
+
+	/* Returns true only if this call actually made the transition. */
+	return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
+					   abort_code, error);
+}
+
+bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
+		      rxrpc_seq_t seq, u32 abort_code, int error)
+{
+	bool aborted;
+
+	/* Locked variant: take the state lock around the trace and the
+	 * state transition done by __rxrpc_abort_call().
+	 */
+	write_lock_bh(&call->state_lock);
+	aborted = __rxrpc_abort_call(why, call, seq, abort_code, error);
+	write_unlock_bh(&call->state_lock);
+
+	return aborted;
+}
+
+/*
* Pass a call terminating message to userspace.
*/
static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 5e9c43d4a314..1304b8608f56 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -261,10 +261,8 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
case -ENETUNREACH:
case -EHOSTUNREACH:
case -ECONNREFUSED:
- rxrpc_set_call_completion(call,
- RXRPC_CALL_LOCAL_ERROR,
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
0, ret);
- rxrpc_notify_socket(call);
goto out;
}
_debug("need instant resend %d", ret);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f20073f4f84..84badf00647e 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -6,7 +6,7 @@
menuconfig NET_SCHED
bool "QoS and/or fair queueing"
select NET_SCH_FIFO
- ---help---
+ help
When the kernel has several packets to send out over a network
device, it has to decide which ones to send first, which ones to
delay, and which ones to drop. This is the job of the queueing
@@ -47,7 +47,7 @@ comment "Queueing/Scheduling"
config NET_SCH_CBQ
tristate "Class Based Queueing (CBQ)"
- ---help---
+ help
Say Y here if you want to use the Class-Based Queueing (CBQ) packet
scheduling algorithm. This algorithm classifies the waiting packets
into a tree-like hierarchy of classes; the leaves of this tree are
@@ -64,7 +64,7 @@ config NET_SCH_CBQ
config NET_SCH_HTB
tristate "Hierarchical Token Bucket (HTB)"
- ---help---
+ help
Say Y here if you want to use the Hierarchical Token Buckets (HTB)
packet scheduling algorithm. See
<http://luxik.cdi.cz/~devik/qos/htb/> for complete manual and
@@ -78,7 +78,7 @@ config NET_SCH_HTB
config NET_SCH_HFSC
tristate "Hierarchical Fair Service Curve (HFSC)"
- ---help---
+ help
Say Y here if you want to use the Hierarchical Fair Service Curve
(HFSC) packet scheduling algorithm.
@@ -88,7 +88,7 @@ config NET_SCH_HFSC
config NET_SCH_ATM
tristate "ATM Virtual Circuits (ATM)"
depends on ATM
- ---help---
+ help
Say Y here if you want to use the ATM pseudo-scheduler. This
provides a framework for invoking classifiers, which in turn
select classes of this queuing discipline. Each class maps
@@ -101,7 +101,7 @@ config NET_SCH_ATM
config NET_SCH_PRIO
tristate "Multi Band Priority Queueing (PRIO)"
- ---help---
+ help
Say Y here if you want to use an n-band priority queue packet
scheduler.
@@ -110,7 +110,7 @@ config NET_SCH_PRIO
config NET_SCH_MULTIQ
tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)"
- ---help---
+ help
Say Y here if you want to use an n-band queue packet scheduler
to support devices that have multiple hardware transmit queues.
@@ -119,7 +119,7 @@ config NET_SCH_MULTIQ
config NET_SCH_RED
tristate "Random Early Detection (RED)"
- ---help---
+ help
Say Y here if you want to use the Random Early Detection (RED)
packet scheduling algorithm.
@@ -130,7 +130,7 @@ config NET_SCH_RED
config NET_SCH_SFB
tristate "Stochastic Fair Blue (SFB)"
- ---help---
+ help
Say Y here if you want to use the Stochastic Fair Blue (SFB)
packet scheduling algorithm.
@@ -141,7 +141,7 @@ config NET_SCH_SFB
config NET_SCH_SFQ
tristate "Stochastic Fairness Queueing (SFQ)"
- ---help---
+ help
Say Y here if you want to use the Stochastic Fairness Queueing (SFQ)
packet scheduling algorithm.
@@ -152,7 +152,7 @@ config NET_SCH_SFQ
config NET_SCH_TEQL
tristate "True Link Equalizer (TEQL)"
- ---help---
+ help
Say Y here if you want to use the True Link Equalizer (TLE) packet
scheduling algorithm. This queueing discipline allows the combination
of several physical devices into one virtual device.
@@ -164,7 +164,7 @@ config NET_SCH_TEQL
config NET_SCH_TBF
tristate "Token Bucket Filter (TBF)"
- ---help---
+ help
Say Y here if you want to use the Token Bucket Filter (TBF) packet
scheduling algorithm.
@@ -175,7 +175,7 @@ config NET_SCH_TBF
config NET_SCH_CBS
tristate "Credit Based Shaper (CBS)"
- ---help---
+ help
Say Y here if you want to use the Credit Based Shaper (CBS) packet
scheduling algorithm.
@@ -208,7 +208,7 @@ config NET_SCH_TAPRIO
config NET_SCH_GRED
tristate "Generic Random Early Detection (GRED)"
- ---help---
+ help
Say Y here if you want to use the Generic Random Early Detection
(GRED) packet scheduling algorithm for some of your network devices
(see the top of <file:net/sched/sch_red.c> for details and
@@ -219,7 +219,7 @@ config NET_SCH_GRED
config NET_SCH_DSMARK
tristate "Differentiated Services marker (DSMARK)"
- ---help---
+ help
Say Y if you want to schedule packets according to the
Differentiated Services architecture proposed in RFC 2475.
Technical information on this method, with pointers to associated
@@ -230,7 +230,7 @@ config NET_SCH_DSMARK
config NET_SCH_NETEM
tristate "Network emulator (NETEM)"
- ---help---
+ help
Say Y if you want to emulate network delay, loss, and packet
re-ordering. This is often useful to simulate networks when
testing applications or protocols.
@@ -384,7 +384,7 @@ config NET_SCH_INGRESS
depends on NET_CLS_ACT
select NET_INGRESS
select NET_EGRESS
- ---help---
+ help
Say Y here if you want to use classifiers for incoming and/or outgoing
packets. This qdisc doesn't do anything else besides running classifiers,
which can also have actions attached to them. In case of outgoing packets,
@@ -398,7 +398,7 @@ config NET_SCH_INGRESS
config NET_SCH_PLUG
tristate "Plug network traffic until release (PLUG)"
- ---help---
+ help
This queuing discipline allows userspace to plug/unplug a network
output queue, using the netlink interface. When it receives an
@@ -441,7 +441,7 @@ config NET_SCH_ETS
menuconfig NET_SCH_DEFAULT
bool "Allow override default queue discipline"
- ---help---
+ help
Support for selection of default queuing discipline.
Nearly all users can safely say no here, and the default
@@ -492,7 +492,7 @@ config NET_CLS
config NET_CLS_BASIC
tristate "Elementary classification (BASIC)"
select NET_CLS
- ---help---
+ help
Say Y here if you want to be able to classify packets using
only extended matches and actions.
@@ -502,7 +502,7 @@ config NET_CLS_BASIC
config NET_CLS_TCINDEX
tristate "Traffic-Control Index (TCINDEX)"
select NET_CLS
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
traffic control indices. You will want this feature if you want
to implement Differentiated Services together with DSMARK.
@@ -515,7 +515,7 @@ config NET_CLS_ROUTE4
depends on INET
select IP_ROUTE_CLASSID
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets
according to the route table entry they matched.
@@ -525,7 +525,7 @@ config NET_CLS_ROUTE4
config NET_CLS_FW
tristate "Netfilter mark (FW)"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets
according to netfilter/firewall marks.
@@ -535,7 +535,7 @@ config NET_CLS_FW
config NET_CLS_U32
tristate "Universal 32bit comparisons w/ hashing (U32)"
select NET_CLS
- ---help---
+ help
Say Y here to be able to classify packets using a universal
32bit pieces based comparison scheme.
@@ -545,20 +545,20 @@ config NET_CLS_U32
config CLS_U32_PERF
bool "Performance counters support"
depends on NET_CLS_U32
- ---help---
+ help
Say Y here to make u32 gather additional statistics useful for
fine tuning u32 classifiers.
config CLS_U32_MARK
bool "Netfilter marks support"
depends on NET_CLS_U32
- ---help---
+ help
Say Y here to be able to use netfilter marks as u32 key.
config NET_CLS_RSVP
tristate "IPv4 Resource Reservation Protocol (RSVP)"
select NET_CLS
- ---help---
+ help
The Resource Reservation Protocol (RSVP) permits end systems to
request a minimum and maximum data flow rate for a connection; this
is important for real time data such as streaming sound or video.
@@ -572,7 +572,7 @@ config NET_CLS_RSVP
config NET_CLS_RSVP6
tristate "IPv6 Resource Reservation Protocol (RSVP6)"
select NET_CLS
- ---help---
+ help
The Resource Reservation Protocol (RSVP) permits end systems to
request a minimum and maximum data flow rate for a connection; this
is important for real time data such as streaming sound or video.
@@ -586,7 +586,7 @@ config NET_CLS_RSVP6
config NET_CLS_FLOW
tristate "Flow classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
a configurable combination of packet keys. This is mostly useful
in combination with SFQ.
@@ -599,7 +599,7 @@ config NET_CLS_CGROUP
select NET_CLS
select CGROUP_NET_CLASSID
depends on CGROUPS
- ---help---
+ help
Say Y here if you want to classify packets based on the control
cgroup of their process.
@@ -609,7 +609,7 @@ config NET_CLS_CGROUP
config NET_CLS_BPF
tristate "BPF-based classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
programmable BPF (JIT'ed) filters as an alternative to ematches.
@@ -619,7 +619,7 @@ config NET_CLS_BPF
config NET_CLS_FLOWER
tristate "Flower classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
a configurable combination of packet keys and masks.
@@ -629,7 +629,7 @@ config NET_CLS_FLOWER
config NET_CLS_MATCHALL
tristate "Match-all classifier"
select NET_CLS
- ---help---
+ help
If you say Y here, you will be able to classify packets based on
nothing. Every packet will match.
@@ -639,7 +639,7 @@ config NET_CLS_MATCHALL
config NET_EMATCH
bool "Extended Matches"
select NET_CLS
- ---help---
+ help
Say Y here if you want to use extended matches on top of classifiers
and select the extended matches below.
@@ -653,7 +653,7 @@ config NET_EMATCH_STACK
int "Stack size"
depends on NET_EMATCH
default "32"
- ---help---
+ help
Size of the local stack variable used while evaluating the tree of
ematches. Limits the depth of the tree, i.e. the number of
encapsulated precedences. Every level requires 4 bytes of additional
@@ -662,7 +662,7 @@ config NET_EMATCH_STACK
config NET_EMATCH_CMP
tristate "Simple packet data comparison"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
simple packet data comparisons for 8, 16, and 32bit values.
@@ -672,7 +672,7 @@ config NET_EMATCH_CMP
config NET_EMATCH_NBYTE
tristate "Multi byte comparison"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
multiple byte comparisons mainly useful for IPv6 address comparisons.
@@ -682,7 +682,7 @@ config NET_EMATCH_NBYTE
config NET_EMATCH_U32
tristate "U32 key"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets using
the famous u32 key in combination with logic relations.
@@ -692,7 +692,7 @@ config NET_EMATCH_U32
config NET_EMATCH_META
tristate "Metadata"
depends on NET_EMATCH
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
metadata such as load average, netfilter attributes, socket
attributes and routing decisions.
@@ -707,7 +707,7 @@ config NET_EMATCH_TEXT
select TEXTSEARCH_KMP
select TEXTSEARCH_BM
select TEXTSEARCH_FSM
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
textsearch comparisons.
@@ -717,7 +717,7 @@ config NET_EMATCH_TEXT
config NET_EMATCH_CANID
tristate "CAN Identifier"
depends on NET_EMATCH && (CAN=y || CAN=m)
- ---help---
+ help
Say Y here if you want to be able to classify CAN frames based
on CAN Identifier.
@@ -727,7 +727,7 @@ config NET_EMATCH_CANID
config NET_EMATCH_IPSET
tristate "IPset"
depends on NET_EMATCH && IP_SET
- ---help---
+ help
Say Y here if you want to be able to classify packets based on
ipset membership.
@@ -737,7 +737,7 @@ config NET_EMATCH_IPSET
config NET_EMATCH_IPT
tristate "IPtables Matches"
depends on NET_EMATCH && NETFILTER && NETFILTER_XTABLES
- ---help---
+ help
Say Y here to be able to classify packets based on iptables
matches.
Current supported match is "policy" which allows packet classification
@@ -749,7 +749,7 @@ config NET_EMATCH_IPT
config NET_CLS_ACT
bool "Actions"
select NET_CLS
- ---help---
+ help
Say Y here if you want to use traffic control actions. Actions
get attached to classifiers and are invoked after a successful
classification. They are used to overwrite the classification
@@ -761,7 +761,7 @@ config NET_CLS_ACT
config NET_ACT_POLICE
tristate "Traffic Policing"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here if you want to do traffic policing, i.e. strict
bandwidth limiting. This action replaces the existing policing
module.
@@ -772,7 +772,7 @@ config NET_ACT_POLICE
config NET_ACT_GACT
tristate "Generic actions"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to take generic actions such as dropping and
accepting packets.
@@ -782,13 +782,13 @@ config NET_ACT_GACT
config GACT_PROB
bool "Probability support"
depends on NET_ACT_GACT
- ---help---
+ help
Say Y here to use the generic action randomly or deterministically.
config NET_ACT_MIRRED
tristate "Redirecting and Mirroring"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to allow packets to be mirrored or redirected to
other devices.
@@ -799,7 +799,7 @@ config NET_ACT_SAMPLE
tristate "Traffic Sampling"
depends on NET_CLS_ACT
select PSAMPLE
- ---help---
+ help
Say Y here to allow packet sampling tc action. The packet sample
action consists of statistically choosing packets and sampling
them using the psample module.
@@ -810,7 +810,7 @@ config NET_ACT_SAMPLE
config NET_ACT_IPT
tristate "IPtables targets"
depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
- ---help---
+ help
Say Y here to be able to invoke iptables targets after successful
classification.
@@ -820,7 +820,7 @@ config NET_ACT_IPT
config NET_ACT_NAT
tristate "Stateless NAT"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to do stateless NAT on IPv4 packets. You should use
netfilter for NAT unless you know what you are doing.
@@ -830,7 +830,7 @@ config NET_ACT_NAT
config NET_ACT_PEDIT
tristate "Packet Editing"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here if you want to mangle the content of packets.
To compile this code as a module, choose M here: the
@@ -839,7 +839,7 @@ config NET_ACT_PEDIT
config NET_ACT_SIMP
tristate "Simple Example (Debug)"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to add a simple action for demonstration purposes.
It is meant as an example and for debugging purposes. It will
print a configured policy string followed by the packet count
@@ -853,7 +853,7 @@ config NET_ACT_SIMP
config NET_ACT_SKBEDIT
tristate "SKB Editing"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to change skb priority or queue_mapping settings.
If unsure, say N.
@@ -865,7 +865,7 @@ config NET_ACT_CSUM
tristate "Checksum Updating"
depends on NET_CLS_ACT && INET
select LIBCRC32C
- ---help---
+ help
Say Y here to update some common checksum after some direct
packet alterations.
@@ -886,7 +886,7 @@ config NET_ACT_MPLS
config NET_ACT_VLAN
tristate "Vlan manipulation"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to push or pop vlan headers.
If unsure, say N.
@@ -897,7 +897,7 @@ config NET_ACT_VLAN
config NET_ACT_BPF
tristate "BPF based action"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to execute BPF code on packets. The BPF code will decide
if the packet should be dropped or not.
@@ -910,7 +910,7 @@ config NET_ACT_CONNMARK
tristate "Netfilter Connection Mark Retriever"
depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
depends on NF_CONNTRACK && NF_CONNTRACK_MARK
- ---help---
+ help
Say Y here to allow retrieving of conn mark
If unsure, say N.
@@ -938,7 +938,7 @@ config NET_ACT_CTINFO
config NET_ACT_SKBMOD
tristate "skb data modification action"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to allow modification of skb data
If unsure, say N.
@@ -950,7 +950,7 @@ config NET_ACT_IFE
tristate "Inter-FE action based on IETF ForCES InterFE LFB"
depends on NET_CLS_ACT
select NET_IFE
- ---help---
+ help
Say Y here to allow for sourcing and terminating metadata
For details refer to netdev01 paper:
"Distributing Linux Traffic Control Classifier-Action Subsystem"
@@ -962,7 +962,7 @@ config NET_ACT_IFE
config NET_ACT_TUNNEL_KEY
tristate "IP tunnel metadata manipulation"
depends on NET_CLS_ACT
- ---help---
+ help
Say Y here to set/release ip tunnel metadata.
If unsure, say N.
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index e29f0f45d688..e9f3576cbf71 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1543,17 +1543,6 @@ static void __exit ct_cleanup_module(void)
destroy_workqueue(act_ct_wq);
}
-void tcf_ct_flow_table_restore_skb(struct sk_buff *skb, unsigned long cookie)
-{
- enum ip_conntrack_info ctinfo = cookie & NFCT_INFOMASK;
- struct nf_conn *ct;
-
- ct = (struct nf_conn *)(cookie & NFCT_PTRMASK);
- nf_conntrack_get(&ct->ct_general);
- nf_ct_set(skb, ct, ctinfo);
-}
-EXPORT_SYMBOL_GPL(tcf_ct_flow_table_restore_skb);
-
module_init(ct_init_module);
module_exit(ct_cleanup_module);
MODULE_AUTHOR("Paul Blakey <paulb@mellanox.com>");
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 9c628591f452..323ae7f6315d 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -32,7 +32,7 @@ static ktime_t gate_get_time(struct tcf_gate *gact)
return KTIME_MAX;
}
-static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+static void gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
{
struct tcf_gate_params *param = &gact->param;
ktime_t now, base, cycle;
@@ -43,18 +43,13 @@ static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
if (ktime_after(base, now)) {
*start = base;
- return 0;
+ return;
}
cycle = param->tcfg_cycletime;
- /* cycle time should not be zero */
- if (!cycle)
- return -EFAULT;
-
n = div64_u64(ktime_sub_ns(now, base), cycle);
*start = ktime_add_ns(base, (n + 1) * cycle);
- return 0;
}
static void gate_start_timer(struct tcf_gate *gact, ktime_t start)
@@ -277,6 +272,27 @@ release_list:
return err;
}
+static void gate_setup_timer(struct tcf_gate *gact, u64 basetime,
+			     enum tk_offsets tko, s32 clockid,
+			     bool do_init)
+{
+	struct tcf_gate_params *p = &gact->param;
+
+	if (!do_init) {
+		bool unchanged = basetime == p->tcfg_basetime &&
+				 tko == gact->tk_offset &&
+				 clockid == p->tcfg_clockid;
+
+		/* Same base time and clock as before: keep the timer as is. */
+		if (unchanged)
+			return;
+
+		/* tcf_lock is released around hrtimer_cancel() and retaken
+		 * afterwards, so this path expects the caller to hold it.
+		 */
+		spin_unlock_bh(&gact->tcf_lock);
+		hrtimer_cancel(&gact->hitimer);
+		spin_lock_bh(&gact->tcf_lock);
+	}
+
+	/* Store the new timing parameters, then (re)initialise the timer
+	 * on the requested clock.
+	 */
+	p->tcfg_basetime = basetime;
+	p->tcfg_clockid = clockid;
+	gact->tk_offset = tko;
+	hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
+	gact->hitimer.function = gate_timer_func;
+}
+
static int tcf_gate_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
@@ -287,12 +303,12 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
enum tk_offsets tk_offset = TK_OFFS_TAI;
struct nlattr *tb[TCA_GATE_MAX + 1];
struct tcf_chain *goto_ch = NULL;
+ u64 cycletime = 0, basetime = 0;
struct tcf_gate_params *p;
s32 clockid = CLOCK_TAI;
struct tcf_gate *gact;
struct tc_gate *parm;
int ret = 0, err;
- u64 basetime = 0;
u32 gflags = 0;
s32 prio = -1;
ktime_t start;
@@ -308,6 +324,27 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_GATE_PARMS])
return -EINVAL;
+ if (tb[TCA_GATE_CLOCKID]) {
+ clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ tk_offset = TK_OFFS_REAL;
+ break;
+ case CLOCK_MONOTONIC:
+ tk_offset = TK_OFFS_MAX;
+ break;
+ case CLOCK_BOOTTIME:
+ tk_offset = TK_OFFS_BOOT;
+ break;
+ case CLOCK_TAI:
+ tk_offset = TK_OFFS_TAI;
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+ return -EINVAL;
+ }
+ }
+
parm = nla_data(tb[TCA_GATE_PARMS]);
index = parm->index;
@@ -331,10 +368,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
- if (ret == ACT_P_CREATED) {
- to_gate(*a)->param.tcfg_clockid = -1;
- INIT_LIST_HEAD(&(to_gate(*a)->param.entries));
- }
if (tb[TCA_GATE_PRIORITY])
prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
@@ -345,41 +378,19 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
if (tb[TCA_GATE_FLAGS])
gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
- if (tb[TCA_GATE_CLOCKID]) {
- clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
- switch (clockid) {
- case CLOCK_REALTIME:
- tk_offset = TK_OFFS_REAL;
- break;
- case CLOCK_MONOTONIC:
- tk_offset = TK_OFFS_MAX;
- break;
- case CLOCK_BOOTTIME:
- tk_offset = TK_OFFS_BOOT;
- break;
- case CLOCK_TAI:
- tk_offset = TK_OFFS_TAI;
- break;
- default:
- NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
- goto release_idr;
- }
- }
+ gact = to_gate(*a);
+ if (ret == ACT_P_CREATED)
+ INIT_LIST_HEAD(&gact->param.entries);
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
- gact = to_gate(*a);
-
spin_lock_bh(&gact->tcf_lock);
p = &gact->param;
- if (tb[TCA_GATE_CYCLE_TIME]) {
- p->tcfg_cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
- if (!p->tcfg_cycletime_ext)
- goto chain_put;
- }
+ if (tb[TCA_GATE_CYCLE_TIME])
+ cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
if (tb[TCA_GATE_ENTRY_LIST]) {
err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
@@ -387,35 +398,29 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
goto chain_put;
}
- if (!p->tcfg_cycletime) {
+ if (!cycletime) {
struct tcfg_gate_entry *entry;
ktime_t cycle = 0;
list_for_each_entry(entry, &p->entries, list)
cycle = ktime_add_ns(cycle, entry->interval);
- p->tcfg_cycletime = cycle;
+ cycletime = cycle;
+ if (!cycletime) {
+ err = -EINVAL;
+ goto chain_put;
+ }
}
+ p->tcfg_cycletime = cycletime;
if (tb[TCA_GATE_CYCLE_TIME_EXT])
p->tcfg_cycletime_ext =
nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+ gate_setup_timer(gact, basetime, tk_offset, clockid,
+ ret == ACT_P_CREATED);
p->tcfg_priority = prio;
- p->tcfg_basetime = basetime;
- p->tcfg_clockid = clockid;
p->tcfg_flags = gflags;
-
- gact->tk_offset = tk_offset;
- hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
- gact->hitimer.function = gate_timer_func;
-
- err = gate_get_start_time(gact, &start);
- if (err < 0) {
- NL_SET_ERR_MSG(extack,
- "Internal error: failed get start time");
- release_entry_list(&p->entries);
- goto chain_put;
- }
+ gate_get_start_time(gact, &start);
gact->current_close_time = start;
gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
@@ -443,6 +448,13 @@ chain_put:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
release_idr:
+ /* action is not inserted in any list: it's safe to init hitimer
+ * without taking tcf_lock.
+ */
+ if (ret == ACT_P_CREATED)
+ gate_setup_timer(gact, gact->param.tcfg_basetime,
+ gact->tk_offset, gact->param.tcfg_clockid,
+ true);
tcf_idr_release(*a, bind);
return err;
}
@@ -453,9 +465,7 @@ static void tcf_gate_cleanup(struct tc_action *a)
struct tcf_gate_params *p;
p = &gact->param;
- if (p->tcfg_clockid != -1)
- hrtimer_cancel(&gact->hitimer);
-
+ hrtimer_cancel(&gact->hitimer);
release_entry_list(&p->entries);
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index b19a0021a0bd..265a61d011df 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -464,6 +464,7 @@ void __netdev_watchdog_up(struct net_device *dev)
dev_hold(dev);
}
}
+EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
static void dev_watchdog_up(struct net_device *dev)
{
diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 68934438ee19..39d7fa9569f8 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -11,7 +11,7 @@ menuconfig IP_SCTP
select CRYPTO_HMAC
select CRYPTO_SHA1
select LIBCRC32C
- ---help---
+ help
Stream Control Transmission Protocol
From RFC 2960 <http://www.ietf.org/rfc/rfc2960.txt>.
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index f54a70b8da82..1ab3c5a2c5ad 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -2,7 +2,7 @@
config SMC
tristate "SMC socket protocol family"
depends on INET && INFINIBAND
- ---help---
+ help
SMC-R provides a "sockets over RDMA" solution making use of
RDMA over Converged Ethernet (RoCE) technology to upgrade
AF_INET TCP connections transparently.
@@ -14,7 +14,7 @@ config SMC
config SMC_DIAG
tristate "SMC: socket monitoring interface"
depends on SMC
- ---help---
+ help
Support for SMC socket monitoring interface used by tools such as
smcss.
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index f0a5064bf9bd..562a52d01ad1 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -548,18 +548,18 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
static struct ib_client smc_ib_client;
/* callback function for ib_register_client() */
-static void smc_ib_add_dev(struct ib_device *ibdev)
+static int smc_ib_add_dev(struct ib_device *ibdev)
{
struct smc_ib_device *smcibdev;
u8 port_cnt;
int i;
if (ibdev->node_type != RDMA_NODE_IB_CA)
- return;
+ return -EOPNOTSUPP;
smcibdev = kzalloc(sizeof(*smcibdev), GFP_KERNEL);
if (!smcibdev)
- return;
+ return -ENOMEM;
smcibdev->ibdev = ibdev;
INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work);
@@ -594,17 +594,14 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
"");
}
schedule_work(&smcibdev->port_event_work);
+ return 0;
}
/* callback function for ib_unregister_client() */
static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
{
- struct smc_ib_device *smcibdev;
+ struct smc_ib_device *smcibdev = client_data;
- smcibdev = ib_get_client_data(ibdev, &smc_ib_client);
- if (!smcibdev || smcibdev->ibdev != ibdev)
- return;
- ib_set_client_data(ibdev, &smc_ib_client, NULL);
spin_lock(&smc_ib_devices.lock);
list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
spin_unlock(&smc_ib_devices.lock);
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 8b4d72b1a066..010dcb876f9d 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -82,11 +82,11 @@ static size_t rpc_ntop6(const struct sockaddr *sap,
rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u",
IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id);
- if (unlikely((size_t)rc > sizeof(scopebuf)))
+ if (unlikely((size_t)rc >= sizeof(scopebuf)))
return 0;
len += rc;
- if (unlikely(len > buflen))
+ if (unlikely(len >= buflen))
return 0;
strcat(buf, scopebuf);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 5748ad0ba1bd..a9f0d17fdb0d 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -81,7 +81,7 @@ static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp)
unsigned int nbits;
nbits = *(unsigned int *)kp->arg;
- return sprintf(buffer, "%u", 1U << nbits);
+ return sprintf(buffer, "%u\n", 1U << nbits);
}
#define param_check_hashtbl_sz(name, p) __param_check(name, p, unsigned int);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index ac5cac0dd24b..4ecc2a959567 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -254,7 +254,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
if (IS_ERR(p))
goto err;
done:
- trace_rpcgss_context(ctx->gc_expiry, now, timeout,
+ trace_rpcgss_context(window_size, ctx->gc_expiry, now, timeout,
ctx->gc_acceptor.len, ctx->gc_acceptor.data);
err:
return p;
@@ -697,10 +697,12 @@ retry:
}
schedule();
}
- if (gss_msg->ctx)
+ if (gss_msg->ctx) {
+ trace_rpcgss_ctx_init(gss_cred);
gss_cred_set_ctx(cred, gss_msg->ctx);
- else
+ } else {
err = gss_msg->msg.errno;
+ }
spin_unlock(&pipe->lock);
out_intr:
finish_wait(&gss_msg->waitqueue, &wait);
@@ -1054,11 +1056,11 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
auth->au_rslack = GSS_KRB5_MAX_SLACK_NEEDED >> 2;
auth->au_verfsize = GSS_VERF_SLACK >> 2;
auth->au_ralign = GSS_VERF_SLACK >> 2;
- auth->au_flags = 0;
+ __set_bit(RPCAUTH_AUTH_UPDATE_SLACK, &auth->au_flags);
auth->au_ops = &authgss_ops;
auth->au_flavor = flavor;
if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor))
- auth->au_flags |= RPCAUTH_AUTH_DATATOUCH;
+ __set_bit(RPCAUTH_AUTH_DATATOUCH, &auth->au_flags);
refcount_set(&auth->au_count, 1);
kref_init(&gss_auth->kref);
@@ -1284,8 +1286,9 @@ gss_send_destroy_context(struct rpc_cred *cred)
if (new) {
ctx->gc_proc = RPC_GSS_PROC_DESTROY;
+ trace_rpcgss_ctx_destroy(gss_cred);
task = rpc_call_null(gss_auth->client, &new->gc_base,
- RPC_TASK_ASYNC|RPC_TASK_SOFT);
+ RPC_TASK_ASYNC);
if (!IS_ERR(task))
rpc_put_task(task);
@@ -1349,7 +1352,6 @@ gss_destroy_nullcred(struct rpc_cred *cred)
static void
gss_destroy_cred(struct rpc_cred *cred)
{
-
if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0)
gss_send_destroy_context(cred);
gss_destroy_nullcred(cred);
@@ -1613,6 +1615,7 @@ static int gss_renew_cred(struct rpc_task *task)
new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW);
if (IS_ERR(new))
return PTR_ERR(new);
+
task->tk_rqstp->rq_cred = new;
put_rpccred(oldcred);
return 0;
@@ -1709,7 +1712,8 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
/* We leave it to unwrap to calculate au_rslack. For now we just
* calculate the length of the verifier: */
- cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
+ if (test_bit(RPCAUTH_AUTH_UPDATE_SLACK, &cred->cr_auth->au_flags))
+ cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
status = 0;
out:
gss_put_ctx(ctx);
@@ -1927,13 +1931,30 @@ out:
return status;
}
-static int
-gss_unwrap_resp_auth(struct rpc_cred *cred)
+/**
+ * gss_update_rslack - Possibly update RPC receive buffer size estimates
+ * @task: rpc_task for incoming RPC Reply being unwrapped
+ * @cred: controlling rpc_cred for @task
+ * @before: XDR words needed before each RPC Reply message
+ * @after: XDR words needed following each RPC Reply message
+ *
+ */
+static void gss_update_rslack(struct rpc_task *task, struct rpc_cred *cred,
+ unsigned int before, unsigned int after)
{
struct rpc_auth *auth = cred->cr_auth;
- auth->au_rslack = auth->au_verfsize;
- auth->au_ralign = auth->au_verfsize;
+ if (test_and_clear_bit(RPCAUTH_AUTH_UPDATE_SLACK, &auth->au_flags)) {
+ auth->au_ralign = auth->au_verfsize + before;
+ auth->au_rslack = auth->au_verfsize + after;
+ trace_rpcgss_update_slack(task, auth);
+ }
+}
+
+static int
+gss_unwrap_resp_auth(struct rpc_task *task, struct rpc_cred *cred)
+{
+ gss_update_rslack(task, cred, 0, 0);
return 0;
}
@@ -1956,7 +1977,6 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
struct xdr_stream *xdr)
{
struct xdr_buf gss_data, *rcv_buf = &rqstp->rq_rcv_buf;
- struct rpc_auth *auth = cred->cr_auth;
u32 len, offset, seqno, maj_stat;
struct xdr_netobj mic;
int ret;
@@ -2005,8 +2025,7 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
if (maj_stat != GSS_S_COMPLETE)
goto bad_mic;
- auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len);
- auth->au_ralign = auth->au_verfsize + 2;
+ gss_update_rslack(task, cred, 2, 2 + 1 + XDR_QUADLEN(mic.len));
ret = 0;
out:
@@ -2031,7 +2050,6 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
{
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
struct kvec *head = rqstp->rq_rcv_buf.head;
- struct rpc_auth *auth = cred->cr_auth;
u32 offset, opaque_len, maj_stat;
__be32 *p;
@@ -2058,8 +2076,8 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
*/
xdr_init_decode(xdr, rcv_buf, p, rqstp);
- auth->au_rslack = auth->au_verfsize + 2 + ctx->gc_gss_ctx->slack;
- auth->au_ralign = auth->au_verfsize + 2 + ctx->gc_gss_ctx->align;
+ gss_update_rslack(task, cred, 2 + ctx->gc_gss_ctx->align,
+ 2 + ctx->gc_gss_ctx->slack);
return 0;
unwrap_failed:
@@ -2130,7 +2148,7 @@ gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
goto out_decode;
switch (gss_cred->gc_service) {
case RPC_GSS_SVC_NONE:
- status = gss_unwrap_resp_auth(cred);
+ status = gss_unwrap_resp_auth(task, cred);
break;
case RPC_GSS_SVC_INTEGRITY:
status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr);
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 69316ab1b9fa..fae632da1058 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -37,6 +37,8 @@ gss_mech_free(struct gss_api_mech *gm)
for (i = 0; i < gm->gm_pf_num; i++) {
pf = &gm->gm_pfs[i];
+ if (pf->domain)
+ auth_domain_put(pf->domain);
kfree(pf->auth_domain_name);
pf->auth_domain_name = NULL;
}
@@ -59,6 +61,7 @@ make_auth_domain_name(char *name)
static int
gss_mech_svc_setup(struct gss_api_mech *gm)
{
+ struct auth_domain *dom;
struct pf_desc *pf;
int i, status;
@@ -68,10 +71,13 @@ gss_mech_svc_setup(struct gss_api_mech *gm)
status = -ENOMEM;
if (pf->auth_domain_name == NULL)
goto out;
- status = svcauth_gss_register_pseudoflavor(pf->pseudoflavor,
- pf->auth_domain_name);
- if (status)
+ dom = svcauth_gss_register_pseudoflavor(
+ pf->pseudoflavor, pf->auth_domain_name);
+ if (IS_ERR(dom)) {
+ status = PTR_ERR(dom);
goto out;
+ }
+ pf->domain = dom;
}
return 0;
out:
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 0349f455a862..af9c7f43859c 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -223,7 +223,7 @@ static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
static char *gssp_stringify(struct xdr_netobj *netobj)
{
- return kstrndup(netobj->data, netobj->len, GFP_KERNEL);
+ return kmemdup_nul(netobj->data, netobj->len, GFP_KERNEL);
}
static void gssp_hostbased_service(char **principal)
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 50d93c49ef1a..46027d0c903f 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -809,7 +809,7 @@ u32 svcauth_gss_flavor(struct auth_domain *dom)
EXPORT_SYMBOL_GPL(svcauth_gss_flavor);
-int
+struct auth_domain *
svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
{
struct gss_domain *new;
@@ -826,21 +826,23 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
new->h.flavour = &svcauthops_gss;
new->pseudoflavor = pseudoflavor;
- stat = 0;
test = auth_domain_lookup(name, &new->h);
- if (test != &new->h) { /* Duplicate registration */
+ if (test != &new->h) {
+ pr_warn("svc: duplicate registration of gss pseudo flavour %s.\n",
+ name);
+ stat = -EADDRINUSE;
auth_domain_put(test);
- kfree(new->h.name);
- goto out_free_dom;
+ goto out_free_name;
}
- return 0;
+ return test;
+out_free_name:
+ kfree(new->h.name);
out_free_dom:
kfree(new);
out:
- return stat;
+ return ERR_PTR(stat);
}
-
EXPORT_SYMBOL_GPL(svcauth_gss_register_pseudoflavor);
static inline int
diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c
index 5576f1e66de9..49fa583d7f91 100644
--- a/net/sunrpc/auth_gss/trace.c
+++ b/net/sunrpc/auth_gss/trace.c
@@ -6,6 +6,7 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/gss_err.h>
+#include <linux/sunrpc/auth_gss.h>
#define CREATE_TRACE_POINTS
#include <trace/events/rpcgss.h>
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 61b21dafd7c0..a91d1cdad9d7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -370,10 +370,6 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
const char *nodename = args->nodename;
int err;
- /* sanity check the name before trying to print it */
- dprintk("RPC: creating %s client for %s (xprt %p)\n",
- program->name, args->servername, xprt);
-
err = rpciod_up();
if (err)
goto out_no_rpciod;
@@ -436,6 +432,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
goto out_no_path;
if (parent)
atomic_inc(&parent->cl_count);
+
+ trace_rpc_clnt_new(clnt, xprt, program->name, args->servername);
return clnt;
out_no_path:
@@ -450,6 +448,7 @@ out_err:
out_no_rpciod:
xprt_switch_put(xps);
xprt_put(xprt);
+ trace_rpc_clnt_new_err(program->name, args->servername, err);
return ERR_PTR(err);
}
@@ -634,10 +633,8 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
args->nodename = clnt->cl_nodename;
new = rpc_new_client(args, xps, xprt, clnt);
- if (IS_ERR(new)) {
- err = PTR_ERR(new);
- goto out_err;
- }
+ if (IS_ERR(new))
+ return new;
/* Turn off autobind on clones */
new->cl_autobind = 0;
@@ -650,7 +647,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
return new;
out_err:
- dprintk("RPC: %s: returned error %d\n", __func__, err);
+ trace_rpc_clnt_clone_err(clnt, err);
return ERR_PTR(err);
}
@@ -723,11 +720,8 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
int err;
xprt = xprt_create_transport(args);
- if (IS_ERR(xprt)) {
- dprintk("RPC: failed to create new xprt for clnt %p\n",
- clnt);
+ if (IS_ERR(xprt))
return PTR_ERR(xprt);
- }
xps = xprt_switch_alloc(xprt, GFP_KERNEL);
if (xps == NULL) {
@@ -767,7 +761,7 @@ int rpc_switch_client_transport(struct rpc_clnt *clnt,
rpc_release_client(parent);
xprt_switch_put(oldxps);
xprt_put(old);
- dprintk("RPC: replaced xprt for clnt %p\n", clnt);
+ trace_rpc_clnt_replace_xprt(clnt);
return 0;
out_revert:
@@ -777,7 +771,7 @@ out_revert:
rpc_client_register(clnt, pseudoflavor, NULL);
xprt_switch_put(xps);
xprt_put(xprt);
- dprintk("RPC: failed to switch xprt for clnt %p\n", clnt);
+ trace_rpc_clnt_replace_xprt_err(clnt);
return err;
}
EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
@@ -844,10 +838,11 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
if (list_empty(&clnt->cl_tasks))
return;
- dprintk("RPC: killing all tasks for client %p\n", clnt);
+
/*
* Spin lock all_tasks to prevent changes...
*/
+ trace_rpc_clnt_killall(clnt);
spin_lock(&clnt->cl_lock);
list_for_each_entry(rovr, &clnt->cl_tasks, tk_task)
rpc_signal_task(rovr);
@@ -863,9 +858,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt)
{
might_sleep();
- dprintk_rcu("RPC: shutting down %s client for %s\n",
- clnt->cl_program->name,
- rcu_dereference(clnt->cl_xprt)->servername);
+ trace_rpc_clnt_shutdown(clnt);
while (!list_empty(&clnt->cl_tasks)) {
rpc_killall_tasks(clnt);
@@ -884,6 +877,8 @@ static void rpc_free_client_work(struct work_struct *work)
{
struct rpc_clnt *clnt = container_of(work, struct rpc_clnt, cl_work);
+ trace_rpc_clnt_free(clnt);
+
/* These might block on processes that might allocate memory,
* so they cannot be called in rpciod, so they are handled separately
* here.
@@ -901,9 +896,7 @@ rpc_free_client(struct rpc_clnt *clnt)
{
struct rpc_clnt *parent = NULL;
- dprintk_rcu("RPC: destroying %s client for %s\n",
- clnt->cl_program->name,
- rcu_dereference(clnt->cl_xprt)->servername);
+ trace_rpc_clnt_release(clnt);
if (clnt->cl_parent != clnt)
parent = clnt->cl_parent;
rpc_unregister_client(clnt);
@@ -945,8 +938,6 @@ rpc_free_auth(struct rpc_clnt *clnt)
void
rpc_release_client(struct rpc_clnt *clnt)
{
- dprintk("RPC: rpc_release_client(%p)\n", clnt);
-
do {
if (list_empty(&clnt->cl_tasks))
wake_up(&destroy_wait);
@@ -1270,7 +1261,7 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1;
xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
- trace_rpc_reply_pages(req);
+ trace_rpc_xdr_reply_pages(req->rq_task, &req->rq_rcv_buf);
}
EXPORT_SYMBOL_GPL(rpc_prepare_reply_pages);
@@ -1624,6 +1615,7 @@ const char
static void
__rpc_call_rpcerror(struct rpc_task *task, int tk_status, int rpc_status)
{
+ trace_rpc_call_rpcerror(task, tk_status, rpc_status);
task->tk_rpc_status = rpc_status;
rpc_exit(task, tk_status);
}
@@ -2531,7 +2523,7 @@ call_decode(struct rpc_task *task)
goto out;
req->rq_rcv_buf.len = req->rq_private_buf.len;
- trace_xprt_recvfrom(&req->rq_rcv_buf);
+ trace_rpc_xdr_recvfrom(task, &req->rq_rcv_buf);
/* Check that the softirq receive buffer is valid */
WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
@@ -2760,7 +2752,8 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
.rpc_op_cred = cred,
.callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
.callback_data = data,
- .flags = flags | RPC_TASK_NULLCREDS,
+ .flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
+ RPC_TASK_NULLCREDS,
};
return rpc_run_task(&task_setup_data);
@@ -2823,8 +2816,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
goto success;
}
- task = rpc_call_null_helper(clnt, xprt, NULL,
- RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC|RPC_TASK_NULLCREDS,
+ task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC,
&rpc_cb_add_xprt_call_ops, data);
rpc_put_task(task);
@@ -2867,9 +2859,7 @@ int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *clnt,
goto out_err;
/* Test the connection */
- task = rpc_call_null_helper(clnt, xprt, NULL,
- RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS,
- NULL, NULL);
+ task = rpc_call_null_helper(clnt, xprt, NULL, 0, NULL, NULL);
if (IS_ERR(task)) {
status = PTR_ERR(task);
goto out_err;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 4a020b688860..c27123e6ba80 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -795,12 +795,6 @@ void rpcb_getport_async(struct rpc_task *task)
child = rpcb_call_async(rpcb_clnt, map, proc);
rpc_release_client(rpcb_clnt);
- if (IS_ERR(child)) {
- /* rpcb_map_release() has freed the arguments */
- dprintk("RPC: %5u %s: rpc_run_task failed\n",
- task->tk_pid, __func__);
- return;
- }
xprt->stat.bind_count++;
rpc_put_task(child);
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 47a756503d11..f6fe2e6cd65a 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -52,4 +52,5 @@ static inline int sock_is_loopback(struct sock *sk)
int rpc_clients_notifier_register(void);
void rpc_clients_notifier_unregister(void);
+void auth_domain_cleanup(void);
#endif /* _NET_SUNRPC_SUNRPC_H */
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index f9edaa9174a4..236fadc4a439 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -23,6 +23,7 @@
#include <linux/sunrpc/rpc_pipe_fs.h>
#include <linux/sunrpc/xprtsock.h>
+#include "sunrpc.h"
#include "netns.h"
unsigned int sunrpc_net_id;
@@ -131,6 +132,7 @@ cleanup_sunrpc(void)
unregister_rpc_pipefs();
rpc_destroy_mempool();
unregister_pernet_subsys(&sunrpc_net_ops);
+ auth_domain_cleanup();
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
rpc_unregister_sysctl();
#endif
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 9ed3126600ce..c211b607239e 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -88,15 +88,15 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp)
switch (*ip)
{
case SVC_POOL_AUTO:
- return strlcpy(buf, "auto", 20);
+ return strlcpy(buf, "auto\n", 20);
case SVC_POOL_GLOBAL:
- return strlcpy(buf, "global", 20);
+ return strlcpy(buf, "global\n", 20);
case SVC_POOL_PERCPU:
- return strlcpy(buf, "percpu", 20);
+ return strlcpy(buf, "percpu\n", 20);
case SVC_POOL_PERNODE:
- return strlcpy(buf, "pernode", 20);
+ return strlcpy(buf, "pernode\n", 20);
default:
- return sprintf(buf, "%d", *ip);
+ return sprintf(buf, "%d\n", *ip);
}
}
@@ -991,6 +991,7 @@ static int __svc_register(struct net *net, const char *progname,
#endif
}
+ trace_svc_register(progname, version, protocol, port, family, error);
return error;
}
@@ -1000,11 +1001,6 @@ int svc_rpcbind_set_version(struct net *net,
unsigned short proto,
unsigned short port)
{
- dprintk("svc: svc_register(%sv%d, %s, %u, %u)\n",
- progp->pg_name, version,
- proto == IPPROTO_UDP? "udp" : "tcp",
- port, family);
-
return __svc_register(net, progp->pg_name, progp->pg_prog,
version, family, proto, port);
@@ -1024,11 +1020,8 @@ int svc_generic_rpcbind_set(struct net *net,
return 0;
if (vers->vs_hidden) {
- dprintk("svc: svc_register(%sv%d, %s, %u, %u)"
- " (but not telling portmap)\n",
- progp->pg_name, version,
- proto == IPPROTO_UDP? "udp" : "tcp",
- port, family);
+ trace_svc_noregister(progp->pg_name, version, proto,
+ port, family, 0);
return 0;
}
@@ -1106,8 +1099,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi
if (error == -EPROTONOSUPPORT)
error = rpcb_register(net, program, version, 0, 0);
- dprintk("svc: %s(%sv%u), error %d\n",
- __func__, progname, version, error);
+ trace_svc_unregister(progname, version, error);
}
/*
@@ -1132,9 +1124,6 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net)
continue;
if (progp->pg_vers[i]->vs_hidden)
continue;
-
- dprintk("svc: attempting to unregister %sv%u\n",
- progp->pg_name, i);
__svc_unregister(net, progp->pg_prog, i, progp->pg_name);
}
}
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2284ff038dad..43cf8dbde898 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -153,6 +153,7 @@ static void svc_xprt_free(struct kref *kref)
xprt_put(xprt->xpt_bc_xprt);
if (xprt->xpt_bc_xps)
xprt_switch_put(xprt->xpt_bc_xps);
+ trace_svc_xprt_free(xprt);
xprt->xpt_ops->xpo_free(xprt);
module_put(owner);
}
@@ -206,6 +207,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
.sin6_port = htons(port),
};
#endif
+ struct svc_xprt *xprt;
struct sockaddr *sap;
size_t len;
@@ -224,7 +226,11 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
return ERR_PTR(-EAFNOSUPPORT);
}
- return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
+ xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
+ if (IS_ERR(xprt))
+ trace_svc_xprt_create_err(serv->sv_program->pg_name,
+ xcl->xcl_name, sap, xprt);
+ return xprt;
}
/*
@@ -304,15 +310,11 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
{
int err;
- dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
if (err == -EPROTONOSUPPORT) {
request_module("svc%s", xprt_name);
err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred);
}
- if (err < 0)
- dprintk("svc: transport %s not found, err %d\n",
- xprt_name, -err);
return err;
}
EXPORT_SYMBOL_GPL(svc_create_xprt);
@@ -780,7 +782,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
int len = 0;
if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
- dprintk("svc_recv: found XPT_CLOSE\n");
if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags))
xprt->xpt_ops->xpo_kill_temp_xprt(xprt);
svc_delete_xprt(xprt);
@@ -799,6 +800,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
if (newxpt) {
newxpt->xpt_cred = get_cred(xprt->xpt_cred);
svc_add_new_temp_xprt(serv, newxpt);
+ trace_svc_xprt_accept(newxpt, serv->sv_name);
} else
module_put(xprt->xpt_class->xcl_owner);
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
@@ -812,7 +814,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
else
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
if (len > 0)
- trace_svc_recvfrom(&rqstp->rq_arg);
+ trace_svc_xdr_recvfrom(rqstp, &rqstp->rq_arg);
rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
@@ -835,14 +837,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
struct svc_serv *serv = rqstp->rq_server;
int len, err;
- dprintk("svc: server %p waiting for data (to = %ld)\n",
- rqstp, timeout);
-
- if (rqstp->rq_xprt)
- printk(KERN_ERR
- "svc_recv: service %p, transport not NULL!\n",
- rqstp);
-
err = svc_alloc_arg(rqstp);
if (err)
goto out;
@@ -890,7 +884,6 @@ EXPORT_SYMBOL_GPL(svc_recv);
void svc_drop(struct svc_rqst *rqstp)
{
trace_svc_drop(rqstp);
- dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
svc_xprt_release(rqstp);
}
EXPORT_SYMBOL_GPL(svc_drop);
@@ -913,17 +906,11 @@ int svc_send(struct svc_rqst *rqstp)
xb->len = xb->head[0].iov_len +
xb->page_len +
xb->tail[0].iov_len;
- trace_svc_sendto(xb);
-
- /* Grab mutex to serialize outgoing data. */
- mutex_lock(&xprt->xpt_mutex);
+ trace_svc_xdr_sendto(rqstp, xb);
trace_svc_stats_latency(rqstp);
- if (test_bit(XPT_DEAD, &xprt->xpt_flags)
- || test_bit(XPT_CLOSE, &xprt->xpt_flags))
- len = -ENOTCONN;
- else
- len = xprt->xpt_ops->xpo_sendto(rqstp);
- mutex_unlock(&xprt->xpt_mutex);
+
+ len = xprt->xpt_ops->xpo_sendto(rqstp);
+
trace_svc_send(rqstp, len);
svc_xprt_release(rqstp);
@@ -1031,11 +1018,10 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
struct svc_serv *serv = xprt->xpt_server;
struct svc_deferred_req *dr;
- /* Only do this once */
if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
- BUG();
+ return;
- dprintk("svc: svc_delete_xprt(%p)\n", xprt);
+ trace_svc_xprt_detach(xprt);
xprt->xpt_ops->xpo_detach(xprt);
if (xprt->xpt_bc_xprt)
xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt);
@@ -1056,6 +1042,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt)
void svc_close_xprt(struct svc_xprt *xprt)
{
+ trace_svc_xprt_close(xprt);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
/* someone else will have to effect the close */
@@ -1158,16 +1145,15 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
set_bit(XPT_DEFERRED, &xprt->xpt_flags);
if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
spin_unlock(&xprt->xpt_lock);
- dprintk("revisit canceled\n");
+ trace_svc_defer_drop(dr);
svc_xprt_put(xprt);
- trace_svc_drop_deferred(dr);
kfree(dr);
return;
}
- dprintk("revisit queued\n");
dr->xprt = NULL;
list_add(&dr->handle.recent, &xprt->xpt_deferred);
spin_unlock(&xprt->xpt_lock);
+ trace_svc_defer_queue(dr);
svc_xprt_enqueue(xprt);
svc_xprt_put(xprt);
}
@@ -1213,22 +1199,24 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip,
dr->argslen << 2);
}
+ trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
- trace_svc_defer(rqstp);
return &dr->handle;
}
/*
* recv data from a deferred request into an active one
*/
-static int svc_deferred_recv(struct svc_rqst *rqstp)
+static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
{
struct svc_deferred_req *dr = rqstp->rq_deferred;
+ trace_svc_defer_recv(dr);
+
/* setup iov_base past transport header */
rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2);
/* The iov_len does not include the transport header bytes */
@@ -1259,7 +1247,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
struct svc_deferred_req,
handle.recent);
list_del_init(&dr->handle.recent);
- trace_svc_revisit_deferred(dr);
} else
clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
spin_unlock(&xprt->xpt_lock);
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 552617e3467b..998b196b6176 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -21,6 +21,8 @@
#include <trace/events/sunrpc.h>
+#include "sunrpc.h"
+
#define RPCDBG_FACILITY RPCDBG_AUTH
@@ -205,3 +207,26 @@ struct auth_domain *auth_domain_find(char *name)
return NULL;
}
EXPORT_SYMBOL_GPL(auth_domain_find);
+
+/**
+ * auth_domain_cleanup - check that the auth_domain table is empty
+ *
+ * On module unload the auth_domain_table must be empty. To make it
+ * easier to catch bugs which don't clean up domains properly, we
+ * warn if anything remains in the table at cleanup time.
+ *
+ * Note that we cannot proactively remove the domains at this stage.
+ * The ->release() function might be in a module that has already been
+ * unloaded.
+ */
+
+void auth_domain_cleanup(void)
+{
+ int h;
+ struct auth_domain *hp;
+
+ for (h = 0; h < DN_HASHMAX; h++)
+ hlist_for_each_entry(hp, &auth_domain_table[h], hash)
+ pr_warn("svc: domain %s still present at module unload.\n",
+ hp->name);
+}
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 6c8f802c4261..97c0bddba7a3 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -332,15 +332,6 @@ static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm,
return 0;
}
-static inline int ip_map_update(struct net *net, struct ip_map *ipm,
- struct unix_domain *udom, time64_t expiry)
-{
- struct sunrpc_net *sn;
-
- sn = net_generic(net, sunrpc_net_id);
- return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
-}
-
void svcauth_unix_purge(struct net *net)
{
struct sunrpc_net *sn;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e7a0037d9b56..5c4ec9386f81 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -45,7 +45,6 @@
#include <net/tcp_states.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
-#include <trace/events/skb.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/clnt.h>
@@ -55,6 +54,8 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/xprt.h>
+#include <trace/events/sunrpc.h>
+
#include "socklib.h"
#include "sunrpc.h"
@@ -108,31 +109,35 @@ static void svc_reclassify_socket(struct socket *sock)
}
#endif
-/*
- * Release an skbuff after use
+/**
+ * svc_tcp_release_rqst - Release transport-related resources
+ * @rqstp: request structure with resources to be released
+ *
*/
-static void svc_release_skb(struct svc_rqst *rqstp)
+static void svc_tcp_release_rqst(struct svc_rqst *rqstp)
{
struct sk_buff *skb = rqstp->rq_xprt_ctxt;
if (skb) {
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
- rqstp->rq_xprt_ctxt = NULL;
- dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
+ rqstp->rq_xprt_ctxt = NULL;
skb_free_datagram_locked(svsk->sk_sk, skb);
}
}
-static void svc_release_udp_skb(struct svc_rqst *rqstp)
+/**
+ * svc_udp_release_rqst - Release transport-related resources
+ * @rqstp: request structure with resources to be released
+ *
+ */
+static void svc_udp_release_rqst(struct svc_rqst *rqstp)
{
struct sk_buff *skb = rqstp->rq_xprt_ctxt;
if (skb) {
rqstp->rq_xprt_ctxt = NULL;
-
- dprintk("svc: service %p, releasing skb %p\n", rqstp, skb);
consume_skb(skb);
}
}
@@ -218,34 +223,68 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
return len;
}
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek)
+{
+ struct bvec_iter bi = {
+ .bi_size = size,
+ };
+ struct bio_vec bv;
+
+ bvec_iter_advance(bvec, &bi, seek & PAGE_MASK);
+ for_each_bvec(bv, bvec, bi, bi)
+ flush_dcache_page(bv.bv_page);
+}
+#else
+static inline void svc_flush_bvec(const struct bio_vec *bvec, size_t size,
+ size_t seek)
+{
+}
+#endif
+
/*
- * Generic recvfrom routine.
+ * Read from @rqstp's transport socket. The incoming message fills whole
+ * pages in @rqstp's rq_pages array until the last page of the message
+ * has been received into a partial page.
*/
-static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov,
- unsigned int nr, size_t buflen, unsigned int base)
+static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
+ size_t seek)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
+ struct bio_vec *bvec = rqstp->rq_bvec;
struct msghdr msg = { NULL };
+ unsigned int i;
ssize_t len;
+ size_t t;
rqstp->rq_xprt_hlen = 0;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen);
- if (base != 0) {
- iov_iter_advance(&msg.msg_iter, base);
- buflen -= base;
+
+ for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) {
+ bvec[i].bv_page = rqstp->rq_pages[i];
+ bvec[i].bv_len = PAGE_SIZE;
+ bvec[i].bv_offset = 0;
+ }
+ rqstp->rq_respages = &rqstp->rq_pages[i];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+ iov_iter_bvec(&msg.msg_iter, READ, bvec, i, buflen);
+ if (seek) {
+ iov_iter_advance(&msg.msg_iter, seek);
+ buflen -= seek;
}
len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
+ if (len > 0)
+ svc_flush_bvec(bvec, len, seek);
+
/* If we read a full record, then assume there may be more
* data to read (stream based sockets only!)
*/
if (len == buflen)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- dprintk("svc: socket %p recvfrom(%p, %zu) = %zd\n",
- svsk, iov[0].iov_base, iov[0].iov_len, len);
return len;
}
@@ -282,13 +321,10 @@ static void svc_data_ready(struct sock *sk)
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
if (svsk) {
- dprintk("svc: socket %p(inet %p), busy=%d\n",
- svsk, sk,
- test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
-
/* Refer to svc_setup_socket() for details. */
rmb();
svsk->sk_odata(sk);
+ trace_svcsock_data_ready(&svsk->sk_xprt, 0);
if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
svc_xprt_enqueue(&svsk->sk_xprt);
}
@@ -302,11 +338,9 @@ static void svc_write_space(struct sock *sk)
struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
if (svsk) {
- dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
- svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
-
/* Refer to svc_setup_socket() for details. */
rmb();
+ trace_svcsock_write_space(&svsk->sk_xprt, 0);
svsk->sk_owspace(sk);
svc_xprt_enqueue(&svsk->sk_xprt);
}
@@ -383,8 +417,15 @@ static int svc_udp_get_dest_address(struct svc_rqst *rqstp,
return 0;
}
-/*
- * Receive a datagram from a UDP socket.
+/**
+ * svc_udp_recvfrom - Receive a datagram from a UDP socket.
+ * @rqstp: request structure into which to receive an RPC Call
+ *
+ * Called in a loop when XPT_DATA has been set.
+ *
+ * Returns:
+ * On success, the number of bytes in a received RPC Call, or
+ * %0 if a complete RPC Call message was not ready to return
*/
static int svc_udp_recvfrom(struct svc_rqst *rqstp)
{
@@ -418,20 +459,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3);
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- skb = NULL;
err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
0, 0, MSG_PEEK | MSG_DONTWAIT);
- if (err >= 0)
- skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
-
- if (skb == NULL) {
- if (err != -EAGAIN) {
- /* possibly an icmp error */
- dprintk("svc: recvfrom returned error %d\n", -err);
- set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- }
- return 0;
- }
+ if (err < 0)
+ goto out_recv_err;
+ skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err);
+ if (!skb)
+ goto out_recv_err;
+
len = svc_addr_len(svc_addr(rqstp));
rqstp->rq_addrlen = len;
if (skb->tstamp == 0) {
@@ -442,26 +477,21 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
sock_write_timestamp(svsk->sk_sk, skb->tstamp);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
- len = skb->len;
+ len = skb->len;
rqstp->rq_arg.len = len;
+ trace_svcsock_udp_recv(&svsk->sk_xprt, len);
rqstp->rq_prot = IPPROTO_UDP;
- if (!svc_udp_get_dest_address(rqstp, cmh)) {
- net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
- cmh->cmsg_level, cmh->cmsg_type);
- goto out_free;
- }
+ if (!svc_udp_get_dest_address(rqstp, cmh))
+ goto out_cmsg_err;
rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp));
if (skb_is_nonlinear(skb)) {
/* we have to copy */
local_bh_disable();
- if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
- local_bh_enable();
- /* checksum error */
- goto out_free;
- }
+ if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb))
+ goto out_bh_enable;
local_bh_enable();
consume_skb(skb);
} else {
@@ -489,6 +519,20 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
serv->sv_stats->netudpcnt++;
return len;
+
+out_recv_err:
+ if (err != -EAGAIN) {
+ /* possibly an icmp error */
+ set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+ }
+ trace_svcsock_udp_recv_err(&svsk->sk_xprt, err);
+ return 0;
+out_cmsg_err:
+ net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
+ cmh->cmsg_level, cmh->cmsg_type);
+ goto out_free;
+out_bh_enable:
+ local_bh_enable();
out_free:
kfree_skb(skb);
return 0;
@@ -498,6 +542,9 @@ out_free:
* svc_udp_sendto - Send out a reply on a UDP socket
* @rqstp: completed svc_rqst
*
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
* Returns the number of bytes sent, or a negative errno.
*/
static int svc_udp_sendto(struct svc_rqst *rqstp)
@@ -519,10 +566,15 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent);
int err;
- svc_release_udp_skb(rqstp);
+ svc_udp_release_rqst(rqstp);
svc_set_cmsg_data(rqstp, cmh);
+ mutex_lock(&xprt->xpt_mutex);
+
+ if (svc_xprt_is_dead(xprt))
+ goto out_notconn;
+
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
if (err == -ECONNREFUSED) {
@@ -530,9 +582,16 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
xdr_free_bvec(xdr);
}
+ trace_svcsock_udp_send(xprt, err);
+
+ mutex_unlock(&xprt->xpt_mutex);
if (err < 0)
return err;
return sent;
+
+out_notconn:
+ mutex_unlock(&xprt->xpt_mutex);
+ return -ENOTCONN;
}
static int svc_udp_has_wspace(struct svc_xprt *xprt)
@@ -576,7 +635,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
.xpo_read_payload = svc_sock_read_payload,
- .xpo_release_rqst = svc_release_udp_skb,
+ .xpo_release_rqst = svc_udp_release_rqst,
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_udp_has_wspace,
@@ -632,9 +691,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- dprintk("svc: socket %p TCP (listen) state change %d\n",
- sk, sk->sk_state);
-
if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
@@ -655,8 +711,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
if (svsk) {
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
- } else
- printk("svc: socket %p: no user data\n", sk);
+ }
}
}
@@ -667,15 +722,11 @@ static void svc_tcp_state_change(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
- sk, sk->sk_state, sk->sk_user_data);
-
- if (!svsk)
- printk("svc: socket %p: no user data\n", sk);
- else {
+ if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
svsk->sk_ostate(sk);
+ trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock);
if (sk->sk_state != TCP_ESTABLISHED) {
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
svc_xprt_enqueue(&svsk->sk_xprt);
@@ -696,9 +747,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
struct socket *newsock;
struct svc_sock *newsvsk;
int err, slen;
- RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
- dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
if (!sock)
return NULL;
@@ -711,30 +760,18 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
else if (err != -EAGAIN)
net_warn_ratelimited("%s: accept failed (err %d)!\n",
serv->sv_name, -err);
+ trace_svcsock_accept_err(xprt, serv->sv_name, err);
return NULL;
}
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
err = kernel_getpeername(newsock, sin);
if (err < 0) {
- net_warn_ratelimited("%s: peername failed (err %d)!\n",
- serv->sv_name, -err);
+ trace_svcsock_getpeername_err(xprt, serv->sv_name, err);
goto failed; /* aborted connection or whatever */
}
slen = err;
- /* Ideally, we would want to reject connections from unauthorized
- * hosts here, but when we get encryption, the IP of the host won't
- * tell us anything. For now just warn about unpriv connections.
- */
- if (!svc_port_is_privileged(sin)) {
- dprintk("%s: connect from unprivileged port: %s\n",
- serv->sv_name,
- __svc_print_addr(sin, buf, sizeof(buf)));
- }
- dprintk("%s: connect from %s\n", serv->sv_name,
- __svc_print_addr(sin, buf, sizeof(buf)));
-
/* Reset the inherited callbacks before calling svc_setup_socket */
newsock->sk->sk_state_change = svsk->sk_ostate;
newsock->sk->sk_data_ready = svsk->sk_odata;
@@ -752,10 +789,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
err = kernel_getsockname(newsock, sin);
slen = err;
- if (unlikely(err < 0)) {
- dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err);
+ if (unlikely(err < 0))
slen = offsetof(struct sockaddr, sa_data);
- }
svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
if (sock_is_loopback(newsock->sk))
@@ -772,13 +807,14 @@ failed:
return NULL;
}
-static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
+static size_t svc_tcp_restore_pages(struct svc_sock *svsk,
+ struct svc_rqst *rqstp)
{
- unsigned int i, len, npages;
+ size_t len = svsk->sk_datalen;
+ unsigned int i, npages;
- if (svsk->sk_datalen == 0)
+ if (!len)
return 0;
- len = svsk->sk_datalen;
npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < npages; i++) {
if (rqstp->rq_pages[i] != NULL)
@@ -827,47 +863,45 @@ out:
}
/*
- * Receive fragment record header.
- * If we haven't gotten the record length yet, get the next four bytes.
+ * Receive fragment record header into sk_marker.
*/
-static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
+static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
+ struct svc_rqst *rqstp)
{
- struct svc_serv *serv = svsk->sk_xprt.xpt_server;
- unsigned int want;
- int len;
+ ssize_t want, len;
+ /* If we haven't gotten the record length yet,
+ * get the next four bytes.
+ */
if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
+ struct msghdr msg = { NULL };
struct kvec iov;
want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
- iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
+ iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
iov.iov_len = want;
- len = svc_recvfrom(rqstp, &iov, 1, want, 0);
+ iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, want);
+ len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
if (len < 0)
- goto error;
+ return len;
svsk->sk_tcplen += len;
-
if (len < want) {
- dprintk("svc: short recvfrom while reading record "
- "length (%d of %d)\n", len, want);
- return -EAGAIN;
+ /* call again to read the remaining bytes */
+ goto err_short;
}
-
- dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk));
+ trace_svcsock_marker(&svsk->sk_xprt, svsk->sk_marker);
if (svc_sock_reclen(svsk) + svsk->sk_datalen >
- serv->sv_max_mesg) {
- net_notice_ratelimited("RPC: fragment too large: %d\n",
- svc_sock_reclen(svsk));
- goto err_delete;
- }
+ svsk->sk_xprt.xpt_server->sv_max_mesg)
+ goto err_too_large;
}
-
return svc_sock_reclen(svsk);
-error:
- dprintk("RPC: TCP recv_record got %d\n", len);
- return len;
-err_delete:
+
+err_too_large:
+ net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
+ __func__, svsk->sk_xprt.xpt_server->sv_name,
+ svc_sock_reclen(svsk));
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+err_short:
return -EAGAIN;
}
@@ -916,87 +950,58 @@ unlock_eagain:
return -EAGAIN;
}
-static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
-{
- int i = 0;
- int t = 0;
-
- while (t < len) {
- vec[i].iov_base = page_address(pages[i]);
- vec[i].iov_len = PAGE_SIZE;
- i++;
- t += PAGE_SIZE;
- }
- return i;
-}
-
static void svc_tcp_fragment_received(struct svc_sock *svsk)
{
/* If we have more data, signal svc_xprt_enqueue() to try again */
- dprintk("svc: TCP %s record (%d bytes)\n",
- svc_sock_final_rec(svsk) ? "final" : "nonfinal",
- svc_sock_reclen(svsk));
svsk->sk_tcplen = 0;
- svsk->sk_reclen = 0;
+ svsk->sk_marker = xdr_zero;
}
-/*
- * Receive data from a TCP socket.
+/**
+ * svc_tcp_recvfrom - Receive data from a TCP socket
+ * @rqstp: request structure into which to receive an RPC Call
+ *
+ * Called in a loop when XPT_DATA has been set.
+ *
+ * Read the 4-byte stream record marker, then use the record length
+ * in that marker to set up exactly the resources needed to receive
+ * the next RPC message into @rqstp.
+ *
+ * Returns:
+ * On success, the number of bytes in a received RPC Call, or
+ * %0 if a complete RPC Call message was not ready to return
+ *
+ * The zero return case handles partial receives and callback Replies.
+ * The state of a partial receive is preserved in the svc_sock for
+ * the next call to svc_tcp_recvfrom.
*/
static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
{
struct svc_sock *svsk =
container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt);
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
- int len;
- struct kvec *vec;
- unsigned int want, base;
+ size_t want, base;
+ ssize_t len;
__be32 *p;
__be32 calldir;
- int pnum;
-
- dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
- svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
- test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
- test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
- len = svc_tcp_recv_record(svsk, rqstp);
+ clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+ len = svc_tcp_read_marker(svsk, rqstp);
if (len < 0)
goto error;
base = svc_tcp_restore_pages(svsk, rqstp);
- want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
-
- vec = rqstp->rq_vec;
-
- pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want);
-
- rqstp->rq_respages = &rqstp->rq_pages[pnum];
- rqstp->rq_next_page = rqstp->rq_respages + 1;
-
- /* Now receive data */
- len = svc_recvfrom(rqstp, vec, pnum, base + want, base);
+ want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr));
+ len = svc_tcp_read_msg(rqstp, base + want, base);
if (len >= 0) {
+ trace_svcsock_tcp_recv(&svsk->sk_xprt, len);
svsk->sk_tcplen += len;
svsk->sk_datalen += len;
}
- if (len != want || !svc_sock_final_rec(svsk)) {
- svc_tcp_save_pages(svsk, rqstp);
- if (len < 0 && len != -EAGAIN)
- goto err_delete;
- if (len == want)
- svc_tcp_fragment_received(svsk);
- else
- dprintk("svc: incomplete TCP record (%d of %d)\n",
- (int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)),
- svc_sock_reclen(svsk));
- goto err_noclose;
- }
-
- if (svsk->sk_datalen < 8) {
- svsk->sk_datalen = 0;
- goto err_delete; /* client is nuts. */
- }
+ if (len != want || !svc_sock_final_rec(svsk))
+ goto err_incomplete;
+ if (svsk->sk_datalen < 8)
+ goto err_nuts;
rqstp->rq_arg.len = svsk->sk_datalen;
rqstp->rq_arg.page_base = 0;
@@ -1031,14 +1036,26 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
return rqstp->rq_arg.len;
+err_incomplete:
+ svc_tcp_save_pages(svsk, rqstp);
+ if (len < 0 && len != -EAGAIN)
+ goto err_delete;
+ if (len == want)
+ svc_tcp_fragment_received(svsk);
+ else
+ trace_svcsock_tcp_recv_short(&svsk->sk_xprt,
+ svc_sock_reclen(svsk),
+ svsk->sk_tcplen - sizeof(rpc_fraghdr));
+ goto err_noclose;
error:
if (len != -EAGAIN)
goto err_delete;
- dprintk("RPC: TCP recvfrom got EAGAIN\n");
+ trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0);
return 0;
+err_nuts:
+ svsk->sk_datalen = 0;
err_delete:
- printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
- svsk->sk_xprt.xpt_server->sv_name, -len);
+ trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len);
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
err_noclose:
return 0; /* record not complete */
@@ -1048,6 +1065,9 @@ err_noclose:
* svc_tcp_sendto - Send out a reply on a TCP socket
* @rqstp: completed svc_rqst
*
+ * xpt_mutex ensures @rqstp's whole message is written to the socket
+ * without interruption.
+ *
* Returns the number of bytes sent, or a negative errno.
*/
static int svc_tcp_sendto(struct svc_rqst *rqstp)
@@ -1063,14 +1083,22 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
unsigned int uninitialized_var(sent);
int err;
- svc_release_skb(rqstp);
+ svc_tcp_release_rqst(rqstp);
+ mutex_lock(&xprt->xpt_mutex);
+ if (svc_xprt_is_dead(xprt))
+ goto out_notconn;
err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
xdr_free_bvec(xdr);
+ trace_svcsock_tcp_send(xprt, err < 0 ? err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
+ mutex_unlock(&xprt->xpt_mutex);
return sent;
+out_notconn:
+ mutex_unlock(&xprt->xpt_mutex);
+ return -ENOTCONN;
out_close:
pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
xprt->xpt_server->sv_name,
@@ -1078,6 +1106,7 @@ out_close:
(err < 0) ? err : sent, xdr->len);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
+ mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
}
@@ -1094,7 +1123,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto,
.xpo_read_payload = svc_sock_read_payload,
- .xpo_release_rqst = svc_release_skb,
+ .xpo_release_rqst = svc_tcp_release_rqst,
.xpo_detach = svc_tcp_sock_detach,
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_tcp_has_wspace,
@@ -1132,18 +1161,16 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state == TCP_LISTEN) {
- dprintk("setting up TCP socket for listening\n");
strcpy(svsk->sk_xprt.xpt_remotebuf, "listener");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
sk->sk_data_ready = svc_tcp_listen_data_ready;
set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
} else {
- dprintk("setting up TCP socket for reading\n");
sk->sk_state_change = svc_tcp_state_change;
sk->sk_data_ready = svc_data_ready;
sk->sk_write_space = svc_write_space;
- svsk->sk_reclen = 0;
+ svsk->sk_marker = xdr_zero;
svsk->sk_tcplen = 0;
svsk->sk_datalen = 0;
memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
@@ -1188,7 +1215,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
int err = 0;
- dprintk("svc: svc_setup_socket %p\n", sock);
svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
if (!svsk)
return ERR_PTR(-ENOMEM);
@@ -1225,12 +1251,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
else
svc_tcp_init(svsk, serv);
- dprintk("svc: svc_setup_socket created %p (inet %p), "
- "listen %d close %d\n",
- svsk, svsk->sk_sk,
- test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags),
- test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
-
+ trace_svcsock_new_socket(sock);
return svsk;
}
@@ -1322,11 +1343,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
struct sockaddr *newsin = (struct sockaddr *)&addr;
int newlen;
int family;
- RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
-
- dprintk("svc: svc_create_socket(%s, %d, %s)\n",
- serv->sv_program->pg_name, protocol,
- __svc_print_addr(sin, buf, sizeof(buf)));
if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
printk(KERN_WARNING "svc: only UDP and TCP "
@@ -1383,7 +1399,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
return (struct svc_xprt *)svsk;
bummer:
- dprintk("svc: svc_create_socket error = %d\n", -error);
sock_release(sock);
return ERR_PTR(error);
}
@@ -1397,8 +1412,6 @@ static void svc_sock_detach(struct svc_xprt *xprt)
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct sock *sk = svsk->sk_sk;
- dprintk("svc: svc_sock_detach(%p)\n", svsk);
-
/* put back the old socket callbacks */
lock_sock(sk);
sk->sk_state_change = svsk->sk_ostate;
@@ -1415,8 +1428,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk);
-
svc_sock_detach(xprt);
if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
@@ -1431,7 +1442,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- dprintk("svc: svc_sock_free(%p)\n", svsk);
if (svsk->sk_sock->file)
sockfd_put(svsk->sk_sock);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 493a30a296fc..d5cc5db9dbf3 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -663,6 +663,7 @@ static void xprt_autoclose(struct work_struct *work)
container_of(work, struct rpc_xprt, task_cleanup);
unsigned int pflags = memalloc_nofs_save();
+ trace_xprt_disconnect_auto(xprt);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
xprt->ops->close(xprt);
xprt_release_write(xprt, NULL);
@@ -677,7 +678,7 @@ static void xprt_autoclose(struct work_struct *work)
*/
void xprt_disconnect_done(struct rpc_xprt *xprt)
{
- dprintk("RPC: disconnected transport %p\n", xprt);
+ trace_xprt_disconnect_done(xprt);
spin_lock(&xprt->transport_lock);
xprt_clear_connected(xprt);
xprt_clear_write_space_locked(xprt);
@@ -694,6 +695,8 @@ EXPORT_SYMBOL_GPL(xprt_disconnect_done);
*/
void xprt_force_disconnect(struct rpc_xprt *xprt)
{
+ trace_xprt_disconnect_force(xprt);
+
/* Don't race with the test_bit() in xprt_clear_locked() */
spin_lock(&xprt->transport_lock);
set_bit(XPRT_CLOSE_WAIT, &xprt->state);
@@ -832,8 +835,10 @@ void xprt_connect(struct rpc_task *task)
if (!xprt_lock_write(xprt, task))
return;
- if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state))
+ if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
+ trace_xprt_disconnect_cleanup(xprt);
xprt->ops->close(xprt);
+ }
if (!xprt_connected(xprt)) {
task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
@@ -1460,7 +1465,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
*/
req->rq_ntrans++;
- trace_xprt_sendto(&req->rq_snd_buf);
+ trace_rpc_xdr_sendto(task, &req->rq_snd_buf);
connect_cookie = xprt->connect_cookie;
status = xprt->ops->send_request(req);
if (status != 0) {
@@ -1903,11 +1908,8 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
found:
xprt = t->setup(args);
- if (IS_ERR(xprt)) {
- dprintk("RPC: xprt_create_transport: failed, %ld\n",
- -PTR_ERR(xprt));
+ if (IS_ERR(xprt))
goto out;
- }
if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT)
xprt->idle_timeout = 0;
INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
@@ -1928,8 +1930,7 @@ found:
rpc_xprt_debugfs_register(xprt);
- dprintk("RPC: created transport %p with %u slots\n", xprt,
- xprt->max_reqs);
+ trace_xprt_create(xprt);
out:
return xprt;
}
@@ -1939,6 +1940,8 @@ static void xprt_destroy_cb(struct work_struct *work)
struct rpc_xprt *xprt =
container_of(work, struct rpc_xprt, task_cleanup);
+ trace_xprt_destroy(xprt);
+
rpc_xprt_debugfs_unregister(xprt);
rpc_destroy_wait_queue(&xprt->binding);
rpc_destroy_wait_queue(&xprt->pending);
@@ -1963,8 +1966,6 @@ static void xprt_destroy_cb(struct work_struct *work)
*/
static void xprt_destroy(struct rpc_xprt *xprt)
{
- dprintk("RPC: destroying transport %p\n", xprt);
-
/*
* Exclude transport connect/disconnect handlers and autoclose
*/
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 3c627dc685cc..2081c8fbfa48 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -892,8 +892,8 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
* or privacy, direct data placement of individual data items
* is not allowed.
*/
- ddp_allowed = !(rqst->rq_cred->cr_auth->au_flags &
- RPCAUTH_AUTH_DATATOUCH);
+ ddp_allowed = !test_bit(RPCAUTH_AUTH_DATATOUCH,
+ &rqst->rq_cred->cr_auth->au_flags);
/*
* Chunks needed for results?
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index af7eb8d202ae..1ee73f7cf931 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -10,59 +10,34 @@
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-
-#undef SVCRDMA_BACKCHANNEL_DEBUG
-
/**
- * svc_rdma_handle_bc_reply - Process incoming backchannel reply
- * @xprt: controlling backchannel transport
- * @rdma_resp: pointer to incoming transport header
- * @rcvbuf: XDR buffer into which to decode the reply
+ * svc_rdma_handle_bc_reply - Process incoming backchannel Reply
+ * @rqstp: resources for handling the Reply
+ * @rctxt: Received message
*
- * Returns:
- * %0 if @rcvbuf is filled in, xprt_complete_rqst called,
- * %-EAGAIN if server should call ->recvfrom again.
*/
-int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
- struct xdr_buf *rcvbuf)
+void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *rctxt)
{
+ struct svc_xprt *sxprt = rqstp->rq_xprt;
+ struct rpc_xprt *xprt = sxprt->xpt_bc_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct xdr_buf *rcvbuf = &rqstp->rq_arg;
struct kvec *dst, *src = &rcvbuf->head[0];
+ __be32 *rdma_resp = rctxt->rc_recv_buf;
struct rpc_rqst *req;
u32 credits;
- size_t len;
- __be32 xid;
- __be32 *p;
- int ret;
-
- p = (__be32 *)src->iov_base;
- len = src->iov_len;
- xid = *rdma_resp;
-
-#ifdef SVCRDMA_BACKCHANNEL_DEBUG
- pr_info("%s: xid=%08x, length=%zu\n",
- __func__, be32_to_cpu(xid), len);
- pr_info("%s: RPC/RDMA: %*ph\n",
- __func__, (int)RPCRDMA_HDRLEN_MIN, rdma_resp);
- pr_info("%s: RPC: %*ph\n",
- __func__, (int)len, p);
-#endif
-
- ret = -EAGAIN;
- if (src->iov_len < 24)
- goto out_shortreply;
spin_lock(&xprt->queue_lock);
- req = xprt_lookup_rqst(xprt, xid);
+ req = xprt_lookup_rqst(xprt, *rdma_resp);
if (!req)
- goto out_notfound;
+ goto out_unlock;
dst = &req->rq_private_buf.head[0];
memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
- if (dst->iov_len < len)
+ if (dst->iov_len < src->iov_len)
goto out_unlock;
- memcpy(dst->iov_base, p, len);
+ memcpy(dst->iov_base, src->iov_base, src->iov_len);
xprt_pin_rqst(req);
spin_unlock(&xprt->queue_lock);
@@ -71,31 +46,17 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
credits = r_xprt->rx_buf.rb_bc_max_requests;
-
spin_lock(&xprt->transport_lock);
xprt->cwnd = credits << RPC_CWNDSHIFT;
spin_unlock(&xprt->transport_lock);
spin_lock(&xprt->queue_lock);
- ret = 0;
xprt_complete_rqst(req->rq_task, rcvbuf->len);
xprt_unpin_rqst(req);
rcvbuf->len = 0;
out_unlock:
spin_unlock(&xprt->queue_lock);
-out:
- return ret;
-
-out_shortreply:
- dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
- xprt, src->iov_len);
- goto out;
-
-out_notfound:
- dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
- xprt, be32_to_cpu(xid));
- goto out_unlock;
}
/* Send a backwards direction RPC call.
@@ -192,10 +153,6 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
*p++ = xdr_zero;
*p = xdr_zero;
-#ifdef SVCRDMA_BACKCHANNEL_DEBUG
- pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
-#endif
-
rqst->rq_xtime = ktime_get();
rc = svc_rdma_bc_sendto(rdma, rqst, ctxt);
if (rc)
@@ -206,45 +163,36 @@ put_ctxt:
svc_rdma_send_ctxt_put(rdma, ctxt);
drop_connection:
- dprintk("svcrdma: failed to send bc call\n");
return -ENOTCONN;
}
-/* Send an RPC call on the passive end of a transport
- * connection.
+/**
+ * xprt_rdma_bc_send_request - Send a reverse-direction Call
+ * @rqst: rpc_rqst containing Call message to be sent
+ *
+ * Return values:
+ * %0 if the message was sent successfully
+ * %ENOTCONN if the message was not sent
*/
-static int
-xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
+static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
{
struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
- struct svcxprt_rdma *rdma;
+ struct svcxprt_rdma *rdma =
+ container_of(sxprt, struct svcxprt_rdma, sc_xprt);
int ret;
- dprintk("svcrdma: sending bc call with xid: %08x\n",
- be32_to_cpu(rqst->rq_xid));
+ if (test_bit(XPT_DEAD, &sxprt->xpt_flags))
+ return -ENOTCONN;
- mutex_lock(&sxprt->xpt_mutex);
-
- ret = -ENOTCONN;
- rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
- if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) {
- ret = rpcrdma_bc_send_request(rdma, rqst);
- if (ret == -ENOTCONN)
- svc_close_xprt(sxprt);
- }
-
- mutex_unlock(&sxprt->xpt_mutex);
-
- if (ret < 0)
- return ret;
- return 0;
+ ret = rpcrdma_bc_send_request(rdma, rqst);
+ if (ret == -ENOTCONN)
+ svc_close_xprt(sxprt);
+ return ret;
}
static void
xprt_rdma_bc_close(struct rpc_xprt *xprt)
{
- dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
-
xprt_disconnect_done(xprt);
xprt->cwnd = RPC_CWNDSHIFT;
}
@@ -252,8 +200,6 @@ xprt_rdma_bc_close(struct rpc_xprt *xprt)
static void
xprt_rdma_bc_put(struct rpc_xprt *xprt)
{
- dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
-
xprt_rdma_free_addresses(xprt);
xprt_free(xprt);
}
@@ -288,19 +234,14 @@ xprt_setup_rdma_bc(struct xprt_create *args)
struct rpc_xprt *xprt;
struct rpcrdma_xprt *new_xprt;
- if (args->addrlen > sizeof(xprt->addr)) {
- dprintk("RPC: %s: address too large\n", __func__);
+ if (args->addrlen > sizeof(xprt->addr))
return ERR_PTR(-EBADF);
- }
xprt = xprt_alloc(args->net, sizeof(*new_xprt),
RPCRDMA_MAX_BC_REQUESTS,
RPCRDMA_MAX_BC_REQUESTS);
- if (!xprt) {
- dprintk("RPC: %s: couldn't allocate rpc_xprt\n",
- __func__);
+ if (!xprt)
return ERR_PTR(-ENOMEM);
- }
xprt->timeout = &xprt_rdma_bc_timeout;
xprt_set_bound(xprt);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index efa5fcb5793f..e426fedb9524 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -665,23 +665,23 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
return hdr_len;
out_short:
- trace_svcrdma_decode_short(rq_arg->len);
+ trace_svcrdma_decode_short_err(rq_arg->len);
return -EINVAL;
out_version:
- trace_svcrdma_decode_badvers(rdma_argp);
+ trace_svcrdma_decode_badvers_err(rdma_argp);
return -EPROTONOSUPPORT;
out_drop:
- trace_svcrdma_decode_drop(rdma_argp);
+ trace_svcrdma_decode_drop_err(rdma_argp);
return 0;
out_proc:
- trace_svcrdma_decode_badproc(rdma_argp);
+ trace_svcrdma_decode_badproc_err(rdma_argp);
return -EINVAL;
out_inval:
- trace_svcrdma_decode_parse(rdma_argp);
+ trace_svcrdma_decode_parse_err(rdma_argp);
return -EINVAL;
}
@@ -878,12 +878,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto out_drop;
rqstp->rq_xprt_hlen = ret;
- if (svc_rdma_is_backchannel_reply(xprt, p)) {
- ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p,
- &rqstp->rq_arg);
- svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
- return ret;
- }
+ if (svc_rdma_is_backchannel_reply(xprt, p))
+ goto out_backchannel;
+
svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
p += rpcrdma_fixed_maxsz;
@@ -913,6 +910,8 @@ out_postfail:
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return ret;
+out_backchannel:
+ svc_rdma_handle_bc_reply(rqstp, ctxt);
out_drop:
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 23c2d3ce0dc9..5eb35309ecef 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -9,13 +9,10 @@
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
-#include <linux/sunrpc/debug.h>
#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>
-#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-
static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc);
static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
@@ -39,7 +36,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
struct svc_rdma_rw_ctxt {
struct list_head rw_list;
struct rdma_rw_ctx rw_ctx;
- int rw_nents;
+ unsigned int rw_nents;
struct sg_table rw_sg_table;
struct scatterlist rw_first_sgl[];
};
@@ -67,19 +64,22 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL);
if (!ctxt)
- goto out;
+ goto out_noctx;
INIT_LIST_HEAD(&ctxt->rw_list);
}
ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
ctxt->rw_sg_table.sgl,
- SG_CHUNK_SIZE)) {
- kfree(ctxt);
- ctxt = NULL;
- }
-out:
+ SG_CHUNK_SIZE))
+ goto out_free;
return ctxt;
+
+out_free:
+ kfree(ctxt);
+out_noctx:
+ trace_svcrdma_no_rwctx_err(rdma, sges);
+ return NULL;
}
static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
@@ -107,6 +107,34 @@ void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
}
}
+/**
+ * svc_rdma_rw_ctx_init - Prepare a R/W context for I/O
+ * @rdma: controlling transport instance
+ * @ctxt: R/W context to prepare
+ * @offset: RDMA offset
+ * @handle: RDMA tag/handle
+ * @direction: I/O direction
+ *
+ * Returns on success, the number of WQEs that will be needed
+ * on the workqueue, or a negative errno.
+ */
+static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma,
+ struct svc_rdma_rw_ctxt *ctxt,
+ u64 offset, u32 handle,
+ enum dma_data_direction direction)
+{
+ int ret;
+
+ ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num,
+ ctxt->rw_sg_table.sgl, ctxt->rw_nents,
+ 0, offset, handle, direction);
+ if (unlikely(ret < 0)) {
+ svc_rdma_put_rw_ctxt(rdma, ctxt);
+ trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret);
+ }
+ return ret;
+}
+
/* A chunk context tracks all I/O for moving one Read or Write
* chunk. This is a a set of rdma_rw's that handle data movement
* for all segments of one chunk.
@@ -428,15 +456,13 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
ctxt = svc_rdma_get_rw_ctxt(rdma,
(write_len >> PAGE_SHIFT) + 2);
if (!ctxt)
- goto out_noctx;
+ return -ENOMEM;
constructor(info, write_len, ctxt);
- ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp,
- rdma->sc_port_num, ctxt->rw_sg_table.sgl,
- ctxt->rw_nents, 0, seg_offset,
- seg_handle, DMA_TO_DEVICE);
+ ret = svc_rdma_rw_ctx_init(rdma, ctxt, seg_offset, seg_handle,
+ DMA_TO_DEVICE);
if (ret < 0)
- goto out_initerr;
+ return -EIO;
trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset);
@@ -455,18 +481,9 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
return 0;
out_overflow:
- dprintk("svcrdma: inadequate space in Write chunk (%u)\n",
- info->wi_nsegs);
+ trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no,
+ info->wi_nsegs);
return -E2BIG;
-
-out_noctx:
- dprintk("svcrdma: no R/W ctxs available\n");
- return -ENOMEM;
-
-out_initerr:
- svc_rdma_put_rw_ctxt(rdma, ctxt);
- trace_svcrdma_dma_map_rwctx(rdma, ret);
- return -EIO;
}
/* Send one of an xdr_buf's kvecs by itself. To send a Reply
@@ -616,7 +633,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT;
ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no);
if (!ctxt)
- goto out_noctx;
+ return -ENOMEM;
ctxt->rw_nents = sge_no;
sg = ctxt->rw_sg_table.sgl;
@@ -646,29 +663,18 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
goto out_overrun;
}
- ret = rdma_rw_ctx_init(&ctxt->rw_ctx, cc->cc_rdma->sc_qp,
- cc->cc_rdma->sc_port_num,
- ctxt->rw_sg_table.sgl, ctxt->rw_nents,
- 0, offset, rkey, DMA_FROM_DEVICE);
+ ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, offset, rkey,
+ DMA_FROM_DEVICE);
if (ret < 0)
- goto out_initerr;
+ return -EIO;
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
return 0;
-out_noctx:
- dprintk("svcrdma: no R/W ctxs available\n");
- return -ENOMEM;
-
out_overrun:
- dprintk("svcrdma: request overruns rq_pages\n");
+ trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno);
return -EINVAL;
-
-out_initerr:
- trace_svcrdma_dma_map_rwctx(cc->cc_rdma, ret);
- svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt);
- return -EIO;
}
/* Walk the segments in the Read chunk starting at @p and construct
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b6c8643867f2..38e7c3c8c4a9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -868,12 +868,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
__be32 *p;
int ret;
- /* Create the RDMA response header. xprt->xpt_mutex,
- * acquired in svc_send(), serializes RPC replies. The
- * code path below that inserts the credit grant value
- * into each transport header runs only inside this
- * critical section.
- */
+ ret = -ENOTCONN;
+ if (svc_xprt_is_dead(xprt))
+ goto err0;
+
ret = -ENOMEM;
sctxt = svc_rdma_send_ctxt_get(rdma);
if (!sctxt)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index ea54785db4f8..d38be57b00ed 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -211,7 +211,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
newxprt->sc_ord = param->initiator_depth;
sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
- svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+ newxprt->sc_xprt.xpt_remotelen = svc_addr_len(sa);
+ memcpy(&newxprt->sc_xprt.xpt_remote, sa,
+ newxprt->sc_xprt.xpt_remotelen);
+ snprintf(newxprt->sc_xprt.xpt_remotebuf,
+ sizeof(newxprt->sc_xprt.xpt_remotebuf) - 1, "%pISc", sa);
+
/* The remote port is arbitrary and not under the control of the
* client ULP. Set it to a fixed value so that the DRC continues
* to be effective after a reconnect.
@@ -309,11 +314,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct svcxprt_rdma *cma_xprt;
int ret;
- dprintk("svcrdma: Creating RDMA listener\n");
- if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) {
- dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
+ if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6)
return ERR_PTR(-EAFNOSUPPORT);
- }
cma_xprt = svc_rdma_create_xprt(serv, net);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
@@ -324,7 +326,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(listen_id)) {
ret = PTR_ERR(listen_id);
- dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
goto err0;
}
@@ -333,23 +334,17 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
*/
#if IS_ENABLED(CONFIG_IPV6)
ret = rdma_set_afonly(listen_id, 1);
- if (ret) {
- dprintk("svcrdma: rdma_set_afonly failed = %d\n", ret);
+ if (ret)
goto err1;
- }
#endif
ret = rdma_bind_addr(listen_id, sa);
- if (ret) {
- dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
+ if (ret)
goto err1;
- }
cma_xprt->sc_cm_id = listen_id;
ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
- if (ret) {
- dprintk("svcrdma: rdma_listen failed = %d\n", ret);
+ if (ret)
goto err1;
- }
/*
* We need to use the address from the cm_id in case the
@@ -405,9 +400,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!newxprt)
return NULL;
- dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
- newxprt, newxprt->sc_cm_id);
-
dev = newxprt->sc_cm_id->device;
newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
@@ -443,21 +435,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) {
- dprintk("svcrdma: error creating PD for connect request\n");
+ trace_svcrdma_pd_err(newxprt, PTR_ERR(newxprt->sc_pd));
goto errout;
}
newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth,
IB_POLL_WORKQUEUE);
- if (IS_ERR(newxprt->sc_sq_cq)) {
- dprintk("svcrdma: error creating SQ CQ for connect request\n");
+ if (IS_ERR(newxprt->sc_sq_cq))
goto errout;
- }
newxprt->sc_rq_cq =
ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE);
- if (IS_ERR(newxprt->sc_rq_cq)) {
- dprintk("svcrdma: error creating RQ CQ for connect request\n");
+ if (IS_ERR(newxprt->sc_rq_cq))
goto errout;
- }
memset(&qp_attr, 0, sizeof qp_attr);
qp_attr.event_handler = qp_event_handler;
@@ -481,7 +469,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
- dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
+ trace_svcrdma_qp_err(newxprt, ret);
goto errout;
}
newxprt->sc_qp = newxprt->sc_cm_id->qp;
@@ -489,8 +477,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
newxprt->sc_snd_w_inv = false;
if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) &&
- !rdma_ib_or_roce(dev, newxprt->sc_port_num))
+ !rdma_ib_or_roce(dev, newxprt->sc_port_num)) {
+ trace_svcrdma_fabric_err(newxprt, -EINVAL);
goto errout;
+ }
if (!svc_rdma_post_recvs(newxprt))
goto errout;
@@ -512,15 +502,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
dev->attrs.max_qp_init_rd_atom);
if (!conn_param.initiator_depth) {
- dprintk("svcrdma: invalid ORD setting\n");
ret = -EINVAL;
+ trace_svcrdma_initdepth_err(newxprt, ret);
goto errout;
}
conn_param.private_data = &pmsg;
conn_param.private_data_len = sizeof(pmsg);
ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
- if (ret)
+ if (ret) {
+ trace_svcrdma_accept_err(newxprt, ret);
goto errout;
+ }
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
dprintk("svcrdma: new connection %p accepted:\n", newxprt);
@@ -535,12 +527,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" ord : %d\n", conn_param.initiator_depth);
#endif
- trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
return &newxprt->sc_xprt;
errout:
- dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
- trace_svcrdma_xprt_fail(&newxprt->sc_xprt);
/* Take a reference in case the DTO handler runs */
svc_xprt_get(&newxprt->sc_xprt);
if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
@@ -578,8 +567,6 @@ static void __svc_rdma_free(struct work_struct *work)
container_of(work, struct svcxprt_rdma, sc_work);
struct svc_xprt *xprt = &rdma->sc_xprt;
- trace_svcrdma_xprt_free(xprt);
-
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 659da37020a4..0c4af7f5e241 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -68,7 +68,7 @@
* tunables
*/
-unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
+static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
@@ -281,8 +281,6 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- trace_xprtrdma_op_destroy(r_xprt);
-
cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
rpcrdma_xprt_disconnect(r_xprt);
@@ -365,10 +363,6 @@ xprt_setup_rdma(struct xprt_create *args)
xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
- dprintk("RPC: %s: %s:%s\n", __func__,
- xprt->address_strings[RPC_DISPLAY_ADDR],
- xprt->address_strings[RPC_DISPLAY_PORT]);
- trace_xprtrdma_create(new_xprt);
return xprt;
}
@@ -385,8 +379,6 @@ void xprt_rdma_close(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- trace_xprtrdma_op_close(r_xprt);
-
rpcrdma_xprt_disconnect(r_xprt);
xprt->reestablish_timeout = 0;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 05c4d3a9cda2..2ae348377806 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -141,7 +141,6 @@ void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc)
if (wc->status != IB_WC_SUCCESS &&
r_xprt->rx_ep->re_connect_status == 1) {
r_xprt->rx_ep->re_connect_status = -ECONNABORTED;
- trace_xprtrdma_flush_dct(r_xprt, wc->status);
xprt_force_disconnect(xprt);
}
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3a143e250b9a..914508ea9b84 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2528,8 +2528,16 @@ static int bc_sendto(struct rpc_rqst *req)
return sent;
}
-/*
- * The send routine. Borrows from svc_send
+/**
+ * bc_send_request - Send a backchannel Call on a TCP socket
+ * @req: rpc_rqst containing Call message to be sent
+ *
+ * xpt_mutex ensures @req's whole message is written to the socket
+ * without interruption.
+ *
+ * Return values:
+ * %0 if the message was sent successfully
+ * %ENOTCONN if the message was not sent
*/
static int bc_send_request(struct rpc_rqst *req)
{
diff --git a/net/switchdev/Kconfig b/net/switchdev/Kconfig
index 50f21a657007..18a2d980e11d 100644
--- a/net/switchdev/Kconfig
+++ b/net/switchdev/Kconfig
@@ -6,7 +6,7 @@
config NET_SWITCHDEV
bool "Switch (and switch-ish) device support"
depends on INET
- ---help---
+ help
This module provides glue between core networking code and device
drivers in order to support hardware switch chips in very generic
meaning of the word "switch". This includes devices supporting L2/L3 but
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 716b61a701a8..9dd780215eef 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -6,7 +6,7 @@
menuconfig TIPC
tristate "The TIPC Protocol"
depends on INET
- ---help---
+ help
The Transparent Inter Process Communication (TIPC) protocol is
specially designed for intra cluster communication. This protocol
originates from Ericsson where it has been used in carrier grade
@@ -55,6 +55,6 @@ config TIPC_DIAG
tristate "TIPC: socket monitoring interface"
depends on TIPC
default y
- ---help---
+ help
Support for TIPC socket monitoring interface used by ss tool.
If unsure, say Y.
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 34ca7b789eba..e366ec9a7e4d 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -316,7 +316,6 @@ static int tipc_enable_bearer(struct net *net, const char *name,
b->domain = disc_domain;
b->net_plane = bearer_id + 'A';
b->priority = prio;
- test_and_set_bit_lock(0, &b->up);
refcount_set(&b->refcnt, 1);
res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
@@ -326,6 +325,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
goto rejected;
}
+ test_and_set_bit_lock(0, &b->up);
rcu_assign_pointer(tn->bearer_list[bearer_id], b);
if (skb)
tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index c0afcd627c5e..01b64869a173 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -221,7 +221,7 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
accounted = skb ? msg_blocks(buf_msg(skb)) : 0;
total = accounted;
- while (rem) {
+ do {
if (!skb || skb->len >= mss) {
skb = tipc_buf_acquire(mss, GFP_KERNEL);
if (unlikely(!skb))
@@ -238,14 +238,14 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
hdr = buf_msg(skb);
curr = msg_blocks(hdr);
mlen = msg_size(hdr);
- cpy = min_t(int, rem, mss - mlen);
+ cpy = min_t(size_t, rem, mss - mlen);
if (cpy != copy_from_iter(skb->data + mlen, cpy, &m->msg_iter))
return -EFAULT;
msg_set_size(hdr, mlen + cpy);
skb_put(skb, cpy);
rem -= cpy;
total += msg_blocks(hdr) - curr;
- }
+ } while (rem > 0);
return total - accounted;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 26123f4177fd..a94f38333698 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1574,7 +1574,8 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
break;
send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
blocks = tsk->snd_backlog;
- if (tsk->oneway++ >= tsk->nagle_start && send <= maxnagle) {
+ if (tsk->oneway++ >= tsk->nagle_start && maxnagle &&
+ send <= maxnagle) {
rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
if (unlikely(rc < 0))
break;
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index 61ec78521a60..fa0724fd84b4 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -11,7 +11,7 @@ config TLS
select STREAM_PARSER
select NET_SOCK_MSG
default n
- ---help---
+ help
Enable kernel support for TLS protocol. This allows symmetric
encryption handling of the TLS protocol to be done in-kernel.
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 0e989005bdc2..ec10041c6b7d 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -629,7 +629,7 @@ struct tls_context *tls_ctx_create(struct sock *sk)
static void tls_build_proto(struct sock *sk)
{
int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4;
- const struct proto *prot = READ_ONCE(sk->sk_prot);
+ struct proto *prot = READ_ONCE(sk->sk_prot);
/* Build IPv6 TLS whenever the address of tcpv6 _prot changes */
if (ip_ver == TLSV6 &&
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index a23a5cca9753..b6c4282899ec 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -5,7 +5,7 @@
config UNIX
tristate "Unix domain sockets"
- ---help---
+ help
If you say Y here, you will include support for Unix domain sockets;
sockets are the standard Unix mechanism for establishing and
accessing network connections. Many commonly used programs such as
@@ -29,6 +29,6 @@ config UNIX_DIAG
tristate "UNIX: socket monitoring interface"
depends on UNIX
default n
- ---help---
+ help
Support for UNIX socket monitoring interface used by the ss tool.
If unsure, say Y.
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 4b8b1150a738..8b65323207db 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -2055,7 +2055,7 @@ static bool vmci_check_transport(struct vsock_sock *vsk)
return vsk->transport == &vmci_transport;
}
-void vmci_vsock_transport_cb(bool is_host)
+static void vmci_vsock_transport_cb(bool is_host)
{
int features;
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 813e93644ae7..faf74850a1b5 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -25,13 +25,13 @@ config CFG80211
# using a different algorithm, though right now they shouldn't
# (this is here rather than below to allow it to be a module)
select CRYPTO_SHA256 if CFG80211_USE_KERNEL_REGDB_KEYS
- ---help---
+ help
cfg80211 is the Linux wireless LAN (802.11) configuration API.
Enable this if you have a wireless device.
For more information refer to documentation on the wireless wiki:
- http://wireless.kernel.org/en/developers/Documentation/cfg80211
+ https://wireless.wiki.kernel.org/en/developers/Documentation/cfg80211
When built as a module it will be called cfg80211.
@@ -71,7 +71,7 @@ config CFG80211_CERTIFICATION_ONUS
bool "cfg80211 certification onus"
depends on EXPERT
default n
- ---help---
+ help
You should disable this option unless you are both capable
and willing to ensure your system will remain regulatory
compliant with the features available under this option.
@@ -124,7 +124,7 @@ config CFG80211_EXTRA_REGDB_KEYDIR
config CFG80211_REG_CELLULAR_HINTS
bool "cfg80211 regulatory support for cellular base station hints"
depends on CFG80211_CERTIFICATION_ONUS
- ---help---
+ help
This option enables support for parsing regulatory hints
from cellular base stations. If enabled and at least one driver
claims support for parsing cellular base station hints the
@@ -137,7 +137,7 @@ config CFG80211_REG_CELLULAR_HINTS
config CFG80211_REG_RELAX_NO_IR
bool "cfg80211 support for NO_IR relaxation"
depends on CFG80211_CERTIFICATION_ONUS
- ---help---
+ help
This option enables support for relaxation of the NO_IR flag for
situations that certain regulatory bodies have provided clarifications
on how relaxation can occur. This feature has an inherent dependency on
@@ -171,7 +171,7 @@ config CFG80211_DEFAULT_PS
config CFG80211_DEBUGFS
bool "cfg80211 DebugFS entries"
depends on DEBUG_FS
- ---help---
+ help
You can enable this if you want debugfs entries for cfg80211.
If unsure, say N.
@@ -228,7 +228,7 @@ config LIB80211_DEBUG
bool "lib80211 debugging messages"
depends on LIB80211
default n
- ---help---
+ help
You can enable this if you want verbose debugging messages
from lib80211.
diff --git a/net/wireless/core.c b/net/wireless/core.c
index f0226ae9561c..c623d9bf5096 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -497,6 +497,8 @@ use_default_name:
INIT_WORK(&rdev->propagate_radar_detect_wk,
cfg80211_propagate_radar_detect_wk);
INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk);
+ INIT_WORK(&rdev->mgmt_registrations_update_wk,
+ cfg80211_mgmt_registrations_update_wk);
#ifdef CONFIG_CFG80211_DEFAULT_PS
rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT;
@@ -1047,6 +1049,7 @@ void wiphy_unregister(struct wiphy *wiphy)
flush_work(&rdev->sched_scan_stop_wk);
flush_work(&rdev->propagate_radar_detect_wk);
flush_work(&rdev->propagate_cac_done_wk);
+ flush_work(&rdev->mgmt_registrations_update_wk);
#ifdef CONFIG_PM
if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
@@ -1108,7 +1111,6 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
rdev->devlist_generation++;
cfg80211_mlme_purge_registrations(wdev);
- flush_work(&wdev->mgmt_registrations_update_wk);
switch (wdev->iftype) {
case NL80211_IFTYPE_P2P_DEVICE:
@@ -1253,8 +1255,6 @@ void cfg80211_init_wdev(struct cfg80211_registered_device *rdev,
spin_lock_init(&wdev->event_lock);
INIT_LIST_HEAD(&wdev->mgmt_registrations);
spin_lock_init(&wdev->mgmt_registrations_lock);
- INIT_WORK(&wdev->mgmt_registrations_update_wk,
- cfg80211_mgmt_registrations_update_wk);
INIT_LIST_HEAD(&wdev->pmsr_list);
spin_lock_init(&wdev->pmsr_lock);
INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index e0e5b3ee9699..67b0389fca4d 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -99,6 +99,8 @@ struct cfg80211_registered_device {
struct cfg80211_chan_def cac_done_chandef;
struct work_struct propagate_cac_done_wk;
+ struct work_struct mgmt_registrations_update_wk;
+
/* must be last because of the way we do wiphy_priv(),
* and it should at least be aligned to NETDEV_ALIGN */
struct wiphy wiphy __aligned(NETDEV_ALIGN);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 189334314cba..a6c61a2e6569 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -440,9 +440,15 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
ASSERT_RTNL();
+ spin_lock_bh(&wdev->mgmt_registrations_lock);
+ if (!wdev->mgmt_registrations_need_update) {
+ spin_unlock_bh(&wdev->mgmt_registrations_lock);
+ return;
+ }
+
rcu_read_lock();
list_for_each_entry_rcu(tmp, &rdev->wiphy.wdev_list, list) {
- list_for_each_entry_rcu(reg, &tmp->mgmt_registrations, list) {
+ list_for_each_entry(reg, &tmp->mgmt_registrations, list) {
u32 mask = BIT(le16_to_cpu(reg->frame_type) >> 4);
u32 mcast_mask = 0;
@@ -460,16 +466,23 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
}
rcu_read_unlock();
+ wdev->mgmt_registrations_need_update = 0;
+ spin_unlock_bh(&wdev->mgmt_registrations_lock);
+
rdev_update_mgmt_frame_registrations(rdev, wdev, &upd);
}
void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk)
{
- struct wireless_dev *wdev = container_of(wk, struct wireless_dev,
- mgmt_registrations_update_wk);
+ struct cfg80211_registered_device *rdev;
+ struct wireless_dev *wdev;
+
+ rdev = container_of(wk, struct cfg80211_registered_device,
+ mgmt_registrations_update_wk);
rtnl_lock();
- cfg80211_mgmt_registrations_update(wdev);
+ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list)
+ cfg80211_mgmt_registrations_update(wdev);
rtnl_unlock();
}
@@ -557,6 +570,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
nreg->multicast_rx = multicast_rx;
list_add(&nreg->list, &wdev->mgmt_registrations);
}
+ wdev->mgmt_registrations_need_update = 1;
spin_unlock_bh(&wdev->mgmt_registrations_lock);
cfg80211_mgmt_registrations_update(wdev);
@@ -585,7 +599,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
list_del(&reg->list);
kfree(reg);
- schedule_work(&wdev->mgmt_registrations_update_wk);
+ wdev->mgmt_registrations_need_update = 1;
+ schedule_work(&rdev->mgmt_registrations_update_wk);
}
spin_unlock_bh(&wdev->mgmt_registrations_lock);
@@ -608,6 +623,7 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
list_del(&reg->list);
kfree(reg);
}
+ wdev->mgmt_registrations_need_update = 1;
spin_unlock_bh(&wdev->mgmt_registrations_lock);
cfg80211_mgmt_registrations_update(wdev);
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index 9f0d58b0b90b..e3ed23245a82 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -5,7 +5,7 @@
config X25
tristate "CCITT X.25 Packet Layer"
- ---help---
+ help
X.25 is a set of standardized network protocols, similar in scope to
frame relay; the one physical line from your box to the X.25 network
entry point can carry several logical point-to-point connections
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 1bbaf1747e4f..e97db37354e4 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -254,10 +254,10 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
if (!umem->pgs)
return -ENOMEM;
- down_read(&current->mm->mmap_sem);
+ mmap_read_lock(current->mm);
npgs = pin_user_pages(address, umem->npgs,
gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(current->mm);
if (npgs != umem->npgs) {
if (npgs >= 0) {
@@ -336,7 +336,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if ((addr + size) < addr)
return -EINVAL;
- npgs = div_u64(size, PAGE_SIZE);
+ npgs = size >> PAGE_SHIFT;
if (npgs > U32_MAX)
return -EINVAL;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index b6c0f08bd80d..3700266229f6 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -352,10 +352,8 @@ static int xsk_generic_xmit(struct sock *sk)
len = desc.len;
skb = sock_alloc_send_skb(sk, len, 1, &err);
- if (unlikely(!skb)) {
- err = -EAGAIN;
+ if (unlikely(!skb))
goto out;
- }
skb_put(skb, len);
addr = desc.addr;
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index e77ba529229c..5b9a5ab48111 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -22,7 +22,7 @@ if INET
config XFRM_USER
tristate "Transformation user configuration interface"
select XFRM_ALGO
- ---help---
+ help
Support for Transformation(XFRM) user configuration interface
like IPsec used by native Linux tools.
@@ -31,7 +31,7 @@ config XFRM_USER
config XFRM_INTERFACE
tristate "Transformation virtual interface"
depends on XFRM && IPV6
- ---help---
+ help
This provides a virtual interface to route IPsec traffic.
If unsure, say N.
@@ -39,7 +39,7 @@ config XFRM_INTERFACE
config XFRM_SUB_POLICY
bool "Transformation sub policy support"
depends on XFRM
- ---help---
+ help
Support sub policy for developers. By using sub policy with main
one, two policies can be applied to the same packet at once.
Policy which lives shorter time in kernel should be a sub.
@@ -49,7 +49,7 @@ config XFRM_SUB_POLICY
config XFRM_MIGRATE
bool "Transformation migrate database"
depends on XFRM
- ---help---
+ help
A feature to update locator(s) of a given IPsec security
association dynamically. This feature is required, for
instance, in a Mobile IPv6 environment with IPsec configuration
@@ -60,7 +60,7 @@ config XFRM_MIGRATE
config XFRM_STATISTICS
bool "Transformation statistics"
depends on XFRM && PROC_FS
- ---help---
+ help
This statistics is not a SNMP/MIB specification but shows
statistics about transformation error (or almost error) factor
at packet processing for developer.
@@ -100,7 +100,7 @@ config XFRM_IPCOMP
config NET_KEY
tristate "PF_KEY sockets"
select XFRM_ALGO
- ---help---
+ help
PF_KEYv2 socket family, compatible to KAME ones.
They are required if you are going to use IPsec tools ported
from KAME.
@@ -111,7 +111,7 @@ config NET_KEY_MIGRATE
bool "PF_KEY MIGRATE"
depends on NET_KEY
select XFRM_MIGRATE
- ---help---
+ help
Add a PF_KEY MIGRATE message to PF_KEYv2 socket family.
The PF_KEY MIGRATE message is used to dynamically update
locator(s) of a given IPsec security association.