summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorPablo Neira Ayuso <pablo@netfilter.org>2012-10-17 10:59:20 +0200
committerPablo Neira Ayuso <pablo@netfilter.org>2012-10-17 10:59:20 +0200
commit0b4f5b1d6385826093dc3cd9035b186f0d77a5dc (patch)
treee7602057216d3de837995b267a281ae09d899a26 /net
parent939ccba437da1726a5c8a5b702a47d473da927ae (diff)
parent9f0d3c2781baa1102108e16efbe640dd74564a7c (diff)
downloadlwn-0b4f5b1d6385826093dc3cd9035b186f0d77a5dc.tar.gz
lwn-0b4f5b1d6385826093dc3cd9035b186f0d77a5dc.zip
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
To obtain new flag FLOWI_FLAG_KNOWN_NH to fix netfilter's xt_TEE target.
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_core.c19
-rw-r--r--net/9p/client.c18
-rw-r--r--net/9p/trans_fd.c54
-rw-r--r--net/bluetooth/af_bluetooth.c2
-rw-r--r--net/bridge/br_netfilter.c3
-rw-r--r--net/can/af_can.c2
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/can/gw.c2
-rw-r--r--net/can/raw.c2
-rw-r--r--net/ceph/crypto.c9
-rw-r--r--net/ceph/mon_client.c7
-rw-r--r--net/ceph/osd_client.c48
-rw-r--r--net/ceph/osdmap.c18
-rw-r--r--net/ceph/pagelist.c5
-rw-r--r--net/compat.c3
-rw-r--r--net/core/dev.c59
-rw-r--r--net/core/neighbour.c6
-rw-r--r--net/core/netprio_cgroup.c38
-rw-r--r--net/core/pktgen.c146
-rw-r--r--net/core/scm.c3
-rw-r--r--net/core/skbuff.c47
-rw-r--r--net/core/utils.c24
-rw-r--r--net/decnet/dn_rules.c2
-rw-r--r--net/dns_resolver/dns_key.c6
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_rules.c2
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/inet_connection_sock.c4
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/ip_vti.c4
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/route.c148
-rw-r--r--net/ipv4/sysctl_net_ipv4.c2
-rw-r--r--net/ipv4/tcp_ipv4.c7
-rw-r--r--net/ipv4/xfrm4_policy.c1
-rw-r--r--net/ipv6/addrconf.c15
-rw-r--r--net/ipv6/addrlabel.c2
-rw-r--r--net/ipv6/af_inet6.c22
-rw-r--r--net/ipv6/fib6_rules.c2
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/mac80211/mesh_sync.c3
-rw-r--r--net/mac80211/status.c4
-rw-r--r--net/mac80211/tx.c22
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c6
-rw-r--r--net/netlink/af_netlink.c29
-rw-r--r--net/rds/send.c2
-rw-r--r--net/rxrpc/ar-key.c40
-rw-r--r--net/sctp/sm_sideeffect.c3
-rw-r--r--net/sctp/socket.c25
-rw-r--r--net/socket.c68
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c58
-rw-r--r--net/sunrpc/clnt.c105
-rw-r--r--net/sunrpc/rpc_pipe.c8
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/svc_xprt.c233
-rw-r--r--net/sunrpc/svcsock.c157
-rw-r--r--net/sunrpc/xdr.c21
-rw-r--r--net/sunrpc/xprt.c8
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c4
-rw-r--r--net/sunrpc/xprtrdma/transport.c22
-rw-r--r--net/sunrpc/xprtsock.c40
63 files changed, 816 insertions, 796 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index add69d0fd99d..65e06abe023f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -5,7 +5,7 @@
#include <linux/export.h>
#include "vlan.h"
-bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
+bool vlan_do_receive(struct sk_buff **skbp)
{
struct sk_buff *skb = *skbp;
u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
@@ -13,14 +13,8 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
struct vlan_pcpu_stats *rx_stats;
vlan_dev = vlan_find_dev(skb->dev, vlan_id);
- if (!vlan_dev) {
- /* Only the last call to vlan_do_receive() should change
- * pkt_type to PACKET_OTHERHOST
- */
- if (vlan_id && last_handler)
- skb->pkt_type = PACKET_OTHERHOST;
+ if (!vlan_dev)
return false;
- }
skb = *skbp = skb_share_check(skb, GFP_ATOMIC);
if (unlikely(!skb))
@@ -372,6 +366,13 @@ EXPORT_SYMBOL(vlan_vids_del_by_dev);
bool vlan_uses_dev(const struct net_device *dev)
{
- return rtnl_dereference(dev->vlan_info) ? true : false;
+ struct vlan_info *vlan_info;
+
+ ASSERT_RTNL();
+
+ vlan_info = rtnl_dereference(dev->vlan_info);
+ if (!vlan_info)
+ return false;
+ return vlan_info->grp.nr_vlan_devs ? true : false;
}
EXPORT_SYMBOL(vlan_uses_dev);
diff --git a/net/9p/client.c b/net/9p/client.c
index 8260f132b32e..34d417670935 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -76,6 +76,20 @@ inline int p9_is_proto_dotu(struct p9_client *clnt)
}
EXPORT_SYMBOL(p9_is_proto_dotu);
+/*
+ * Some error codes are taken directly from the server replies,
+ * make sure they are valid.
+ */
+static int safe_errno(int err)
+{
+ if ((err > 0) || (err < -MAX_ERRNO)) {
+ p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err);
+ return -EPROTO;
+ }
+ return err;
+}
+
+
/* Interpret mount option for protocol version */
static int get_protocol_version(char *s)
{
@@ -782,7 +796,7 @@ again:
return req;
reterr:
p9_free_req(c, req);
- return ERR_PTR(err);
+ return ERR_PTR(safe_errno(err));
}
/**
@@ -865,7 +879,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
return req;
reterr:
p9_free_req(c, req);
- return ERR_PTR(err);
+ return ERR_PTR(safe_errno(err));
}
static struct p9_fid *p9_fid_create(struct p9_client *clnt)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 505f0ce3f10b..02efb25c2957 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -316,8 +316,7 @@ static void p9_read_work(struct work_struct *work)
m->rsize - m->rpos);
p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
if (err == -EAGAIN) {
- clear_bit(Rworksched, &m->wsched);
- return;
+ goto end_clear;
}
if (err <= 0)
@@ -379,19 +378,20 @@ static void p9_read_work(struct work_struct *work)
m->req = NULL;
}
+end_clear:
+ clear_bit(Rworksched, &m->wsched);
+
if (!list_empty(&m->req_list)) {
if (test_and_clear_bit(Rpending, &m->wsched))
n = POLLIN;
else
n = p9_fd_poll(m->client, NULL);
- if (n & POLLIN) {
+ if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
schedule_work(&m->rq);
- } else
- clear_bit(Rworksched, &m->wsched);
- } else
- clear_bit(Rworksched, &m->wsched);
+ }
+ }
return;
error:
@@ -453,12 +453,13 @@ static void p9_write_work(struct work_struct *work)
}
if (!m->wsize) {
+ spin_lock(&m->client->lock);
if (list_empty(&m->unsent_req_list)) {
clear_bit(Wworksched, &m->wsched);
+ spin_unlock(&m->client->lock);
return;
}
- spin_lock(&m->client->lock);
req = list_entry(m->unsent_req_list.next, struct p9_req_t,
req_list);
req->status = REQ_STATUS_SENT;
@@ -476,10 +477,9 @@ static void p9_write_work(struct work_struct *work)
clear_bit(Wpending, &m->wsched);
err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
- if (err == -EAGAIN) {
- clear_bit(Wworksched, &m->wsched);
- return;
- }
+ if (err == -EAGAIN)
+ goto end_clear;
+
if (err < 0)
goto error;
@@ -492,19 +492,21 @@ static void p9_write_work(struct work_struct *work)
if (m->wpos == m->wsize)
m->wpos = m->wsize = 0;
- if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
+end_clear:
+ clear_bit(Wworksched, &m->wsched);
+
+ if (m->wsize || !list_empty(&m->unsent_req_list)) {
if (test_and_clear_bit(Wpending, &m->wsched))
n = POLLOUT;
else
n = p9_fd_poll(m->client, NULL);
- if (n & POLLOUT) {
+ if ((n & POLLOUT) &&
+ !test_and_set_bit(Wworksched, &m->wsched)) {
p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
schedule_work(&m->wq);
- } else
- clear_bit(Wworksched, &m->wsched);
- } else
- clear_bit(Wworksched, &m->wsched);
+ }
+ }
return;
@@ -793,30 +795,28 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
static int p9_socket_open(struct p9_client *client, struct socket *csocket)
{
struct p9_trans_fd *p;
- int ret, fd;
+ struct file *file;
+ int ret;
p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
if (!p)
return -ENOMEM;
csocket->sk->sk_allocation = GFP_NOIO;
- fd = sock_map_fd(csocket, 0);
- if (fd < 0) {
+ file = sock_alloc_file(csocket, 0, NULL);
+ if (IS_ERR(file)) {
pr_err("%s (%d): failed to map fd\n",
__func__, task_pid_nr(current));
sock_release(csocket);
kfree(p);
- return fd;
+ return PTR_ERR(file);
}
- get_file(csocket->file);
- get_file(csocket->file);
- p->wr = p->rd = csocket->file;
+ get_file(file);
+ p->wr = p->rd = file;
client->trans = p;
client->status = Connected;
- sys_close(fd); /* still racy */
-
p->rd->f_flags |= O_NONBLOCK;
p->conn = p9_conn_create(client);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 9d49ee6d7219..ba033f09196e 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -591,7 +591,7 @@ static int bt_seq_show(struct seq_file *seq, void *v)
atomic_read(&sk->sk_refcnt),
sk_rmem_alloc_get(sk),
sk_wmem_alloc_get(sk),
- sock_i_uid(sk),
+ from_kuid(seq_user_ns(seq), sock_i_uid(sk)),
sock_i_ino(sk),
&src_baswapped,
&dst_baswapped,
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 68e8f364bbf8..fe43bc7b063f 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -265,6 +265,9 @@ static int br_parse_ip_options(struct sk_buff *skb)
struct net_device *dev = skb->dev;
u32 len;
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto inhdr_error;
+
iph = ip_hdr(skb);
opt = &(IPCB(skb)->opt);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 821022a7214f..ddac1ee2ed20 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -63,7 +63,7 @@
#include "af_can.h"
-static __initdata const char banner[] = KERN_INFO
+static __initconst const char banner[] = KERN_INFO
"can: controller area network core (" CAN_VERSION_STRING ")\n";
MODULE_DESCRIPTION("Controller Area Network PF_CAN core");
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 151b7730c12c..6f747582718e 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -77,7 +77,7 @@
(CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG))
#define CAN_BCM_VERSION CAN_VERSION
-static __initdata const char banner[] = KERN_INFO
+static __initconst const char banner[] = KERN_INFO
"can: broadcast manager protocol (rev " CAN_BCM_VERSION " t)\n";
MODULE_DESCRIPTION("PF_CAN broadcast manager protocol");
diff --git a/net/can/gw.c b/net/can/gw.c
index 127879c55fb6..1f5c9785a262 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -58,7 +58,7 @@
#include <net/sock.h>
#define CAN_GW_VERSION "20101209"
-static __initdata const char banner[] =
+static __initconst const char banner[] =
KERN_INFO "can: netlink gateway (rev " CAN_GW_VERSION ")\n";
MODULE_DESCRIPTION("PF_CAN netlink gateway");
diff --git a/net/can/raw.c b/net/can/raw.c
index 3e9c89356a93..5b0e3e330d97 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -55,7 +55,7 @@
#include <net/net_namespace.h>
#define CAN_RAW_VERSION CAN_VERSION
-static __initdata const char banner[] =
+static __initconst const char banner[] =
KERN_INFO "can: raw protocol (rev " CAN_RAW_VERSION ")\n";
MODULE_DESCRIPTION("PF_CAN raw protocol");
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 9da7fdd3cd8a..af14cb425164 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -423,14 +423,15 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len,
}
}
-int ceph_key_instantiate(struct key *key, const void *data, size_t datalen)
+int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep)
{
struct ceph_crypto_key *ckey;
+ size_t datalen = prep->datalen;
int ret;
void *p;
ret = -EINVAL;
- if (datalen <= 0 || datalen > 32767 || !data)
+ if (datalen <= 0 || datalen > 32767 || !prep->data)
goto err;
ret = key_payload_reserve(key, datalen);
@@ -443,8 +444,8 @@ int ceph_key_instantiate(struct key *key, const void *data, size_t datalen)
goto err;
/* TODO ceph_crypto_key_decode should really take const input */
- p = (void *)data;
- ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen);
+ p = (void *)prep->data;
+ ret = ceph_crypto_key_decode(ckey, &p, (char*)prep->data+datalen);
if (ret < 0)
goto err_ckey;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 900ea0f043fc..812eb3b46c1f 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -637,7 +637,7 @@ bad:
/*
* Do a synchronous pool op.
*/
-int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op,
+static int do_poolop(struct ceph_mon_client *monc, u32 op,
u32 pool, u64 snapid,
char *buf, int len)
{
@@ -687,7 +687,7 @@ out:
int ceph_monc_create_snapid(struct ceph_mon_client *monc,
u32 pool, u64 *snapid)
{
- return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP,
+ return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP,
pool, 0, (char *)snapid, sizeof(*snapid));
}
@@ -696,7 +696,7 @@ EXPORT_SYMBOL(ceph_monc_create_snapid);
int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
u32 pool, u64 snapid)
{
- return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP,
+ return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP,
pool, snapid, 0, 0);
}
@@ -769,7 +769,6 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
monc->monmap->mon_inst[i].name.num = cpu_to_le64(i);
}
monc->monmap->num_mon = num_mon;
- monc->have_fsid = false;
return 0;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 42119c05e82c..c1d756cc7448 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -52,7 +52,7 @@ static int op_has_extent(int op)
op == CEPH_OSD_OP_WRITE);
}
-void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
+int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
struct ceph_file_layout *layout,
u64 snapid,
u64 off, u64 *plen, u64 *bno,
@@ -62,12 +62,15 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
u64 orig_len = *plen;
u64 objoff, objlen; /* extent in object */
+ int r;
reqhead->snapid = cpu_to_le64(snapid);
/* object extent? */
- ceph_calc_file_object_mapping(layout, off, plen, bno,
- &objoff, &objlen);
+ r = ceph_calc_file_object_mapping(layout, off, plen, bno,
+ &objoff, &objlen);
+ if (r < 0)
+ return r;
if (*plen < orig_len)
dout(" skipping last %llu, final file extent %llu~%llu\n",
orig_len - *plen, off, *plen);
@@ -83,7 +86,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
*bno, objoff, objlen, req->r_num_pages);
-
+ return 0;
}
EXPORT_SYMBOL(ceph_calc_raw_layout);
@@ -112,20 +115,25 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
*
* fill osd op in request message.
*/
-static void calc_layout(struct ceph_osd_client *osdc,
- struct ceph_vino vino,
- struct ceph_file_layout *layout,
- u64 off, u64 *plen,
- struct ceph_osd_request *req,
- struct ceph_osd_req_op *op)
+static int calc_layout(struct ceph_osd_client *osdc,
+ struct ceph_vino vino,
+ struct ceph_file_layout *layout,
+ u64 off, u64 *plen,
+ struct ceph_osd_request *req,
+ struct ceph_osd_req_op *op)
{
u64 bno;
+ int r;
- ceph_calc_raw_layout(osdc, layout, vino.snap, off,
- plen, &bno, req, op);
+ r = ceph_calc_raw_layout(osdc, layout, vino.snap, off,
+ plen, &bno, req, op);
+ if (r < 0)
+ return r;
snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
req->r_oid_len = strlen(req->r_oid);
+
+ return r;
}
/*
@@ -213,7 +221,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
kref_init(&req->r_kref);
init_completion(&req->r_completion);
init_completion(&req->r_safe_completion);
- rb_init_node(&req->r_node);
INIT_LIST_HEAD(&req->r_unsafe_item);
INIT_LIST_HEAD(&req->r_linger_item);
INIT_LIST_HEAD(&req->r_linger_osd);
@@ -456,6 +463,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
{
struct ceph_osd_req_op ops[3];
struct ceph_osd_request *req;
+ int r;
ops[0].op = opcode;
ops[0].extent.truncate_seq = truncate_seq;
@@ -474,10 +482,12 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
use_mempool,
GFP_NOFS, NULL, NULL);
if (!req)
- return NULL;
+ return ERR_PTR(-ENOMEM);
/* calculate max write size */
- calc_layout(osdc, vino, layout, off, plen, req, ops);
+ r = calc_layout(osdc, vino, layout, off, plen, req, ops);
+ if (r < 0)
+ return ERR_PTR(r);
req->r_file_layout = *layout; /* keep a copy */
/* in case it differs from natural (file) alignment that
@@ -1920,8 +1930,8 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc,
CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
NULL, 0, truncate_seq, truncate_size, NULL,
false, 1, page_align);
- if (!req)
- return -ENOMEM;
+ if (IS_ERR(req))
+ return PTR_ERR(req);
/* it may be a short read due to an object boundary */
req->r_pages = pages;
@@ -1963,8 +1973,8 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
snapc, do_sync,
truncate_seq, truncate_size, mtime,
nofail, 1, page_align);
- if (!req)
- return -ENOMEM;
+ if (IS_ERR(req))
+ return PTR_ERR(req);
/* it may be a short write due to an object boundary */
req->r_pages = pages;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 3124b71a8883..5433fb0eb3c6 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -984,7 +984,7 @@ bad:
* for now, we write only a single su, until we can
* pass a stride back to the caller.
*/
-void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
+int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
u64 off, u64 *plen,
u64 *ono,
u64 *oxoff, u64 *oxlen)
@@ -998,11 +998,17 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen,
osize, su);
+ if (su == 0 || sc == 0)
+ goto invalid;
su_per_object = osize / su;
+ if (su_per_object == 0)
+ goto invalid;
dout("osize %u / su %u = su_per_object %u\n", osize, su,
su_per_object);
- BUG_ON((su & ~PAGE_MASK) != 0);
+ if ((su & ~PAGE_MASK) != 0)
+ goto invalid;
+
/* bl = *off / su; */
t = off;
do_div(t, su);
@@ -1030,6 +1036,14 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
*plen = *oxlen;
dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
+ return 0;
+
+invalid:
+ dout(" invalid layout\n");
+ *ono = 0;
+ *oxoff = 0;
+ *oxlen = 0;
+ return -EINVAL;
}
EXPORT_SYMBOL(ceph_calc_file_object_mapping);
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 665cd23020ff..92866bebb65f 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -1,4 +1,3 @@
-
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
@@ -134,8 +133,8 @@ int ceph_pagelist_truncate(struct ceph_pagelist *pl,
ceph_pagelist_unmap_tail(pl);
while (pl->head.prev != c->page_lru) {
page = list_entry(pl->head.prev, struct page, lru);
- list_del(&page->lru); /* remove from pagelist */
- list_add_tail(&page->lru, &pl->free_list); /* add to reserve */
+ /* move from pagelist to reserve */
+ list_move_tail(&page->lru, &pl->free_list);
++pl->num_pages_free;
}
pl->room = c->room;
diff --git a/net/compat.c b/net/compat.c
index 74ed1d7a84a2..79ae88485001 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -301,8 +301,7 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm)
break;
}
/* Bump the usage count and install the file. */
- get_file(fp[i]);
- fd_install(new_fd, fp[i]);
+ fd_install(new_fd, get_file(fp[i]));
}
if (i > 0) {
diff --git a/net/core/dev.c b/net/core/dev.c
index 1e0a1847c3bb..09cb3f6dc40c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3300,18 +3300,18 @@ ncls:
&& !skb_pfmemalloc_protocol(skb))
goto drop;
- rx_handler = rcu_dereference(skb->dev->rx_handler);
if (vlan_tx_tag_present(skb)) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL;
}
- if (vlan_do_receive(&skb, !rx_handler))
+ if (vlan_do_receive(&skb))
goto another_round;
else if (unlikely(!skb))
goto unlock;
}
+ rx_handler = rcu_dereference(skb->dev->rx_handler);
if (rx_handler) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
@@ -3331,6 +3331,9 @@ ncls:
}
}
+ if (vlan_tx_nonzero_tag_present(skb))
+ skb->pkt_type = PACKET_OTHERHOST;
+
/* deliver only exact match when indicated */
null_or_dev = deliver_exact ? skb->dev : NULL;
@@ -3471,17 +3474,31 @@ out:
return netif_receive_skb(skb);
}
-inline void napi_gro_flush(struct napi_struct *napi)
+/* napi->gro_list contains packets ordered by age.
+ * youngest packets at the head of it.
+ * Complete skbs in reverse order to reduce latencies.
+ */
+void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
- struct sk_buff *skb, *next;
+ struct sk_buff *skb, *prev = NULL;
- for (skb = napi->gro_list; skb; skb = next) {
- next = skb->next;
+ /* scan list and build reverse chain */
+ for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
+ skb->prev = prev;
+ prev = skb;
+ }
+
+ for (skb = prev; skb; skb = prev) {
skb->next = NULL;
+
+ if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
+ return;
+
+ prev = skb->prev;
napi_gro_complete(skb);
+ napi->gro_count--;
}
- napi->gro_count = 0;
napi->gro_list = NULL;
}
EXPORT_SYMBOL(napi_gro_flush);
@@ -3542,6 +3559,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
napi->gro_count++;
NAPI_GRO_CB(skb)->count = 1;
+ NAPI_GRO_CB(skb)->age = jiffies;
skb_shinfo(skb)->gso_size = skb_gro_len(skb);
skb->next = napi->gro_list;
napi->gro_list = skb;
@@ -3631,20 +3649,22 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
}
EXPORT_SYMBOL(napi_skb_finish);
-void skb_gro_reset_offset(struct sk_buff *skb)
+static void skb_gro_reset_offset(struct sk_buff *skb)
{
+ const struct skb_shared_info *pinfo = skb_shinfo(skb);
+ const skb_frag_t *frag0 = &pinfo->frags[0];
+
NAPI_GRO_CB(skb)->data_offset = 0;
NAPI_GRO_CB(skb)->frag0 = NULL;
NAPI_GRO_CB(skb)->frag0_len = 0;
if (skb->mac_header == skb->tail &&
- !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
- NAPI_GRO_CB(skb)->frag0 =
- skb_frag_address(&skb_shinfo(skb)->frags[0]);
- NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
+ pinfo->nr_frags &&
+ !PageHighMem(skb_frag_page(frag0))) {
+ NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
+ NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
}
}
-EXPORT_SYMBOL(skb_gro_reset_offset);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
@@ -3876,7 +3896,7 @@ void napi_complete(struct napi_struct *n)
if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
return;
- napi_gro_flush(n);
+ napi_gro_flush(n, false);
local_irq_save(flags);
__napi_complete(n);
local_irq_restore(flags);
@@ -3981,8 +4001,17 @@ static void net_rx_action(struct softirq_action *h)
local_irq_enable();
napi_complete(n);
local_irq_disable();
- } else
+ } else {
+ if (n->gro_list) {
+ /* flush too old packets
+ * If HZ < 1000, flush all packets.
+ */
+ local_irq_enable();
+ napi_gro_flush(n, HZ >= 1000);
+ local_irq_disable();
+ }
list_move_tail(&n->poll_list, &sd->poll_list);
+ }
}
netpoll_poll_unlock(have);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index baca771caae2..22571488730a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1301,8 +1301,6 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
if (!dst)
goto discard;
- __skb_pull(skb, skb_network_offset(skb));
-
if (!neigh_event_send(neigh, skb)) {
int err;
struct net_device *dev = neigh->dev;
@@ -1312,6 +1310,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
neigh_hh_init(neigh, dst);
do {
+ __skb_pull(skb, skb_network_offset(skb));
seq = read_seqbegin(&neigh->ha_lock);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
@@ -1342,9 +1341,8 @@ int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
unsigned int seq;
int err;
- __skb_pull(skb, skb_network_offset(skb));
-
do {
+ __skb_pull(skb, skb_network_offset(skb));
seq = read_seqbegin(&neigh->ha_lock);
err = dev_hard_header(skb, dev, ntohs(skb->protocol),
neigh->ha, NULL, skb->len);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 4a83fb3c8e87..79285a36035f 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -239,38 +239,24 @@ out_free_devname:
return ret;
}
+static int update_netprio(const void *v, struct file *file, unsigned n)
+{
+ int err;
+ struct socket *sock = sock_from_file(file, &err);
+ if (sock)
+ sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v;
+ return 0;
+}
+
void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
struct task_struct *p;
+ void *v;
cgroup_taskset_for_each(p, cgrp, tset) {
- unsigned int fd;
- struct fdtable *fdt;
- struct files_struct *files;
-
task_lock(p);
- files = p->files;
- if (!files) {
- task_unlock(p);
- continue;
- }
-
- spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- for (fd = 0; fd < fdt->max_fds; fd++) {
- struct file *file;
- struct socket *sock;
- int err;
-
- file = fcheck_files(files, fd);
- if (!file)
- continue;
-
- sock = sock_from_file(file, &err);
- if (sock)
- sock_update_netprioidx(sock->sk, p);
- }
- spin_unlock(&files->file_lock);
+ v = (void *)(unsigned long)task_netprioidx(p);
+ iterate_fd(p->files, 0, update_netprio, v);
task_unlock(p);
}
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 148e73d2c451..d1dc14c2aac4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -248,8 +248,8 @@ struct pktgen_dev {
int removal_mark; /* non-zero => the device is marked for
* removal by worker thread */
- int min_pkt_size; /* = ETH_ZLEN; */
- int max_pkt_size; /* = ETH_ZLEN; */
+ int min_pkt_size;
+ int max_pkt_size;
int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */
int nfrags;
struct page *page;
@@ -449,8 +449,6 @@ static void pktgen_stop_all_threads_ifs(void);
static void pktgen_stop(struct pktgen_thread *t);
static void pktgen_clear_counters(struct pktgen_dev *pkt_dev);
-static unsigned int scan_ip6(const char *s, char ip[16]);
-
/* Module parameters, defaults. */
static int pg_count_d __read_mostly = 1000;
static int pg_delay_d __read_mostly;
@@ -702,8 +700,8 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
&pkt_dev->cur_in6_saddr,
&pkt_dev->cur_in6_daddr);
} else
- seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n",
- pkt_dev->cur_saddr, pkt_dev->cur_daddr);
+ seq_printf(seq, " cur_saddr: %pI4 cur_daddr: %pI4\n",
+ &pkt_dev->cur_saddr, &pkt_dev->cur_daddr);
seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n",
pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src);
@@ -1299,7 +1297,7 @@ static ssize_t pktgen_if_write(struct file *file,
return -EFAULT;
buf[len] = 0;
- scan_ip6(buf, pkt_dev->in6_daddr.s6_addr);
+ in6_pton(buf, -1, pkt_dev->in6_daddr.s6_addr, -1, NULL);
snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr);
pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr;
@@ -1322,7 +1320,7 @@ static ssize_t pktgen_if_write(struct file *file,
return -EFAULT;
buf[len] = 0;
- scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr);
+ in6_pton(buf, -1, pkt_dev->min_in6_daddr.s6_addr, -1, NULL);
snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr);
pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr;
@@ -1344,7 +1342,7 @@ static ssize_t pktgen_if_write(struct file *file,
return -EFAULT;
buf[len] = 0;
- scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr);
+ in6_pton(buf, -1, pkt_dev->max_in6_daddr.s6_addr, -1, NULL);
snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->max_in6_daddr);
if (debug)
@@ -1365,7 +1363,7 @@ static ssize_t pktgen_if_write(struct file *file,
return -EFAULT;
buf[len] = 0;
- scan_ip6(buf, pkt_dev->in6_saddr.s6_addr);
+ in6_pton(buf, -1, pkt_dev->in6_saddr.s6_addr, -1, NULL);
snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr);
pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr;
@@ -2036,19 +2034,17 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
/* Set up Dest MAC */
memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN);
- /* Set up pkt size */
- pkt_dev->cur_pkt_size = pkt_dev->min_pkt_size;
-
if (pkt_dev->flags & F_IPV6) {
- /*
- * Skip this automatic address setting until locks or functions
- * gets exported
- */
-
-#ifdef NOTNOW
int i, set = 0, err = 1;
struct inet6_dev *idev;
+ if (pkt_dev->min_pkt_size == 0) {
+ pkt_dev->min_pkt_size = 14 + sizeof(struct ipv6hdr)
+ + sizeof(struct udphdr)
+ + sizeof(struct pktgen_hdr)
+ + pkt_dev->pkt_overhead;
+ }
+
for (i = 0; i < IN6_ADDR_HSIZE; i++)
if (pkt_dev->cur_in6_saddr.s6_addr[i]) {
set = 1;
@@ -2069,9 +2065,8 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
struct inet6_ifaddr *ifp;
read_lock_bh(&idev->lock);
- for (ifp = idev->addr_list; ifp;
- ifp = ifp->if_next) {
- if (ifp->scope == IFA_LINK &&
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if ((ifp->scope & IFA_LINK) &&
!(ifp->flags & IFA_F_TENTATIVE)) {
pkt_dev->cur_in6_saddr = ifp->addr;
err = 0;
@@ -2084,8 +2079,14 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
if (err)
pr_err("ERROR: IPv6 link address not available\n");
}
-#endif
} else {
+ if (pkt_dev->min_pkt_size == 0) {
+ pkt_dev->min_pkt_size = 14 + sizeof(struct iphdr)
+ + sizeof(struct udphdr)
+ + sizeof(struct pktgen_hdr)
+ + pkt_dev->pkt_overhead;
+ }
+
pkt_dev->saddr_min = 0;
pkt_dev->saddr_max = 0;
if (strlen(pkt_dev->src_min) == 0) {
@@ -2111,6 +2112,10 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
}
/* Initialize current values. */
+ pkt_dev->cur_pkt_size = pkt_dev->min_pkt_size;
+ if (pkt_dev->min_pkt_size > pkt_dev->max_pkt_size)
+ pkt_dev->max_pkt_size = pkt_dev->min_pkt_size;
+
pkt_dev->cur_dst_mac_offset = 0;
pkt_dev->cur_src_mac_offset = 0;
pkt_dev->cur_saddr = pkt_dev->saddr_min;
@@ -2758,97 +2763,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
return skb;
}
-/*
- * scan_ip6, fmt_ip taken from dietlibc-0.21
- * Author Felix von Leitner <felix-dietlibc@fefe.de>
- *
- * Slightly modified for kernel.
- * Should be candidate for net/ipv4/utils.c
- * --ro
- */
-
-static unsigned int scan_ip6(const char *s, char ip[16])
-{
- unsigned int i;
- unsigned int len = 0;
- unsigned long u;
- char suffix[16];
- unsigned int prefixlen = 0;
- unsigned int suffixlen = 0;
- __be32 tmp;
- char *pos;
-
- for (i = 0; i < 16; i++)
- ip[i] = 0;
-
- for (;;) {
- if (*s == ':') {
- len++;
- if (s[1] == ':') { /* Found "::", skip to part 2 */
- s += 2;
- len++;
- break;
- }
- s++;
- }
-
- u = simple_strtoul(s, &pos, 16);
- i = pos - s;
- if (!i)
- return 0;
- if (prefixlen == 12 && s[i] == '.') {
-
- /* the last 4 bytes may be written as IPv4 address */
-
- tmp = in_aton(s);
- memcpy((struct in_addr *)(ip + 12), &tmp, sizeof(tmp));
- return i + len;
- }
- ip[prefixlen++] = (u >> 8);
- ip[prefixlen++] = (u & 255);
- s += i;
- len += i;
- if (prefixlen == 16)
- return len;
- }
-
-/* part 2, after "::" */
- for (;;) {
- if (*s == ':') {
- if (suffixlen == 0)
- break;
- s++;
- len++;
- } else if (suffixlen != 0)
- break;
-
- u = simple_strtol(s, &pos, 16);
- i = pos - s;
- if (!i) {
- if (*s)
- len--;
- break;
- }
- if (suffixlen + prefixlen <= 12 && s[i] == '.') {
- tmp = in_aton(s);
- memcpy((struct in_addr *)(suffix + suffixlen), &tmp,
- sizeof(tmp));
- suffixlen += 4;
- len += strlen(s);
- break;
- }
- suffix[suffixlen++] = (u >> 8);
- suffix[suffixlen++] = (u & 255);
- s += i;
- len += i;
- if (prefixlen + suffixlen == 16)
- break;
- }
- for (i = 0; i < suffixlen; i++)
- ip[16 - suffixlen + i] = suffix[i];
- return len;
-}
-
static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
struct pktgen_dev *pkt_dev)
{
@@ -2927,7 +2841,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
sizeof(struct ipv6hdr) - sizeof(struct udphdr) -
pkt_dev->pkt_overhead;
- if (datalen < sizeof(struct pktgen_hdr)) {
+ if (datalen < 0 || datalen < sizeof(struct pktgen_hdr)) {
datalen = sizeof(struct pktgen_hdr);
net_info_ratelimited("increased datalen to %d\n", datalen);
}
@@ -3548,8 +3462,6 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
}
pkt_dev->removal_mark = 0;
- pkt_dev->min_pkt_size = ETH_ZLEN;
- pkt_dev->max_pkt_size = ETH_ZLEN;
pkt_dev->nfrags = 0;
pkt_dev->delay = pg_delay_d;
pkt_dev->count = pg_count_d;
diff --git a/net/core/scm.c b/net/core/scm.c
index 9c1c63da3ca8..ab570841a532 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -301,11 +301,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
break;
}
/* Bump the usage count and install the file. */
- get_file(fp[i]);
sock = sock_from_file(fp[i], &err);
if (sock)
sock_update_netprioidx(sock->sk, current);
- fd_install(new_fd, fp[i]);
+ fd_install(new_fd, get_file(fp[i]));
}
if (i > 0)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cdc28598f4ef..6e04b1fa11f2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -655,53 +655,6 @@ void consume_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(consume_skb);
-/**
- * skb_recycle - clean up an skb for reuse
- * @skb: buffer
- *
- * Recycles the skb to be reused as a receive buffer. This
- * function does any necessary reference count dropping, and
- * cleans up the skbuff as if it just came from __alloc_skb().
- */
-void skb_recycle(struct sk_buff *skb)
-{
- struct skb_shared_info *shinfo;
-
- skb_release_head_state(skb);
-
- shinfo = skb_shinfo(skb);
- memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
- atomic_set(&shinfo->dataref, 1);
-
- memset(skb, 0, offsetof(struct sk_buff, tail));
- skb->data = skb->head + NET_SKB_PAD;
- skb_reset_tail_pointer(skb);
-}
-EXPORT_SYMBOL(skb_recycle);
-
-/**
- * skb_recycle_check - check if skb can be reused for receive
- * @skb: buffer
- * @skb_size: minimum receive buffer size
- *
- * Checks that the skb passed in is not shared or cloned, and
- * that it is linear and its head portion at least as large as
- * skb_size so that it can be recycled as a receive buffer.
- * If these conditions are met, this function does any necessary
- * reference count dropping and cleans up the skbuff as if it
- * just came from __alloc_skb().
- */
-bool skb_recycle_check(struct sk_buff *skb, int skb_size)
-{
- if (!skb_is_recycleable(skb, skb_size))
- return false;
-
- skb_recycle(skb);
-
- return true;
-}
-EXPORT_SYMBOL(skb_recycle_check);
-
static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
new->tstamp = old->tstamp;
diff --git a/net/core/utils.c b/net/core/utils.c
index f5613d569c23..e3487e461939 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -107,6 +107,18 @@ static inline int xdigit2bin(char c, int delim)
return IN6PTON_UNKNOWN;
}
+/**
+ * in4_pton - convert an IPv4 address from literal to binary representation
+ * @src: the start of the IPv4 address string
+ * @srclen: the length of the string, -1 means strlen(src)
+ * @dst: the binary (u8[4] array) representation of the IPv4 address
+ * @delim: the delimiter of the IPv4 address in @src, -1 means no delimiter
+ * @end: A pointer to the end of the parsed string will be placed here
+ *
+ * Return one on success, return zero when any error occurs
+ * and @end will point to the end of the parsed string.
+ *
+ */
int in4_pton(const char *src, int srclen,
u8 *dst,
int delim, const char **end)
@@ -161,6 +173,18 @@ out:
}
EXPORT_SYMBOL(in4_pton);
+/**
+ * in6_pton - convert an IPv6 address from literal to binary representation
+ * @src: the start of the IPv6 address string
+ * @srclen: the length of the string, -1 means strlen(src)
+ * @dst: the binary (u8[16] array) representation of the IPv6 address
+ * @delim: the delimiter of the IPv6 address in @src, -1 means no delimiter
+ * @end: A pointer to the end of the parsed string will be placed here
+ *
+ * Return one on success, return zero when any error occurs
+ * and @end will point to the end of the parsed string.
+ *
+ */
int in6_pton(const char *src, int srclen,
u8 *dst,
int delim, const char **end)
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index e65f2c856e06..faf7cc3483fe 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -220,7 +220,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
dn_rt_cache_flush(-1);
}
-static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = {
+static const struct fib_rules_ops __net_initconst dn_fib_rules_ops_template = {
.family = AF_DECnet,
.rule_size = sizeof(struct dn_fib_rule),
.addr_size = sizeof(u16),
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 9807945a56d9..8aa4b1115384 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -59,13 +59,13 @@ const struct cred *dns_resolver_cache;
* "ip1,ip2,...#foo=bar"
*/
static int
-dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen)
+dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep)
{
struct user_key_payload *upayload;
unsigned long derrno;
int ret;
- size_t result_len = 0;
- const char *data = _data, *end, *opt;
+ size_t datalen = prep->datalen, result_len = 0;
+ const char *data = prep->data, *end, *opt;
kenter("%%%d,%s,'%*.*s',%zu",
key->serial, key->description,
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 68c93d1bb03a..825c608826de 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -322,7 +322,8 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
{
int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
- if (!r && !fib_num_tclassid_users(dev_net(dev))) {
+ if (!r && !fib_num_tclassid_users(dev_net(dev)) &&
+ (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
*itag = 0;
return 0;
}
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 274309d3aded..26aa65d1fce4 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -262,7 +262,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
rt_cache_flush(ops->fro_net);
}
-static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
+static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = {
.family = AF_INET,
.rule_size = sizeof(struct fib4_rule),
.addr_size = sizeof(u32),
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 267753060ffc..71b125cd5db1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -840,6 +840,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
change_nexthops(fi) {
nexthop_nh->nh_parent = fi;
nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *);
+ if (!nexthop_nh->nh_pcpu_rth_output)
+ goto failure;
} endfor_nexthops(fi)
if (cfg->fc_mx) {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f0c5b9c1a957..d34ce2972c8f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -406,7 +406,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && rt->rt_gateway)
+ if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
return &rt->dst;
@@ -442,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
goto no_route;
- if (opt && opt->opt.is_strictroute && rt->rt_gateway)
+ if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
rcu_read_unlock();
return &rt->dst;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index ab09b126423c..694de3b7aebf 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -85,7 +85,7 @@ int ip_forward(struct sk_buff *skb)
rt = skb_rtable(skb);
- if (opt->is_strictroute && opt->nexthop != rt->rt_gateway)
+ if (opt->is_strictroute && rt->rt_uses_gateway)
goto sr_failed;
if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 24a29a39e9a8..6537a408a4fb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -193,7 +193,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
}
rcu_read_lock_bh();
- nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr;
+ nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
@@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
skb_dst_set_noref(skb, &rt->dst);
packet_routed:
- if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway)
+ if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
goto no_route;
/* OK, we know where to send it, allocate and build IP header. */
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 978bca4818ae..1831092f999f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -374,7 +374,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
memset(&fl4, 0, sizeof(fl4));
flowi4_init_output(&fl4, tunnel->parms.link,
- htonl(tunnel->parms.i_key), RT_TOS(tos),
+ be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
RT_SCOPE_UNIVERSE,
IPPROTO_IPIP, 0,
dst, tiph->saddr, 0, 0);
@@ -441,7 +441,7 @@ static int vti_tunnel_bind_dev(struct net_device *dev)
struct flowi4 fl4;
memset(&fl4, 0, sizeof(fl4));
flowi4_init_output(&fl4, tunnel->parms.link,
- htonl(tunnel->parms.i_key),
+ be32_to_cpu(tunnel->parms.i_key),
RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
IPPROTO_IPIP, 0,
iph->daddr, iph->saddr, 0, 0);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 1daa95c2a0ba..6168c4dc58b1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -221,7 +221,7 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
return 0;
}
-static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
+static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = {
.family = RTNL_FAMILY_IPMR,
.rule_size = sizeof(struct ipmr_rule),
.addr_size = sizeof(u32),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ff622069fcef..432f4bb77238 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -802,7 +802,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
net = dev_net(rt->dst.dev);
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
if (!peer) {
- icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
+ icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
+ rt_nexthop(rt, ip_hdr(skb)->daddr));
return;
}
@@ -827,7 +828,9 @@ void ip_rt_send_redirect(struct sk_buff *skb)
time_after(jiffies,
(peer->rate_last +
(ip_rt_redirect_load << peer->rate_tokens)))) {
- icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
+ __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);
+
+ icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
peer->rate_last = jiffies;
++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
@@ -835,7 +838,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
peer->rate_tokens == ip_rt_redirect_number)
net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
&ip_hdr(skb)->saddr, inet_iif(skb),
- &ip_hdr(skb)->daddr, &rt->rt_gateway);
+ &ip_hdr(skb)->daddr, &gw);
#endif
}
out_put_peer:
@@ -904,22 +907,32 @@ out: kfree_skb(skb);
return 0;
}
-static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
+static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
+ struct dst_entry *dst = &rt->dst;
struct fib_result res;
+ if (dst->dev->mtu < mtu)
+ return;
+
if (mtu < ip_rt_min_pmtu)
mtu = ip_rt_min_pmtu;
+ if (!rt->rt_pmtu) {
+ dst->obsolete = DST_OBSOLETE_KILL;
+ } else {
+ rt->rt_pmtu = mtu;
+ dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
+ }
+
rcu_read_lock();
- if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) {
+ if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
struct fib_nh *nh = &FIB_RES_NH(res);
update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
jiffies + ip_rt_mtu_expires);
}
rcu_read_unlock();
- return mtu;
}
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
@@ -929,14 +942,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
- mtu = __ip_rt_update_pmtu(rt, &fl4, mtu);
-
- if (!rt->rt_pmtu) {
- dst->obsolete = DST_OBSOLETE_KILL;
- } else {
- rt->rt_pmtu = mtu;
- rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires);
- }
+ __ip_rt_update_pmtu(rt, &fl4, mtu);
}
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
@@ -1120,7 +1126,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = dst->dev->mtu;
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
- if (rt->rt_gateway && mtu > 576)
+ if (rt->rt_uses_gateway && mtu > 576)
mtu = 576;
}
@@ -1171,7 +1177,9 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
if (fnhe->fnhe_gw) {
rt->rt_flags |= RTCF_REDIRECTED;
rt->rt_gateway = fnhe->fnhe_gw;
- }
+ rt->rt_uses_gateway = 1;
+ } else if (!rt->rt_gateway)
+ rt->rt_gateway = daddr;
orig = rcu_dereference(fnhe->fnhe_rth);
rcu_assign_pointer(fnhe->fnhe_rth, rt);
@@ -1180,13 +1188,6 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
fnhe->fnhe_stamp = jiffies;
ret = true;
- } else {
- /* Routes we intend to cache in nexthop exception have
- * the DST_NOCACHE bit clear. However, if we are
- * unsuccessful at storing this route into the cache
- * we really need to set it.
- */
- rt->dst.flags |= DST_NOCACHE;
}
spin_unlock_bh(&fnhe_lock);
@@ -1201,8 +1202,6 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
if (rt_is_input_route(rt)) {
p = (struct rtable **)&nh->nh_rth_input;
} else {
- if (!nh->nh_pcpu_rth_output)
- goto nocache;
p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
}
orig = *p;
@@ -1211,16 +1210,8 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
if (prev == orig) {
if (orig)
rt_free(orig);
- } else {
- /* Routes we intend to cache in the FIB nexthop have
- * the DST_NOCACHE bit clear. However, if we are
- * unsuccessful at storing this route into the cache
- * we really need to set it.
- */
-nocache:
- rt->dst.flags |= DST_NOCACHE;
+ } else
ret = false;
- }
return ret;
}
@@ -1281,8 +1272,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
if (fi) {
struct fib_nh *nh = &FIB_RES_NH(*res);
- if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
+ if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
rt->rt_gateway = nh->nh_gw;
+ rt->rt_uses_gateway = 1;
+ }
dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
@@ -1291,8 +1284,18 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
cached = rt_bind_exception(rt, fnhe, daddr);
else if (!(rt->dst.flags & DST_NOCACHE))
cached = rt_cache_route(nh, rt);
- }
- if (unlikely(!cached))
+ if (unlikely(!cached)) {
+ /* Routes we intend to cache in nexthop exception or
+ * FIB nexthop have the DST_NOCACHE bit clear.
+ * However, if we are unsuccessful at storing this
+ * route into the cache we really need to set it.
+ */
+ rt->dst.flags |= DST_NOCACHE;
+ if (!rt->rt_gateway)
+ rt->rt_gateway = daddr;
+ rt_add_uncached_list(rt);
+ }
+ } else
rt_add_uncached_list(rt);
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -1360,6 +1363,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
if (our) {
rth->dst.input= ip_local_deliver;
@@ -1429,7 +1433,6 @@ static int __mkroute_input(struct sk_buff *skb,
return -EINVAL;
}
-
err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
in_dev->dev, in_dev, &itag);
if (err < 0) {
@@ -1439,10 +1442,13 @@ static int __mkroute_input(struct sk_buff *skb,
goto cleanup;
}
- if (out_dev == in_dev && err &&
+ do_cache = res->fi && !itag;
+ if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
(IN_DEV_SHARED_MEDIA(out_dev) ||
- inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
+ inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
flags |= RTCF_DOREDIRECT;
+ do_cache = false;
+ }
if (skb->protocol != htons(ETH_P_IP)) {
/* Not IP (i.e. ARP). Do not create route, if it is
@@ -1459,15 +1465,11 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
- do_cache = false;
- if (res->fi) {
- if (!itag) {
- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
- if (rt_cache_valid(rth)) {
- skb_dst_set_noref(skb, &rth->dst);
- goto out;
- }
- do_cache = true;
+ if (do_cache) {
+ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+ if (rt_cache_valid(rth)) {
+ skb_dst_set_noref(skb, &rth->dst);
+ goto out;
}
}
@@ -1486,6 +1488,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
rth->dst.input = ip_forward;
@@ -1656,6 +1659,7 @@ local_input:
rth->rt_iif = 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
@@ -1758,6 +1762,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
struct in_device *in_dev;
u16 type = res->type;
struct rtable *rth;
+ bool do_cache;
in_dev = __in_dev_get_rcu(dev_out);
if (!in_dev)
@@ -1794,24 +1799,36 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
}
fnhe = NULL;
+ do_cache = fi != NULL;
if (fi) {
struct rtable __rcu **prth;
+ struct fib_nh *nh = &FIB_RES_NH(*res);
- fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
+ fnhe = find_exception(nh, fl4->daddr);
if (fnhe)
prth = &fnhe->fnhe_rth;
- else
- prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
+ else {
+ if (unlikely(fl4->flowi4_flags &
+ FLOWI_FLAG_KNOWN_NH &&
+ !(nh->nh_gw &&
+ nh->nh_scope == RT_SCOPE_LINK))) {
+ do_cache = false;
+ goto add;
+ }
+ prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
+ }
rth = rcu_dereference(*prth);
if (rt_cache_valid(rth)) {
dst_hold(&rth->dst);
return rth;
}
}
+
+add:
rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(in_dev, NOXFRM),
- fi);
+ do_cache);
if (!rth)
return ERR_PTR(-ENOBUFS);
@@ -1824,6 +1841,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_iif = orig_oif ? : 0;
rth->rt_pmtu = 0;
rth->rt_gateway = 0;
+ rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
RT_CACHE_STAT_INC(out_slow_tot);
@@ -2102,6 +2120,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_flags = ort->rt_flags;
rt->rt_type = ort->rt_type;
rt->rt_gateway = ort->rt_gateway;
+ rt->rt_uses_gateway = ort->rt_uses_gateway;
INIT_LIST_HEAD(&rt->rt_uncached);
@@ -2180,28 +2199,31 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
goto nla_put_failure;
}
- if (rt->rt_gateway &&
+ if (rt->rt_uses_gateway &&
nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
goto nla_put_failure;
+ expires = rt->dst.expires;
+ if (expires) {
+ unsigned long now = jiffies;
+
+ if (time_before(now, expires))
+ expires -= now;
+ else
+ expires = 0;
+ }
+
memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
- if (rt->rt_pmtu)
+ if (rt->rt_pmtu && expires)
metrics[RTAX_MTU - 1] = rt->rt_pmtu;
if (rtnetlink_put_metrics(skb, metrics) < 0)
goto nla_put_failure;
if (fl4->flowi4_mark &&
- nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark))
+ nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
goto nla_put_failure;
error = rt->dst.error;
- expires = rt->dst.expires;
- if (expires) {
- if (time_before(jiffies, expires))
- expires -= jiffies;
- else
- expires = 0;
- }
if (rt_is_input_route(rt)) {
if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9205e492dc9d..63d4eccc674d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -248,6 +248,8 @@ int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer,
ctxt = rcu_dereference(tcp_fastopen_ctx);
if (ctxt)
memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
+ else
+ memset(user_key, 0, sizeof(user_key));
rcu_read_unlock();
snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 75735c9a6a9d..ef998b008a57 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -708,10 +708,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
/* When socket is gone, all binding information is lost.
- * routing might fail in this case. using iif for oif to
- * make sure we can deliver it
+ * routing might fail in this case. No choice here, if we choose to force
+ * input interface, we will misroute in case of asymmetric route.
*/
- arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb);
+ if (sk)
+ arg.bound_dev_if = sk->sk_bound_dev_if;
net = dev_net(skb_dst(skb)->dev);
arg.tos = ip_hdr(skb)->tos;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 681ea2f413e2..05c5ab8d983c 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -91,6 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
RTCF_LOCAL);
xdst->u.rt.rt_type = rt->rt_type;
xdst->u.rt.rt_gateway = rt->rt_gateway;
+ xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d7c56f8a5b4e..0424e4e27414 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3064,14 +3064,15 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos)
struct hlist_node *n;
hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket],
addr_lst) {
+ if (!net_eq(dev_net(ifa->idev->dev), net))
+ continue;
/* sync with offset */
if (p < state->offset) {
p++;
continue;
}
state->offset++;
- if (net_eq(dev_net(ifa->idev->dev), net))
- return ifa;
+ return ifa;
}
/* prepare for next bucket */
@@ -3089,18 +3090,20 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
struct hlist_node *n = &ifa->addr_lst;
hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst) {
+ if (!net_eq(dev_net(ifa->idev->dev), net))
+ continue;
state->offset++;
- if (net_eq(dev_net(ifa->idev->dev), net))
- return ifa;
+ return ifa;
}
while (++state->bucket < IN6_ADDR_HSIZE) {
state->offset = 0;
hlist_for_each_entry_rcu_bh(ifa, n,
&inet6_addr_lst[state->bucket], addr_lst) {
+ if (!net_eq(dev_net(ifa->idev->dev), net))
+ continue;
state->offset++;
- if (net_eq(dev_net(ifa->idev->dev), net))
- return ifa;
+ return ifa;
}
}
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 4be23da32b89..ff76eecfd622 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -79,7 +79,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
#define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
-static const __net_initdata struct ip6addrlbl_init_table
+static const __net_initconst struct ip6addrlbl_init_table
{
const struct in6_addr *prefix;
int prefixlen;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e22e6d88bac6..a974247a9ae4 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -822,13 +822,6 @@ out:
return segs;
}
-struct ipv6_gro_cb {
- struct napi_gro_cb napi;
- int proto;
-};
-
-#define IPV6_GRO_CB(skb) ((struct ipv6_gro_cb *)(skb)->cb)
-
static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
@@ -874,28 +867,31 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
iph = ipv6_hdr(skb);
}
- IPV6_GRO_CB(skb)->proto = proto;
+ NAPI_GRO_CB(skb)->proto = proto;
flush--;
nlen = skb_network_header_len(skb);
for (p = *head; p; p = p->next) {
- struct ipv6hdr *iph2;
+ const struct ipv6hdr *iph2;
+ __be32 first_word; /* <Version:4><Traffic_Class:8><Flow_Label:20> */
if (!NAPI_GRO_CB(p)->same_flow)
continue;
iph2 = ipv6_hdr(p);
+ first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ;
- /* All fields must match except length. */
+ /* All fields must match except length and Traffic Class. */
if (nlen != skb_network_header_len(p) ||
- memcmp(iph, iph2, offsetof(struct ipv6hdr, payload_len)) ||
+ (first_word & htonl(0xF00FFFFF)) ||
memcmp(&iph->nexthdr, &iph2->nexthdr,
nlen - offsetof(struct ipv6hdr, nexthdr))) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
-
+ /* flush if Traffic Class fields are different */
+ NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
NAPI_GRO_CB(p)->flush |= flush;
}
@@ -927,7 +923,7 @@ static int ipv6_gro_complete(struct sk_buff *skb)
sizeof(*iph));
rcu_read_lock();
- ops = rcu_dereference(inet6_protos[IPV6_GRO_CB(skb)->proto]);
+ ops = rcu_dereference(inet6_protos[NAPI_GRO_CB(skb)->proto]);
if (WARN_ON(!ops || !ops->gro_complete))
goto out_unlock;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 0ff1cfd55bc4..d9fb9110f607 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -238,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
+ nla_total_size(16); /* src */
}
-static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = {
+static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.family = AF_INET6,
.rule_size = sizeof(struct fib6_rule),
.addr_size = sizeof(struct in6_addr),
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 08ea3f0b6e55..f7c7c6319720 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -205,7 +205,7 @@ static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
return 0;
}
-static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
+static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
.family = RTNL_FAMILY_IP6MR,
.rule_size = sizeof(struct ip6mr_rule),
.addr_size = sizeof(struct in6_addr),
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 49c890386ce9..26175bffbaa0 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -877,7 +877,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
fl6.flowi6_proto = IPPROTO_TCP;
- fl6.flowi6_oif = inet6_iif(skb);
+ if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+ fl6.flowi6_oif = inet6_iif(skb);
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index accfa00ffcdf..a16b7b4b1e02 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -56,7 +56,6 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata)
u64 tsfdelta;
spin_lock_bh(&ifmsh->sync_offset_lock);
-
if (ifmsh->sync_offset_clockdrift_max < beacon_int_fraction) {
msync_dbg(sdata, "TBTT : max clockdrift=%lld; adjusting\n",
(long long) ifmsh->sync_offset_clockdrift_max);
@@ -69,11 +68,11 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata)
tsfdelta = -beacon_int_fraction;
ifmsh->sync_offset_clockdrift_max -= beacon_int_fraction;
}
+ spin_unlock_bh(&ifmsh->sync_offset_lock);
tsf = drv_get_tsf(local, sdata);
if (tsf != -1ULL)
drv_set_tsf(local, sdata, tsf + tsfdelta);
- spin_unlock_bh(&ifmsh->sync_offset_lock);
}
static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 2ce89732d0f2..3af0cc4130f1 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -34,7 +34,7 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw,
skb_queue_len(&local->skb_queue_unreliable);
while (tmp > IEEE80211_IRQSAFE_QUEUE_LIMIT &&
(skb = skb_dequeue(&local->skb_queue_unreliable))) {
- dev_kfree_skb_irq(skb);
+ ieee80211_free_txskb(hw, skb);
tmp--;
I802_DEBUG_INC(local->tx_status_drop);
}
@@ -159,7 +159,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
"dropped TX filtered frame, queue_len=%d PS=%d @%lu\n",
skb_queue_len(&sta->tx_filtered[ac]),
!!test_sta_flag(sta, WLAN_STA_PS_STA), jiffies);
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
}
static void ieee80211_check_pending_bar(struct sta_info *sta, u8 *addr, u8 tid)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e0e0d1d0e830..c9bf83f36657 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -354,7 +354,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
total += skb_queue_len(&sta->ps_tx_buf[ac]);
if (skb) {
purged++;
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
break;
}
}
@@ -466,7 +466,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
ps_dbg(tx->sdata,
"STA %pM TX buffer for AC %d full - dropping oldest frame\n",
sta->sta.addr, ac);
- dev_kfree_skb(old);
+ ieee80211_free_txskb(&local->hw, old);
} else
tx->local->total_ps_buffered++;
@@ -1103,7 +1103,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
spin_unlock(&tx->sta->lock);
if (purge_skb)
- dev_kfree_skb(purge_skb);
+ ieee80211_free_txskb(&tx->local->hw, purge_skb);
}
/* reset session timer */
@@ -1214,7 +1214,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
if (WARN_ON_ONCE(q >= local->hw.queues)) {
__skb_unlink(skb, skbs);
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
continue;
}
#endif
@@ -1356,7 +1356,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
if (unlikely(res == TX_DROP)) {
I802_DEBUG_INC(tx->local->tx_handlers_drop);
if (tx->skb)
- dev_kfree_skb(tx->skb);
+ ieee80211_free_txskb(&tx->local->hw, tx->skb);
else
__skb_queue_purge(&tx->skbs);
return -1;
@@ -1393,7 +1393,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
res_prepare = ieee80211_tx_prepare(sdata, &tx, skb);
if (unlikely(res_prepare == TX_DROP)) {
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
goto out;
} else if (unlikely(res_prepare == TX_QUEUED)) {
goto out;
@@ -1465,7 +1465,7 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
headroom = max_t(int, 0, headroom);
if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) {
- dev_kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
rcu_read_unlock();
return;
}
@@ -2050,8 +2050,10 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
head_need += IEEE80211_ENCRYPT_HEADROOM;
head_need += local->tx_headroom;
head_need = max_t(int, 0, head_need);
- if (ieee80211_skb_resize(sdata, skb, head_need, true))
- goto fail;
+ if (ieee80211_skb_resize(sdata, skb, head_need, true)) {
+ ieee80211_free_txskb(&local->hw, skb);
+ return NETDEV_TX_OK;
+ }
}
if (encaps_data) {
@@ -2184,7 +2186,7 @@ void ieee80211_tx_pending(unsigned long data)
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
if (WARN_ON(!info->control.vif)) {
- kfree_skb(skb);
+ ieee80211_free_txskb(&local->hw, skb);
continue;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 56f6d5d81a77..cc4c8095681a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -50,6 +50,7 @@ enum {
* local
*/
IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
+ IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */
};
/*
@@ -113,6 +114,8 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
fl4.daddr = daddr;
fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
fl4.flowi4_tos = rtos;
+ fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
+ FLOWI_FLAG_KNOWN_NH : 0;
retry:
rt = ip_route_output_key(net, &fl4);
@@ -1061,7 +1064,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
RT_TOS(iph->tos),
IP_VS_RT_MODE_LOCAL |
- IP_VS_RT_MODE_NON_LOCAL, NULL)))
+ IP_VS_RT_MODE_NON_LOCAL |
+ IP_VS_RT_MODE_KNOWN_NH, NULL)))
goto tx_error_icmp;
if (rt->rt_flags & RTCF_LOCAL) {
ip_rt_put(rt);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0f2e3ad69c47..01e944a017a4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -169,6 +169,8 @@ static void netlink_sock_destruct(struct sock *sk)
if (nlk->cb) {
if (nlk->cb->done)
nlk->cb->done(nlk->cb);
+
+ module_put(nlk->cb->module);
netlink_destroy_callback(nlk->cb);
}
@@ -1758,6 +1760,7 @@ static int netlink_dump(struct sock *sk)
nlk->cb = NULL;
mutex_unlock(nlk->cb_mutex);
+ module_put(cb->module);
netlink_consume_callback(cb);
return 0;
@@ -1767,9 +1770,9 @@ errout_skb:
return err;
}
-int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
- const struct nlmsghdr *nlh,
- struct netlink_dump_control *control)
+int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct netlink_dump_control *control)
{
struct netlink_callback *cb;
struct sock *sk;
@@ -1784,6 +1787,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->done = control->done;
cb->nlh = nlh;
cb->data = control->data;
+ cb->module = control->module;
cb->min_dump_alloc = control->min_dump_alloc;
atomic_inc(&skb->users);
cb->skb = skb;
@@ -1794,19 +1798,28 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
return -ECONNREFUSED;
}
nlk = nlk_sk(sk);
- /* A dump is in progress... */
+
mutex_lock(nlk->cb_mutex);
+ /* A dump is in progress... */
if (nlk->cb) {
mutex_unlock(nlk->cb_mutex);
netlink_destroy_callback(cb);
- sock_put(sk);
- return -EBUSY;
+ ret = -EBUSY;
+ goto out;
}
+ /* add reference of module which cb->dump belongs to */
+ if (!try_module_get(cb->module)) {
+ mutex_unlock(nlk->cb_mutex);
+ netlink_destroy_callback(cb);
+ ret = -EPROTONOSUPPORT;
+ goto out;
+ }
+
nlk->cb = cb;
mutex_unlock(nlk->cb_mutex);
ret = netlink_dump(sk);
-
+out:
sock_put(sk);
if (ret)
@@ -1817,7 +1830,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
*/
return -EINTR;
}
-EXPORT_SYMBOL(netlink_dump_start);
+EXPORT_SYMBOL(__netlink_dump_start);
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{
diff --git a/net/rds/send.c b/net/rds/send.c
index 96531d4033a2..88eace57dd6b 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1122,7 +1122,7 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
rds_stats_inc(s_send_pong);
if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
- rds_send_xmit(conn);
+ queue_delayed_work(rds_wq, &conn->c_send_w, 0);
rds_message_put(rm);
return 0;
diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c
index 011d2384b115..7633a752c65e 100644
--- a/net/rxrpc/ar-key.c
+++ b/net/rxrpc/ar-key.c
@@ -26,8 +26,8 @@
#include "ar-internal.h"
static int rxrpc_vet_description_s(const char *);
-static int rxrpc_instantiate(struct key *, const void *, size_t);
-static int rxrpc_instantiate_s(struct key *, const void *, size_t);
+static int rxrpc_instantiate(struct key *, struct key_preparsed_payload *);
+static int rxrpc_instantiate_s(struct key *, struct key_preparsed_payload *);
static void rxrpc_destroy(struct key *);
static void rxrpc_destroy_s(struct key *);
static void rxrpc_describe(const struct key *, struct seq_file *);
@@ -678,7 +678,7 @@ error:
*
* if no data is provided, then a no-security key is made
*/
-static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
+static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep)
{
const struct rxrpc_key_data_v1 *v1;
struct rxrpc_key_token *token, **pp;
@@ -686,26 +686,26 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
u32 kver;
int ret;
- _enter("{%x},,%zu", key_serial(key), datalen);
+ _enter("{%x},,%zu", key_serial(key), prep->datalen);
/* handle a no-security key */
- if (!data && datalen == 0)
+ if (!prep->data && prep->datalen == 0)
return 0;
/* determine if the XDR payload format is being used */
- if (datalen > 7 * 4) {
- ret = rxrpc_instantiate_xdr(key, data, datalen);
+ if (prep->datalen > 7 * 4) {
+ ret = rxrpc_instantiate_xdr(key, prep->data, prep->datalen);
if (ret != -EPROTO)
return ret;
}
/* get the key interface version number */
ret = -EINVAL;
- if (datalen <= 4 || !data)
+ if (prep->datalen <= 4 || !prep->data)
goto error;
- memcpy(&kver, data, sizeof(kver));
- data += sizeof(kver);
- datalen -= sizeof(kver);
+ memcpy(&kver, prep->data, sizeof(kver));
+ prep->data += sizeof(kver);
+ prep->datalen -= sizeof(kver);
_debug("KEY I/F VERSION: %u", kver);
@@ -715,11 +715,11 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen)
/* deal with a version 1 key */
ret = -EINVAL;
- if (datalen < sizeof(*v1))
+ if (prep->datalen < sizeof(*v1))
goto error;
- v1 = data;
- if (datalen != sizeof(*v1) + v1->ticket_length)
+ v1 = prep->data;
+ if (prep->datalen != sizeof(*v1) + v1->ticket_length)
goto error;
_debug("SCIX: %u", v1->security_index);
@@ -784,17 +784,17 @@ error:
* instantiate a server secret key
* data should be a pointer to the 8-byte secret key
*/
-static int rxrpc_instantiate_s(struct key *key, const void *data,
- size_t datalen)
+static int rxrpc_instantiate_s(struct key *key,
+ struct key_preparsed_payload *prep)
{
struct crypto_blkcipher *ci;
- _enter("{%x},,%zu", key_serial(key), datalen);
+ _enter("{%x},,%zu", key_serial(key), prep->datalen);
- if (datalen != 8)
+ if (prep->datalen != 8)
return -EINVAL;
- memcpy(&key->type_data, data, 8);
+ memcpy(&key->type_data, prep->data, 8);
ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(ci)) {
@@ -802,7 +802,7 @@ static int rxrpc_instantiate_s(struct key *key, const void *data,
return PTR_ERR(ci);
}
- if (crypto_blkcipher_setkey(ci, data, 8) < 0)
+ if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0)
BUG();
key->payload.data = ci;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 57f7de839b03..6773d7803627 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1642,8 +1642,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
asoc->outqueue.outstanding_bytes;
sackh.num_gap_ack_blocks = 0;
sackh.num_dup_tsns = 0;
+ chunk->subh.sack_hdr = &sackh;
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK,
- SCTP_SACKH(&sackh));
+ SCTP_CHUNK(chunk));
break;
case SCTP_CMD_DISCARD_PACKET:
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d37d24ff197f..59d16ea927f0 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -70,6 +70,7 @@
#include <linux/init.h>
#include <linux/crypto.h>
#include <linux/slab.h>
+#include <linux/file.h>
#include <net/ip.h>
#include <net/icmp.h>
@@ -4292,6 +4293,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
{
sctp_peeloff_arg_t peeloff;
struct socket *newsock;
+ struct file *newfile;
int retval = 0;
if (len < sizeof(sctp_peeloff_arg_t))
@@ -4305,22 +4307,35 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval
goto out;
/* Map the socket to an unused fd that can be returned to the user. */
- retval = sock_map_fd(newsock, 0);
+ retval = get_unused_fd();
if (retval < 0) {
sock_release(newsock);
goto out;
}
+ newfile = sock_alloc_file(newsock, 0, NULL);
+ if (unlikely(IS_ERR(newfile))) {
+ put_unused_fd(retval);
+ sock_release(newsock);
+ return PTR_ERR(newfile);
+ }
+
SCTP_DEBUG_PRINTK("%s: sk: %p newsk: %p sd: %d\n",
__func__, sk, newsock->sk, retval);
/* Return the fd mapped to the new socket. */
+ if (put_user(len, optlen)) {
+ fput(newfile);
+ put_unused_fd(retval);
+ return -EFAULT;
+ }
peeloff.sd = retval;
- if (put_user(len, optlen))
+ if (copy_to_user(optval, &peeloff, len)) {
+ fput(newfile);
+ put_unused_fd(retval);
return -EFAULT;
- if (copy_to_user(optval, &peeloff, len))
- retval = -EFAULT;
-
+ }
+ fd_install(retval, newfile);
out:
return retval;
}
diff --git a/net/socket.c b/net/socket.c
index 80dc7e84b046..d92c490e66fa 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -347,17 +347,11 @@ static struct file_system_type sock_fs_type = {
* but we take care of internal coherence yet.
*/
-static int sock_alloc_file(struct socket *sock, struct file **f, int flags,
- const char *dname)
+struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
struct qstr name = { .name = "" };
struct path path;
struct file *file;
- int fd;
-
- fd = get_unused_fd_flags(flags);
- if (unlikely(fd < 0))
- return fd;
if (dname) {
name.name = dname;
@@ -367,10 +361,8 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags,
name.len = strlen(name.name);
}
path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
- if (unlikely(!path.dentry)) {
- put_unused_fd(fd);
- return -ENOMEM;
- }
+ if (unlikely(!path.dentry))
+ return ERR_PTR(-ENOMEM);
path.mnt = mntget(sock_mnt);
d_instantiate(path.dentry, SOCK_INODE(sock));
@@ -382,30 +374,33 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags,
/* drop dentry, keep inode */
ihold(path.dentry->d_inode);
path_put(&path);
- put_unused_fd(fd);
- return -ENFILE;
+ return ERR_PTR(-ENFILE);
}
sock->file = file;
file->f_flags = O_RDWR | (flags & O_NONBLOCK);
file->f_pos = 0;
file->private_data = sock;
-
- *f = file;
- return fd;
+ return file;
}
+EXPORT_SYMBOL(sock_alloc_file);
-int sock_map_fd(struct socket *sock, int flags)
+static int sock_map_fd(struct socket *sock, int flags)
{
struct file *newfile;
- int fd = sock_alloc_file(sock, &newfile, flags, NULL);
+ int fd = get_unused_fd_flags(flags);
+ if (unlikely(fd < 0))
+ return fd;
- if (likely(fd >= 0))
+ newfile = sock_alloc_file(sock, flags, NULL);
+ if (likely(!IS_ERR(newfile))) {
fd_install(fd, newfile);
+ return fd;
+ }
- return fd;
+ put_unused_fd(fd);
+ return PTR_ERR(newfile);
}
-EXPORT_SYMBOL(sock_map_fd);
struct socket *sock_from_file(struct file *file, int *err)
{
@@ -1466,17 +1461,32 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
if (err < 0)
goto out_release_both;
- fd1 = sock_alloc_file(sock1, &newfile1, flags, NULL);
+ fd1 = get_unused_fd_flags(flags);
if (unlikely(fd1 < 0)) {
err = fd1;
goto out_release_both;
}
-
- fd2 = sock_alloc_file(sock2, &newfile2, flags, NULL);
+ fd2 = get_unused_fd_flags(flags);
if (unlikely(fd2 < 0)) {
err = fd2;
+ put_unused_fd(fd1);
+ goto out_release_both;
+ }
+
+ newfile1 = sock_alloc_file(sock1, flags, NULL);
+ if (unlikely(IS_ERR(newfile1))) {
+ err = PTR_ERR(newfile1);
+ put_unused_fd(fd1);
+ put_unused_fd(fd2);
+ goto out_release_both;
+ }
+
+ newfile2 = sock_alloc_file(sock2, flags, NULL);
+ if (IS_ERR(newfile2)) {
+ err = PTR_ERR(newfile2);
fput(newfile1);
put_unused_fd(fd1);
+ put_unused_fd(fd2);
sock_release(sock2);
goto out;
}
@@ -1608,13 +1618,19 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
*/
__module_get(newsock->ops->owner);
- newfd = sock_alloc_file(newsock, &newfile, flags,
- sock->sk->sk_prot_creator->name);
+ newfd = get_unused_fd_flags(flags);
if (unlikely(newfd < 0)) {
err = newfd;
sock_release(newsock);
goto out_put;
}
+ newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
+ if (unlikely(IS_ERR(newfile))) {
+ err = PTR_ERR(newfile);
+ put_unused_fd(newfd);
+ sock_release(newsock);
+ goto out_put;
+ }
err = security_socket_accept(sock, newsock);
if (err)
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 34c522021004..909dc0c31aab 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -239,7 +239,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
}
return q;
err:
- dprintk("RPC: gss_fill_context returning %ld\n", -PTR_ERR(p));
+ dprintk("RPC: %s returning %ld\n", __func__, -PTR_ERR(p));
return p;
}
@@ -301,10 +301,10 @@ __gss_find_upcall(struct rpc_pipe *pipe, uid_t uid)
if (pos->uid != uid)
continue;
atomic_inc(&pos->count);
- dprintk("RPC: gss_find_upcall found msg %p\n", pos);
+ dprintk("RPC: %s found msg %p\n", __func__, pos);
return pos;
}
- dprintk("RPC: gss_find_upcall found nothing\n");
+ dprintk("RPC: %s found nothing\n", __func__);
return NULL;
}
@@ -507,8 +507,8 @@ gss_refresh_upcall(struct rpc_task *task)
struct rpc_pipe *pipe;
int err = 0;
- dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid,
- cred->cr_uid);
+ dprintk("RPC: %5u %s for uid %u\n",
+ task->tk_pid, __func__, cred->cr_uid);
gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
/* XXX: warning on the first, under the assumption we
@@ -539,8 +539,8 @@ gss_refresh_upcall(struct rpc_task *task)
spin_unlock(&pipe->lock);
gss_release_msg(gss_msg);
out:
- dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n",
- task->tk_pid, cred->cr_uid, err);
+ dprintk("RPC: %5u %s for uid %u result %d\n",
+ task->tk_pid, __func__, cred->cr_uid, err);
return err;
}
@@ -553,7 +553,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
DEFINE_WAIT(wait);
int err = 0;
- dprintk("RPC: gss_upcall for uid %u\n", cred->cr_uid);
+ dprintk("RPC: %s for uid %u\n", __func__, cred->cr_uid);
retry:
gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
@@ -594,8 +594,8 @@ out_intr:
finish_wait(&gss_msg->waitqueue, &wait);
gss_release_msg(gss_msg);
out:
- dprintk("RPC: gss_create_upcall for uid %u result %d\n",
- cred->cr_uid, err);
+ dprintk("RPC: %s for uid %u result %d\n",
+ __func__, cred->cr_uid, err);
return err;
}
@@ -681,7 +681,7 @@ err_put_ctx:
err:
kfree(buf);
out:
- dprintk("RPC: gss_pipe_downcall returning %Zd\n", err);
+ dprintk("RPC: %s returning %Zd\n", __func__, err);
return err;
}
@@ -747,8 +747,8 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);
if (msg->errno < 0) {
- dprintk("RPC: gss_pipe_destroy_msg releasing msg %p\n",
- gss_msg);
+ dprintk("RPC: %s releasing msg %p\n",
+ __func__, gss_msg);
atomic_inc(&gss_msg->count);
gss_unhash_msg(gss_msg);
if (msg->errno == -ETIMEDOUT)
@@ -976,7 +976,7 @@ gss_destroying_context(struct rpc_cred *cred)
static void
gss_do_free_ctx(struct gss_cl_ctx *ctx)
{
- dprintk("RPC: gss_free_ctx\n");
+ dprintk("RPC: %s\n", __func__);
gss_delete_sec_context(&ctx->gc_gss_ctx);
kfree(ctx->gc_wire_ctx.data);
@@ -999,7 +999,7 @@ gss_free_ctx(struct gss_cl_ctx *ctx)
static void
gss_free_cred(struct gss_cred *gss_cred)
{
- dprintk("RPC: gss_free_cred %p\n", gss_cred);
+ dprintk("RPC: %s cred=%p\n", __func__, gss_cred);
kfree(gss_cred);
}
@@ -1049,8 +1049,8 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
struct gss_cred *cred = NULL;
int err = -ENOMEM;
- dprintk("RPC: gss_create_cred for uid %d, flavor %d\n",
- acred->uid, auth->au_flavor);
+ dprintk("RPC: %s for uid %d, flavor %d\n",
+ __func__, acred->uid, auth->au_flavor);
if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS)))
goto out_err;
@@ -1069,7 +1069,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
return &cred->gc_base;
out_err:
- dprintk("RPC: gss_create_cred failed with error %d\n", err);
+ dprintk("RPC: %s failed with error %d\n", __func__, err);
return ERR_PTR(err);
}
@@ -1127,7 +1127,7 @@ gss_marshal(struct rpc_task *task, __be32 *p)
struct kvec iov;
struct xdr_buf verf_buf;
- dprintk("RPC: %5u gss_marshal\n", task->tk_pid);
+ dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
*p++ = htonl(RPC_AUTH_GSS);
cred_len = p++;
@@ -1253,7 +1253,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
u32 flav,len;
u32 maj_stat;
- dprintk("RPC: %5u gss_validate\n", task->tk_pid);
+ dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
flav = ntohl(*p++);
if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
@@ -1271,20 +1271,20 @@ gss_validate(struct rpc_task *task, __be32 *p)
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat) {
- dprintk("RPC: %5u gss_validate: gss_verify_mic returned "
- "error 0x%08x\n", task->tk_pid, maj_stat);
+ dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
+ task->tk_pid, __func__, maj_stat);
goto out_bad;
}
/* We leave it to unwrap to calculate au_rslack. For now we just
* calculate the length of the verifier: */
cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
gss_put_ctx(ctx);
- dprintk("RPC: %5u gss_validate: gss_verify_mic succeeded.\n",
- task->tk_pid);
+ dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
+ task->tk_pid, __func__);
return p + XDR_QUADLEN(len);
out_bad:
gss_put_ctx(ctx);
- dprintk("RPC: %5u gss_validate failed.\n", task->tk_pid);
+ dprintk("RPC: %5u %s failed.\n", task->tk_pid, __func__);
return NULL;
}
@@ -1466,7 +1466,7 @@ gss_wrap_req(struct rpc_task *task,
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
int status = -EIO;
- dprintk("RPC: %5u gss_wrap_req\n", task->tk_pid);
+ dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
/* The spec seems a little ambiguous here, but I think that not
* wrapping context destruction requests makes the most sense.
@@ -1489,7 +1489,7 @@ gss_wrap_req(struct rpc_task *task,
}
out:
gss_put_ctx(ctx);
- dprintk("RPC: %5u gss_wrap_req returning %d\n", task->tk_pid, status);
+ dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status);
return status;
}
@@ -1604,8 +1604,8 @@ out_decode:
status = gss_unwrap_req_decode(decode, rqstp, p, obj);
out:
gss_put_ctx(ctx);
- dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,
- status);
+ dprintk("RPC: %5u %s returning %d\n",
+ task->tk_pid, __func__, status);
return status;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fa48c60aef23..cdc7564b4512 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -490,61 +490,86 @@ EXPORT_SYMBOL_GPL(rpc_create);
* same transport while varying parameters such as the authentication
* flavour.
*/
-struct rpc_clnt *
-rpc_clone_client(struct rpc_clnt *clnt)
+static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
+ struct rpc_clnt *clnt)
{
- struct rpc_clnt *new;
struct rpc_xprt *xprt;
- int err = -ENOMEM;
+ struct rpc_clnt *new;
+ int err;
- new = kmemdup(clnt, sizeof(*new), GFP_KERNEL);
- if (!new)
- goto out_no_clnt;
- new->cl_parent = clnt;
- /* Turn off autobind on clones */
- new->cl_autobind = 0;
- INIT_LIST_HEAD(&new->cl_tasks);
- spin_lock_init(&new->cl_lock);
- rpc_init_rtt(&new->cl_rtt_default, clnt->cl_timeout->to_initval);
- new->cl_metrics = rpc_alloc_iostats(clnt);
- if (new->cl_metrics == NULL)
- goto out_no_stats;
- if (clnt->cl_principal) {
- new->cl_principal = kstrdup(clnt->cl_principal, GFP_KERNEL);
- if (new->cl_principal == NULL)
- goto out_no_principal;
- }
+ err = -ENOMEM;
rcu_read_lock();
xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
rcu_read_unlock();
if (xprt == NULL)
- goto out_no_transport;
- rcu_assign_pointer(new->cl_xprt, xprt);
- atomic_set(&new->cl_count, 1);
- err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
- if (err != 0)
- goto out_no_path;
- rpc_clnt_set_nodename(new, utsname()->nodename);
- if (new->cl_auth)
- atomic_inc(&new->cl_auth->au_count);
+ goto out_err;
+ args->servername = xprt->servername;
+
+ new = rpc_new_client(args, xprt);
+ if (IS_ERR(new)) {
+ err = PTR_ERR(new);
+ goto out_put;
+ }
+
atomic_inc(&clnt->cl_count);
- rpc_register_client(new);
- rpciod_up();
+ new->cl_parent = clnt;
+
+ /* Turn off autobind on clones */
+ new->cl_autobind = 0;
+ new->cl_softrtry = clnt->cl_softrtry;
+ new->cl_discrtry = clnt->cl_discrtry;
+ new->cl_chatty = clnt->cl_chatty;
return new;
-out_no_path:
+
+out_put:
xprt_put(xprt);
-out_no_transport:
- kfree(new->cl_principal);
-out_no_principal:
- rpc_free_iostats(new->cl_metrics);
-out_no_stats:
- kfree(new);
-out_no_clnt:
+out_err:
dprintk("RPC: %s: returned error %d\n", __func__, err);
return ERR_PTR(err);
}
+
+/**
+ * rpc_clone_client - Clone an RPC client structure
+ *
+ * @clnt: RPC client whose parameters are copied
+ *
+ * Returns a fresh RPC client or an ERR_PTR.
+ */
+struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
+{
+ struct rpc_create_args args = {
+ .program = clnt->cl_program,
+ .prognumber = clnt->cl_prog,
+ .version = clnt->cl_vers,
+ .authflavor = clnt->cl_auth->au_flavor,
+ .client_name = clnt->cl_principal,
+ };
+ return __rpc_clone_client(&args, clnt);
+}
EXPORT_SYMBOL_GPL(rpc_clone_client);
+/**
+ * rpc_clone_client_set_auth - Clone an RPC client structure and set its auth
+ *
+ * @clnt: RPC client whose parameters are copied
+ * @auth: security flavor for new client
+ *
+ * Returns a fresh RPC client or an ERR_PTR.
+ */
+struct rpc_clnt *
+rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+{
+ struct rpc_create_args args = {
+ .program = clnt->cl_program,
+ .prognumber = clnt->cl_prog,
+ .version = clnt->cl_vers,
+ .authflavor = flavor,
+ .client_name = clnt->cl_principal,
+ };
+ return __rpc_clone_client(&args, clnt);
+}
+EXPORT_SYMBOL_GPL(rpc_clone_client_set_auth);
+
/*
* Kill all tasks for the given client.
* XXX: kill their descendants as well?
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 21fde99e5c56..80f5dd23417d 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1119,8 +1119,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
return -ENOMEM;
- dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", net,
- NET_NAME(net));
+ dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n",
+ net, NET_NAME(net));
sn->pipefs_sb = sb;
err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_MOUNT,
@@ -1155,8 +1155,8 @@ static void rpc_kill_sb(struct super_block *sb)
sn->pipefs_sb = NULL;
mutex_unlock(&sn->pipefs_sb_lock);
put_net(net);
- dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", net,
- NET_NAME(net));
+ dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n",
+ net, NET_NAME(net));
blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_UMOUNT,
sb);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 128494ec9a64..6357fcb00c7e 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -1022,7 +1022,7 @@ static int rpciod_start(void)
* Create the rpciod thread and wait for it to start.
*/
dprintk("RPC: creating workqueue rpciod\n");
- wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
+ wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 1);
rpciod_workqueue = wq;
return rpciod_workqueue != NULL;
}
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index bac973a31367..194d865fae72 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -208,6 +208,35 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
}
+/*
+ * svc_xprt_received conditionally queues the transport for processing
+ * by another thread. The caller must hold the XPT_BUSY bit and must
+ * not thereafter touch transport data.
+ *
+ * Note: XPT_DATA only gets cleared when a read-attempt finds no (or
+ * insufficient) data.
+ */
+static void svc_xprt_received(struct svc_xprt *xprt)
+{
+ BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags));
+ /* As soon as we clear busy, the xprt could be closed and
+ * 'put', so we need a reference to call svc_xprt_enqueue with:
+ */
+ svc_xprt_get(xprt);
+ clear_bit(XPT_BUSY, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
+ svc_xprt_put(xprt);
+}
+
+void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
+{
+ clear_bit(XPT_TEMP, &new->xpt_flags);
+ spin_lock_bh(&serv->sv_lock);
+ list_add(&new->xpt_list, &serv->sv_permsocks);
+ spin_unlock_bh(&serv->sv_lock);
+ svc_xprt_received(new);
+}
+
int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags)
@@ -232,13 +261,8 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
module_put(xcl->xcl_owner);
return PTR_ERR(newxprt);
}
-
- clear_bit(XPT_TEMP, &newxprt->xpt_flags);
- spin_lock_bh(&serv->sv_lock);
- list_add(&newxprt->xpt_list, &serv->sv_permsocks);
- spin_unlock_bh(&serv->sv_lock);
+ svc_add_new_perm_xprt(serv, newxprt);
newport = svc_xprt_local_port(newxprt);
- clear_bit(XPT_BUSY, &newxprt->xpt_flags);
return newport;
}
err:
@@ -394,27 +418,6 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
return xprt;
}
-/*
- * svc_xprt_received conditionally queues the transport for processing
- * by another thread. The caller must hold the XPT_BUSY bit and must
- * not thereafter touch transport data.
- *
- * Note: XPT_DATA only gets cleared when a read-attempt finds no (or
- * insufficient) data.
- */
-void svc_xprt_received(struct svc_xprt *xprt)
-{
- BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags));
- /* As soon as we clear busy, the xprt could be closed and
- * 'put', so we need a reference to call svc_xprt_enqueue with:
- */
- svc_xprt_get(xprt);
- clear_bit(XPT_BUSY, &xprt->xpt_flags);
- svc_xprt_enqueue(xprt);
- svc_xprt_put(xprt);
-}
-EXPORT_SYMBOL_GPL(svc_xprt_received);
-
/**
* svc_reserve - change the space reserved for the reply to a request.
* @rqstp: The request in question
@@ -565,33 +568,12 @@ static void svc_check_conn_limits(struct svc_serv *serv)
}
}
-/*
- * Receive the next request on any transport. This code is carefully
- * organised not to touch any cachelines in the shared svc_serv
- * structure, only cachelines in the local svc_pool.
- */
-int svc_recv(struct svc_rqst *rqstp, long timeout)
+int svc_alloc_arg(struct svc_rqst *rqstp)
{
- struct svc_xprt *xprt = NULL;
- struct svc_serv *serv = rqstp->rq_server;
- struct svc_pool *pool = rqstp->rq_pool;
- int len, i;
- int pages;
- struct xdr_buf *arg;
- DECLARE_WAITQUEUE(wait, current);
- long time_left;
-
- dprintk("svc: server %p waiting for data (to = %ld)\n",
- rqstp, timeout);
-
- if (rqstp->rq_xprt)
- printk(KERN_ERR
- "svc_recv: service %p, transport not NULL!\n",
- rqstp);
- if (waitqueue_active(&rqstp->rq_wait))
- printk(KERN_ERR
- "svc_recv: service %p, wait queue active!\n",
- rqstp);
+ struct svc_serv *serv = rqstp->rq_server;
+ struct xdr_buf *arg;
+ int pages;
+ int i;
/* now allocate needed pages. If we get a failure, sleep briefly */
pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
@@ -621,11 +603,15 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
arg->page_len = (pages-2)*PAGE_SIZE;
arg->len = (pages-1)*PAGE_SIZE;
arg->tail[0].iov_len = 0;
+ return 0;
+}
- try_to_freeze();
- cond_resched();
- if (signalled() || kthread_should_stop())
- return -EINTR;
+struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+{
+ struct svc_xprt *xprt;
+ struct svc_pool *pool = rqstp->rq_pool;
+ DECLARE_WAITQUEUE(wait, current);
+ long time_left;
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
@@ -663,7 +649,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
if (kthread_should_stop()) {
set_current_state(TASK_RUNNING);
spin_unlock_bh(&pool->sp_lock);
- return -EINTR;
+ return ERR_PTR(-EINTR);
}
add_wait_queue(&rqstp->rq_wait, &wait);
@@ -684,48 +670,58 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
spin_unlock_bh(&pool->sp_lock);
dprintk("svc: server %p, no data yet\n", rqstp);
if (signalled() || kthread_should_stop())
- return -EINTR;
+ return ERR_PTR(-EINTR);
else
- return -EAGAIN;
+ return ERR_PTR(-EAGAIN);
}
}
spin_unlock_bh(&pool->sp_lock);
+ return xprt;
+}
+
+void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
+{
+ spin_lock_bh(&serv->sv_lock);
+ set_bit(XPT_TEMP, &newxpt->xpt_flags);
+ list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
+ serv->sv_tmpcnt++;
+ if (serv->sv_temptimer.function == NULL) {
+ /* setup timer to age temp transports */
+ setup_timer(&serv->sv_temptimer, svc_age_temp_xprts,
+ (unsigned long)serv);
+ mod_timer(&serv->sv_temptimer,
+ jiffies + svc_conn_age_period * HZ);
+ }
+ spin_unlock_bh(&serv->sv_lock);
+ svc_xprt_received(newxpt);
+}
+
+static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
+{
+ struct svc_serv *serv = rqstp->rq_server;
+ int len = 0;
- len = 0;
if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
dprintk("svc_recv: found XPT_CLOSE\n");
svc_delete_xprt(xprt);
/* Leave XPT_BUSY set on the dead xprt: */
- goto out;
+ return 0;
}
if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
struct svc_xprt *newxpt;
+ /*
+ * We know this module_get will succeed because the
+ * listener holds a reference too
+ */
+ __module_get(xprt->xpt_class->xcl_owner);
+ svc_check_conn_limits(xprt->xpt_server);
newxpt = xprt->xpt_ops->xpo_accept(xprt);
- if (newxpt) {
- /*
- * We know this module_get will succeed because the
- * listener holds a reference too
- */
- __module_get(newxpt->xpt_class->xcl_owner);
- svc_check_conn_limits(xprt->xpt_server);
- spin_lock_bh(&serv->sv_lock);
- set_bit(XPT_TEMP, &newxpt->xpt_flags);
- list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
- serv->sv_tmpcnt++;
- if (serv->sv_temptimer.function == NULL) {
- /* setup timer to age temp transports */
- setup_timer(&serv->sv_temptimer,
- svc_age_temp_xprts,
- (unsigned long)serv);
- mod_timer(&serv->sv_temptimer,
- jiffies + svc_conn_age_period * HZ);
- }
- spin_unlock_bh(&serv->sv_lock);
- svc_xprt_received(newxpt);
- }
+ if (newxpt)
+ svc_add_new_temp_xprt(serv, newxpt);
} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
+ /* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
- rqstp, pool->sp_id, xprt,
+ rqstp, rqstp->rq_pool->sp_id, xprt,
atomic_read(&xprt->xpt_ref.refcount));
rqstp->rq_deferred = svc_deferred_dequeue(xprt);
if (rqstp->rq_deferred)
@@ -736,10 +732,51 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
}
+ /* clear XPT_BUSY: */
svc_xprt_received(xprt);
+ return len;
+}
+
+/*
+ * Receive the next request on any transport. This code is carefully
+ * organised not to touch any cachelines in the shared svc_serv
+ * structure, only cachelines in the local svc_pool.
+ */
+int svc_recv(struct svc_rqst *rqstp, long timeout)
+{
+ struct svc_xprt *xprt = NULL;
+ struct svc_serv *serv = rqstp->rq_server;
+ int len, err;
+
+ dprintk("svc: server %p waiting for data (to = %ld)\n",
+ rqstp, timeout);
+
+ if (rqstp->rq_xprt)
+ printk(KERN_ERR
+ "svc_recv: service %p, transport not NULL!\n",
+ rqstp);
+ if (waitqueue_active(&rqstp->rq_wait))
+ printk(KERN_ERR
+ "svc_recv: service %p, wait queue active!\n",
+ rqstp);
+
+ err = svc_alloc_arg(rqstp);
+ if (err)
+ return err;
+
+ try_to_freeze();
+ cond_resched();
+ if (signalled() || kthread_should_stop())
+ return -EINTR;
+
+ xprt = svc_get_next_xprt(rqstp, timeout);
+ if (IS_ERR(xprt))
+ return PTR_ERR(xprt);
+
+ len = svc_handle_xprt(rqstp, xprt);
/* No data, incomplete (TCP) read, or accept() */
- if (len == 0 || len == -EAGAIN)
+ if (len <= 0)
goto out;
clear_bit(XPT_OLD, &xprt->xpt_flags);
@@ -917,16 +954,18 @@ void svc_close_xprt(struct svc_xprt *xprt)
}
EXPORT_SYMBOL_GPL(svc_close_xprt);
-static void svc_close_list(struct list_head *xprt_list, struct net *net)
+static void svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
{
struct svc_xprt *xprt;
+ spin_lock(&serv->sv_lock);
list_for_each_entry(xprt, xprt_list, xpt_list) {
if (xprt->xpt_net != net)
continue;
set_bit(XPT_CLOSE, &xprt->xpt_flags);
set_bit(XPT_BUSY, &xprt->xpt_flags);
}
+ spin_unlock(&serv->sv_lock);
}
static void svc_clear_pools(struct svc_serv *serv, struct net *net)
@@ -949,24 +988,28 @@ static void svc_clear_pools(struct svc_serv *serv, struct net *net)
}
}
-static void svc_clear_list(struct list_head *xprt_list, struct net *net)
+static void svc_clear_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
{
struct svc_xprt *xprt;
struct svc_xprt *tmp;
+ LIST_HEAD(victims);
+ spin_lock(&serv->sv_lock);
list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
if (xprt->xpt_net != net)
continue;
- svc_delete_xprt(xprt);
+ list_move(&xprt->xpt_list, &victims);
}
- list_for_each_entry(xprt, xprt_list, xpt_list)
- BUG_ON(xprt->xpt_net == net);
+ spin_unlock(&serv->sv_lock);
+
+ list_for_each_entry_safe(xprt, tmp, &victims, xpt_list)
+ svc_delete_xprt(xprt);
}
void svc_close_net(struct svc_serv *serv, struct net *net)
{
- svc_close_list(&serv->sv_tempsocks, net);
- svc_close_list(&serv->sv_permsocks, net);
+ svc_close_list(serv, &serv->sv_tempsocks, net);
+ svc_close_list(serv, &serv->sv_permsocks, net);
svc_clear_pools(serv, net);
/*
@@ -974,8 +1017,8 @@ void svc_close_net(struct svc_serv *serv, struct net *net)
* svc_xprt_enqueue will not add new entries without taking the
* sp_lock and checking XPT_BUSY.
*/
- svc_clear_list(&serv->sv_tempsocks, net);
- svc_clear_list(&serv->sv_permsocks, net);
+ svc_clear_list(serv, &serv->sv_tempsocks, net);
+ svc_clear_list(serv, &serv->sv_permsocks, net);
}
/*
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 998aa8c1807c..03827cef1fa7 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -59,7 +59,7 @@
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
- int *errp, int flags);
+ int flags);
static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
@@ -305,57 +305,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
return len;
}
-/**
- * svc_sock_names - construct a list of listener names in a string
- * @serv: pointer to RPC service
- * @buf: pointer to a buffer to fill in with socket names
- * @buflen: size of the buffer to be filled
- * @toclose: pointer to '\0'-terminated C string containing the name
- * of a listener to be closed
- *
- * Fills in @buf with a '\n'-separated list of names of listener
- * sockets. If @toclose is not NULL, the socket named by @toclose
- * is closed, and is not included in the output list.
- *
- * Returns positive length of the socket name string, or a negative
- * errno value on error.
- */
-int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen,
- const char *toclose)
-{
- struct svc_sock *svsk, *closesk = NULL;
- int len = 0;
-
- if (!serv)
- return 0;
-
- spin_lock_bh(&serv->sv_lock);
- list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) {
- int onelen = svc_one_sock_name(svsk, buf + len, buflen - len);
- if (onelen < 0) {
- len = onelen;
- break;
- }
- if (toclose && strcmp(toclose, buf + len) == 0) {
- closesk = svsk;
- svc_xprt_get(&closesk->sk_xprt);
- } else
- len += onelen;
- }
- spin_unlock_bh(&serv->sv_lock);
-
- if (closesk) {
- /* Should unregister with portmap, but you cannot
- * unregister just one protocol...
- */
- svc_close_xprt(&closesk->sk_xprt);
- svc_xprt_put(&closesk->sk_xprt);
- } else if (toclose)
- return -ENOENT;
- return len;
-}
-EXPORT_SYMBOL_GPL(svc_sock_names);
-
/*
* Check input queue length
*/
@@ -598,11 +547,9 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
dprintk("svc: recvfrom returned error %d\n", -err);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
}
- return -EAGAIN;
+ return 0;
}
len = svc_addr_len(svc_addr(rqstp));
- if (len == 0)
- return -EAFNOSUPPORT;
rqstp->rq_addrlen = len;
if (skb->tstamp.tv64 == 0) {
skb->tstamp = ktime_get_real();
@@ -620,10 +567,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
if (!svc_udp_get_dest_address(rqstp, cmh)) {
net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n",
cmh->cmsg_level, cmh->cmsg_type);
-out_free:
- trace_kfree_skb(skb, svc_udp_recvfrom);
- skb_free_datagram_locked(svsk->sk_sk, skb);
- return 0;
+ goto out_free;
}
rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp));
@@ -662,6 +606,10 @@ out_free:
serv->sv_stats->netudpcnt++;
return len;
+out_free:
+ trace_kfree_skb(skb, svc_udp_recvfrom);
+ skb_free_datagram_locked(svsk->sk_sk, skb);
+ return 0;
}
static int
@@ -900,8 +848,9 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
*/
newsock->sk->sk_sndtimeo = HZ*30;
- if (!(newsvsk = svc_setup_socket(serv, newsock, &err,
- (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY))))
+ newsvsk = svc_setup_socket(serv, newsock,
+ (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY));
+ if (IS_ERR(newsvsk))
goto failed;
svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);
err = kernel_getsockname(newsock, sin, &slen);
@@ -1174,13 +1123,13 @@ error:
if (len != -EAGAIN)
goto err_other;
dprintk("RPC: TCP recvfrom got EAGAIN\n");
- return -EAGAIN;
+ return 0;
err_other:
printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
svsk->sk_xprt.xpt_server->sv_name, -len);
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
err_noclose:
- return -EAGAIN; /* record not complete */
+ return 0; /* record not complete */
}
/*
@@ -1383,29 +1332,29 @@ EXPORT_SYMBOL_GPL(svc_sock_update_bufs);
*/
static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct socket *sock,
- int *errp, int flags)
+ int flags)
{
struct svc_sock *svsk;
struct sock *inet;
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
+ int err = 0;
dprintk("svc: svc_setup_socket %p\n", sock);
- if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
- *errp = -ENOMEM;
- return NULL;
- }
+ svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
+ if (!svsk)
+ return ERR_PTR(-ENOMEM);
inet = sock->sk;
/* Register socket with portmapper */
- if (*errp >= 0 && pmap_register)
- *errp = svc_register(serv, sock_net(sock->sk), inet->sk_family,
+ if (pmap_register)
+ err = svc_register(serv, sock_net(sock->sk), inet->sk_family,
inet->sk_protocol,
ntohs(inet_sk(inet)->inet_sport));
- if (*errp < 0) {
+ if (err < 0) {
kfree(svsk);
- return NULL;
+ return ERR_PTR(err);
}
inet->sk_user_data = svsk;
@@ -1450,42 +1399,38 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,
int err = 0;
struct socket *so = sockfd_lookup(fd, &err);
struct svc_sock *svsk = NULL;
+ struct sockaddr_storage addr;
+ struct sockaddr *sin = (struct sockaddr *)&addr;
+ int salen;
if (!so)
return err;
+ err = -EAFNOSUPPORT;
if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6))
- err = -EAFNOSUPPORT;
- else if (so->sk->sk_protocol != IPPROTO_TCP &&
+ goto out;
+ err = -EPROTONOSUPPORT;
+ if (so->sk->sk_protocol != IPPROTO_TCP &&
so->sk->sk_protocol != IPPROTO_UDP)
- err = -EPROTONOSUPPORT;
- else if (so->state > SS_UNCONNECTED)
- err = -EISCONN;
- else {
- if (!try_module_get(THIS_MODULE))
- err = -ENOENT;
- else
- svsk = svc_setup_socket(serv, so, &err,
- SVC_SOCK_DEFAULTS);
- if (svsk) {
- struct sockaddr_storage addr;
- struct sockaddr *sin = (struct sockaddr *)&addr;
- int salen;
- if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
- svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
- clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags);
- spin_lock_bh(&serv->sv_lock);
- list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks);
- spin_unlock_bh(&serv->sv_lock);
- svc_xprt_received(&svsk->sk_xprt);
- err = 0;
- } else
- module_put(THIS_MODULE);
- }
- if (err) {
- sockfd_put(so);
- return err;
+ goto out;
+ err = -EISCONN;
+ if (so->state > SS_UNCONNECTED)
+ goto out;
+ err = -ENOENT;
+ if (!try_module_get(THIS_MODULE))
+ goto out;
+ svsk = svc_setup_socket(serv, so, SVC_SOCK_DEFAULTS);
+ if (IS_ERR(svsk)) {
+ module_put(THIS_MODULE);
+ err = PTR_ERR(svsk);
+ goto out;
}
+ if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0)
+ svc_xprt_set_local(&svsk->sk_xprt, sin, salen);
+ svc_add_new_perm_xprt(serv, &svsk->sk_xprt);
return svc_one_sock_name(svsk, name_return, len);
+out:
+ sockfd_put(so);
+ return err;
}
EXPORT_SYMBOL_GPL(svc_addsock);
@@ -1563,11 +1508,13 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
goto bummer;
}
- if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
- svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
- return (struct svc_xprt *)svsk;
+ svsk = svc_setup_socket(serv, sock, flags);
+ if (IS_ERR(svsk)) {
+ error = PTR_ERR(svsk);
+ goto bummer;
}
-
+ svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen);
+ return (struct svc_xprt *)svsk;
bummer:
dprintk("svc: svc_create_socket error = %d\n", -error);
sock_release(sock);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 0afba1b4b656..08f50afd5f2a 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -730,19 +730,24 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
if (xdr->nwords == 0)
return 0;
- if (nwords > xdr->nwords) {
- nwords = xdr->nwords;
- len = nwords << 2;
- }
/* Realign pages to current pointer position */
iov = buf->head;
- if (iov->iov_len > cur)
+ if (iov->iov_len > cur) {
xdr_shrink_bufhead(buf, iov->iov_len - cur);
+ xdr->nwords = XDR_QUADLEN(buf->len - cur);
+ }
- /* Truncate page data and move it into the tail */
- if (buf->page_len > len)
+ if (nwords > xdr->nwords) {
+ nwords = xdr->nwords;
+ len = nwords << 2;
+ }
+ if (buf->page_len <= len)
+ len = buf->page_len;
+ else if (nwords < xdr->nwords) {
+ /* Truncate page data and move it into the tail */
xdr_shrink_pagelen(buf, buf->page_len - len);
- xdr->nwords = XDR_QUADLEN(buf->len - cur);
+ xdr->nwords = XDR_QUADLEN(buf->len - cur);
+ }
return len;
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 5d7f61d7559c..bd462a532acf 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
static void xprt_clear_locked(struct rpc_xprt *xprt)
{
xprt->snd_task = NULL;
- if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) {
+ if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
smp_mb__before_clear_bit();
clear_bit(XPRT_LOCKED, &xprt->state);
smp_mb__after_clear_bit();
@@ -504,9 +504,6 @@ EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
*/
void xprt_write_space(struct rpc_xprt *xprt)
{
- if (unlikely(xprt->shutdown))
- return;
-
spin_lock_bh(&xprt->transport_lock);
if (xprt->snd_task) {
dprintk("RPC: write space: waking waiting task on "
@@ -679,7 +676,7 @@ xprt_init_autodisconnect(unsigned long data)
struct rpc_xprt *xprt = (struct rpc_xprt *)data;
spin_lock(&xprt->transport_lock);
- if (!list_empty(&xprt->recv) || xprt->shutdown)
+ if (!list_empty(&xprt->recv))
goto out_abort;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
goto out_abort;
@@ -1262,7 +1259,6 @@ out:
static void xprt_destroy(struct rpc_xprt *xprt)
{
dprintk("RPC: destroying transport %p\n", xprt);
- xprt->shutdown = 1;
del_timer_sync(&xprt->timer);
rpc_destroy_wait_queue(&xprt->binding);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 73b428bef598..62e4f9bcc387 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -578,10 +578,6 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, size_t client_ird)
list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q);
spin_unlock_bh(&listen_xprt->sc_lock);
- /*
- * Can't use svc_xprt_received here because we are not on a
- * rqstp thread
- */
set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags);
svc_xprt_enqueue(&listen_xprt->sc_xprt);
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 5d9202dc7cb1..c9aa7a35f3bf 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -199,21 +199,15 @@ xprt_rdma_connect_worker(struct work_struct *work)
struct rpc_xprt *xprt = &r_xprt->xprt;
int rc = 0;
- if (!xprt->shutdown) {
- current->flags |= PF_FSTRANS;
- xprt_clear_connected(xprt);
-
- dprintk("RPC: %s: %sconnect\n", __func__,
- r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
- rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
- if (rc)
- goto out;
- }
- goto out_clear;
+ current->flags |= PF_FSTRANS;
+ xprt_clear_connected(xprt);
+
+ dprintk("RPC: %s: %sconnect\n", __func__,
+ r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
+ rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ if (rc)
+ xprt_wake_pending_tasks(xprt, rc);
-out:
- xprt_wake_pending_tasks(xprt, rc);
-out_clear:
dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt);
current->flags &= ~PF_FSTRANS;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a35b8e52e551..aaaadfbe36e9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -917,9 +917,6 @@ static void xs_local_data_ready(struct sock *sk, int len)
if (skb == NULL)
goto out;
- if (xprt->shutdown)
- goto dropit;
-
repsize = skb->len - sizeof(rpc_fraghdr);
if (repsize < 4) {
dprintk("RPC: impossible RPC reply size %d\n", repsize);
@@ -981,9 +978,6 @@ static void xs_udp_data_ready(struct sock *sk, int len)
if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
goto out;
- if (xprt->shutdown)
- goto dropit;
-
repsize = skb->len - sizeof(struct udphdr);
if (repsize < 4) {
dprintk("RPC: impossible RPC reply size %d!\n", repsize);
@@ -1025,6 +1019,16 @@ static void xs_udp_data_ready(struct sock *sk, int len)
read_unlock_bh(&sk->sk_callback_lock);
}
+/*
+ * Helper function to force a TCP close if the server is sending
+ * junk and/or it has put us in CLOSE_WAIT
+ */
+static void xs_tcp_force_close(struct rpc_xprt *xprt)
+{
+ set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+ xprt_force_disconnect(xprt);
+}
+
static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1051,7 +1055,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea
/* Sanity check of the record length */
if (unlikely(transport->tcp_reclen < 8)) {
dprintk("RPC: invalid TCP record fragment length\n");
- xprt_force_disconnect(xprt);
+ xs_tcp_force_close(xprt);
return;
}
dprintk("RPC: reading TCP record fragment of length %d\n",
@@ -1132,7 +1136,7 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
break;
default:
dprintk("RPC: invalid request message type\n");
- xprt_force_disconnect(&transport->xprt);
+ xs_tcp_force_close(&transport->xprt);
}
xs_tcp_check_fraghdr(transport);
}
@@ -1402,9 +1406,6 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes)
read_lock_bh(&sk->sk_callback_lock);
if (!(xprt = xprt_from_sock(sk)))
goto out;
- if (xprt->shutdown)
- goto out;
-
/* Any data means we had a useful conversation, so
* the we don't need to delay the next reconnect
*/
@@ -1455,6 +1456,8 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
static void xs_sock_mark_closed(struct rpc_xprt *xprt)
{
smp_mb__before_clear_bit();
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
smp_mb__after_clear_bit();
@@ -1512,8 +1515,8 @@ static void xs_tcp_state_change(struct sock *sk)
break;
case TCP_CLOSE_WAIT:
/* The server initiated a shutdown of the socket */
- xprt_force_disconnect(xprt);
xprt->connect_cookie++;
+ xs_tcp_force_close(xprt);
case TCP_CLOSING:
/*
* If the server closed down the connection, make sure that
@@ -1889,9 +1892,6 @@ static void xs_local_setup_socket(struct work_struct *work)
struct socket *sock;
int status = -EIO;
- if (xprt->shutdown)
- goto out;
-
current->flags |= PF_FSTRANS;
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
@@ -2008,9 +2008,6 @@ static void xs_udp_setup_socket(struct work_struct *work)
struct socket *sock = transport->sock;
int status = -EIO;
- if (xprt->shutdown)
- goto out;
-
current->flags |= PF_FSTRANS;
/* Start by resetting any existing state */
@@ -2156,9 +2153,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
struct rpc_xprt *xprt = &transport->xprt;
int status = -EIO;
- if (xprt->shutdown)
- goto out;
-
current->flags |= PF_FSTRANS;
if (!sock) {
@@ -2199,8 +2193,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
/* We're probably in TIME_WAIT. Get rid of existing socket,
* and retry
*/
- set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
- xprt_force_disconnect(xprt);
+ xs_tcp_force_close(xprt);
break;
case -ECONNREFUSED:
case -ECONNRESET:
@@ -2528,6 +2521,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
static struct rpc_xprt_ops bc_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
.release_xprt = xprt_release_xprt,
+ .alloc_slot = xprt_alloc_slot,
.rpcbind = xs_local_rpcbind,
.buf_alloc = bc_malloc,
.buf_free = bc_free,