diff options
author | Jiri Kosina <jkosina@suse.cz> | 2013-09-04 10:49:39 +0200 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2013-09-04 10:49:57 +0200 |
commit | efd15f5f4ff63f6ac5d80850686e3d2cc8c4481b (patch) | |
tree | 40024adbe77a3d660662e639fd765097133d648c /net | |
parent | 6c2794a2984f4c17a58117a68703cc7640f01c5a (diff) | |
parent | 58c59bc997d86593f0bea41845885917cf304d22 (diff) | |
download | lwn-efd15f5f4ff63f6ac5d80850686e3d2cc8c4481b.tar.gz lwn-efd15f5f4ff63f6ac5d80850686e3d2cc8c4481b.zip |
Merge branch 'master' into for-3.12/upstream
Sync with Linus' tree to be able to apply fixup patch on top
of 9d9a04ee75 ("HID: apple: Add support for the 2013 Macbook Air")
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Diffstat (limited to 'net')
372 files changed, 12121 insertions, 8678 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9424f3718ea7..2fb2d88e8c2e 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -341,7 +341,7 @@ static void __vlan_device_event(struct net_device *dev, unsigned long event) static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vlan_group *grp; struct vlan_info *vlan_info; int i, flgs; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 8a15eaadc4bd..4a78c4de9f20 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; - u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; + u16 vlan_id = vlan_tx_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3a8c8fd63c88..1cd3d2a406f5 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -73,6 +73,8 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) { struct vlan_priority_tci_mapping *mp; + smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */ + mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)]; while (mp) { if (mp->priority == skb->priority) { @@ -249,6 +251,11 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, np->next = mp; np->priority = skb_prio; np->vlan_qos = vlan_qos; + /* Before inserting this element in hash table, make sure all its fields + * are committed to memory. + * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask() + */ + smp_wmb(); vlan->egress_priority_map[skb_prio & 0xF] = np; if (vlan_qos) vlan->nr_egress_mappings++; diff --git a/net/9p/client.c b/net/9p/client.c index addc116cecf0..8b93cae2d11d 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -127,7 +127,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) char *s; int ret = 0; - clnt->proto_version = p9_proto_2000u; + clnt->proto_version = p9_proto_2000L; clnt->msize = 8192; if (!opts) @@ -204,6 +204,17 @@ free_and_return: return ret; } +struct p9_fcall *p9_fcall_alloc(int alloc_msize) +{ + struct p9_fcall *fc; + fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); + if (!fc) + return NULL; + fc->capacity = alloc_msize; + fc->sdata = (char *) fc + sizeof(struct p9_fcall); + return fc; +} + /** * p9_tag_alloc - lookup/allocate a request by tag * @c: client session to lookup tag within @@ -256,39 +267,36 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) col = tag % P9_ROW_MAXTAG; req = &c->reqs[row][col]; - if (!req->tc) { + if (!req->wq) { req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); - if (!req->wq) { - pr_err("Couldn't grow tag array\n"); - return ERR_PTR(-ENOMEM); - } + if (!req->wq) + goto grow_failed; init_waitqueue_head(req->wq); - req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, - GFP_NOFS); - req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, - GFP_NOFS); - if ((!req->tc) || (!req->rc)) { - pr_err("Couldn't grow tag array\n"); - kfree(req->tc); - kfree(req->rc); - kfree(req->wq); - req->tc = req->rc = NULL; - req->wq = NULL; - return ERR_PTR(-ENOMEM); - } - req->tc->capacity = alloc_msize; - req->rc->capacity = alloc_msize; - req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); - req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); } + if (!req->tc) + req->tc = p9_fcall_alloc(alloc_msize); + if (!req->rc) + req->rc = p9_fcall_alloc(alloc_msize); + if (!req->tc || !req->rc) + goto grow_failed; + p9pdu_reset(req->tc); p9pdu_reset(req->rc); req->tc->tag = tag-1; req->status = REQ_STATUS_ALLOC; - return &c->reqs[row][col]; + return req; + +grow_failed: + pr_err("Couldn't grow tag array\n"); + kfree(req->tc); + kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; + return ERR_PTR(-ENOMEM); } /** @@ -648,12 +656,20 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) return PTR_ERR(req); - /* if we haven't received a response for oldreq, - remove it from the list. */ + /* + * if we haven't received a response for oldreq, + * remove it from the list, and notify the transport + * layer that the reply will never arrive. + */ spin_lock(&c->lock); - if (oldreq->status == REQ_STATUS_FLSH) + if (oldreq->status == REQ_STATUS_FLSH) { list_del(&oldreq->req_list); - spin_unlock(&c->lock); + spin_unlock(&c->lock); + if (c->trans_mod->cancelled) + c->trans_mod->cancelled(c, req); + } else { + spin_unlock(&c->lock); + } p9_free_req(c, req); return 0; @@ -996,6 +1012,9 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto destroy_tagpool; if (!clnt->trans_mod) + clnt->trans_mod = v9fs_get_trans_by_name("virtio"); + + if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); if (clnt->trans_mod == NULL) { diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index de8df957867d..2ee3879161b1 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -24,11 +24,11 @@ */ void p9_release_pages(struct page **pages, int nr_pages) { - int i = 0; - while (pages[i] && nr_pages--) { - put_page(pages[i]); - i++; - } + int i; + + for (i = 0; i < nr_pages; i++) + if (pages[i]) + put_page(pages[i]); } EXPORT_SYMBOL(p9_release_pages); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 02efb25c2957..3ffda1b3799b 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -63,6 +63,7 @@ struct p9_fd_opts { int rfd; int wfd; u16 port; + int privport; }; /** @@ -87,12 +88,15 @@ struct p9_trans_fd { enum { /* Options that take integer arguments */ Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, + /* Options that take no arguments */ + Opt_privport, }; static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, + {Opt_privport, "privport"}, {Opt_err, NULL}, }; @@ -161,6 +165,9 @@ static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); static DECLARE_WORK(p9_poll_work, p9_poll_workfn); +static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; +static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT; + static void p9_mux_poll_stop(struct p9_conn *m) { unsigned long flags; @@ -741,7 +748,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (!*p) continue; token = match_token(p, tokens, args); - if (token != Opt_err) { + if ((token != Opt_err) && (token != Opt_privport)) { r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, @@ -759,6 +766,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) case Opt_wfdno: opts->wfd = option; break; + case Opt_privport: + opts->privport = 1; + break; default: continue; } @@ -898,6 +908,24 @@ static inline int valid_ipaddr4(const char *buf) return 0; } +static int p9_bind_privport(struct socket *sock) +{ + struct sockaddr_in cl; + int port, err = -EINVAL; + + memset(&cl, 0, sizeof(cl)); + cl.sin_family = AF_INET; + cl.sin_addr.s_addr = INADDR_ANY; + for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { + cl.sin_port = htons((ushort)port); + err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); + if (err != -EADDRINUSE) + break; + } + return err; +} + + static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { @@ -926,6 +954,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) return err; } + if (opts.privport) { + err = p9_bind_privport(csocket); + if (err < 0) { + pr_err("%s (%d): problem binding to privport\n", + __func__, task_pid_nr(current)); + sock_release(csocket); + return err; + } + } + err = csocket->ops->connect(csocket, (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 2c69ddd691a1..928f2bb9bf8d 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -57,9 +57,7 @@ #define P9_RDMA_IRD 0 #define P9_RDMA_ORD 0 #define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ -#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can - * safely advertise a maxsize - * of 64k */ +#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */ /** * struct p9_trans_rdma - RDMA transport instance @@ -75,7 +73,9 @@ * @sq_depth: The depth of the Send Queue * @sq_sem: Semaphore for the SQ * @rq_depth: The depth of the Receive Queue. - * @rq_count: Count of requests in the Receive Queue. + * @rq_sem: Semaphore for the RQ + * @excess_rc : Amount of posted Receive Contexts without a pending request. + * See rdma_request() * @addr: The remote peer's address * @req_lock: Protects the active request list * @cm_done: Completion event for connection management tracking @@ -100,7 +100,8 @@ struct p9_trans_rdma { int sq_depth; struct semaphore sq_sem; int rq_depth; - atomic_t rq_count; + struct semaphore rq_sem; + atomic_t excess_rc; struct sockaddr_in addr; spinlock_t req_lock; @@ -296,6 +297,13 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, if (!req) goto err_out; + /* Check that we have not yet received a reply for this request. + */ + if (unlikely(req->rc)) { + pr_err("Duplicate reply for request %d", tag); + goto err_out; + } + req->rc = c->rc; req->status = REQ_STATUS_RCVD; p9_client_cb(client, req); @@ -336,8 +344,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context) switch (c->wc_op) { case IB_WC_RECV: - atomic_dec(&rdma->rq_count); handle_recv(client, rdma, c, wc.status, wc.byte_len); + up(&rdma->rq_sem); break; case IB_WC_SEND: @@ -421,32 +429,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) struct p9_rdma_context *c = NULL; struct p9_rdma_context *rpl_context = NULL; + /* When an error occurs between posting the recv and the send, + * there will be a receive context posted without a pending request. + * Since there is no way to "un-post" it, we remember it and skip + * post_recv() for the next request. + * So here, + * see if we are this `next request' and need to absorb an excess rc. + * If yes, then drop and free our own, and do not recv_post(). + **/ + if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { + if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { + /* Got one ! */ + kfree(req->rc); + req->rc = NULL; + goto dont_need_post_recv; + } else { + /* We raced and lost. */ + atomic_inc(&rdma->excess_rc); + } + } + /* Allocate an fcall for the reply */ rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); if (!rpl_context) { err = -ENOMEM; - goto err_close; - } - - /* - * If the request has a buffer, steal it, otherwise - * allocate a new one. Typically, requests should already - * have receive buffers allocated and just swap them around - */ - if (!req->rc) { - req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, - GFP_NOFS); - if (req->rc) { - req->rc->sdata = (char *) req->rc + - sizeof(struct p9_fcall); - req->rc->capacity = client->msize; - } + goto recv_error; } rpl_context->rc = req->rc; - if (!rpl_context->rc) { - err = -ENOMEM; - goto err_free2; - } /* * Post a receive buffer for this request. We need to ensure @@ -455,29 +464,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) * outstanding request, so we must keep a count to avoid * overflowing the RQ. */ - if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { - err = post_recv(client, rpl_context); - if (err) - goto err_free1; - } else - atomic_dec(&rdma->rq_count); + if (down_interruptible(&rdma->rq_sem)) { + err = -EINTR; + goto recv_error; + } + err = post_recv(client, rpl_context); + if (err) { + p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); + goto recv_error; + } /* remove posted receive buffer from request structure */ req->rc = NULL; +dont_need_post_recv: /* Post the request */ c = kmalloc(sizeof *c, GFP_NOFS); if (!c) { err = -ENOMEM; - goto err_free1; + goto send_error; } c->req = req; c->busa = ib_dma_map_single(rdma->cm_id->device, c->req->tc->sdata, c->req->tc->size, DMA_TO_DEVICE); - if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) - goto error; + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) { + err = -EIO; + goto send_error; + } sge.addr = c->busa; sge.length = c->req->tc->size; @@ -491,22 +506,32 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) wr.sg_list = &sge; wr.num_sge = 1; - if (down_interruptible(&rdma->sq_sem)) - goto error; + if (down_interruptible(&rdma->sq_sem)) { + err = -EINTR; + goto send_error; + } - return ib_post_send(rdma->qp, &wr, &bad_wr); + err = ib_post_send(rdma->qp, &wr, &bad_wr); + if (err) + goto send_error; - error: + /* Success */ + return 0; + + /* Handle errors that happened during or while preparing the send: */ + send_error: kfree(c); - kfree(rpl_context->rc); - kfree(rpl_context); - p9_debug(P9_DEBUG_ERROR, "EIO\n"); - return -EIO; - err_free1: - kfree(rpl_context->rc); - err_free2: + p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); + + /* Ach. + * We did recv_post(), but not send. We have one recv_post in excess. + */ + atomic_inc(&rdma->excess_rc); + return err; + + /* Handle errors that happened during or while preparing post_recv(): */ + recv_error: kfree(rpl_context); - err_close: spin_lock_irqsave(&rdma->req_lock, flags); if (rdma->state < P9_RDMA_CLOSING) { rdma->state = P9_RDMA_CLOSING; @@ -551,7 +576,8 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) spin_lock_init(&rdma->req_lock); init_completion(&rdma->cm_done); sema_init(&rdma->sq_sem, rdma->sq_depth); - atomic_set(&rdma->rq_count, 0); + sema_init(&rdma->rq_sem, rdma->rq_depth); + atomic_set(&rdma->excess_rc, 0); return rdma; } @@ -562,6 +588,17 @@ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) return 1; } +/* A request has been fully flushed without a reply. + * That means we have posted one buffer in excess. + */ +static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req) +{ + struct p9_trans_rdma *rdma = client->trans; + + atomic_inc(&rdma->excess_rc); + return 0; +} + /** * trans_create_rdma - Transport method for creating atransport instance * @client: client instance diff --git a/net/Kconfig b/net/Kconfig index 6dfe1c636a80..2b406608a1a4 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -219,6 +219,7 @@ source "net/batman-adv/Kconfig" source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" source "net/netlink/Kconfig" +source "net/mpls/Kconfig" config RPS boolean @@ -243,6 +244,10 @@ config NETPRIO_CGROUP Cgroup subsystem for use in assigning processes to network priorities on a per-interface basis +config NET_RX_BUSY_POLL + boolean + default y + config BQL boolean depends on SYSFS @@ -260,6 +265,18 @@ config BPF_JIT packet sniffing (libpcap/tcpdump). Note : Admin should enable this feature changing /proc/sys/net/core/bpf_jit_enable +config NET_FLOW_LIMIT + boolean + depends on RPS + default y + ---help--- + The network stack has to drop packets when a receive processing CPU's + backlog reaches netdev_max_backlog. If a few out of many active flows + generate the vast majority of load, drop their traffic earlier to + maintain capacity for the other flows. This feature provides servers + with many clients some protection against DoS by a single (spoofed) + flow that greatly exceeds average workload. + menu "Network testing" config NET_PKTGEN diff --git a/net/Makefile b/net/Makefile index 091e7b04f301..9492e8cb64e9 100644 --- a/net/Makefile +++ b/net/Makefile @@ -70,3 +70,4 @@ obj-$(CONFIG_BATMAN_ADV) += batman-adv/ obj-$(CONFIG_NFC) += nfc/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ +obj-$(CONFIG_NET_MPLS_GSO) += mpls/ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 173a2e82f486..690356fa52b9 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -332,7 +332,7 @@ static void aarp_expire_timeout(unsigned long unused) static int aarp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); int ct; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ef12839a7cfe..7fee50d637f9 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -644,7 +644,7 @@ static inline void atalk_dev_down(struct net_device *dev) static int ddp_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/atm/clip.c b/net/atm/clip.c index 8ae3a7879335..8215f7cb170b 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -539,9 +539,9 @@ static int clip_create(int number) } static int clip_device_event(struct notifier_block *this, unsigned long event, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -575,6 +575,7 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, void *ifa) { struct in_device *in_dev; + struct netdev_notifier_info info; in_dev = ((struct in_ifaddr *)ifa)->ifa_dev; /* @@ -583,7 +584,8 @@ static int clip_inet_event(struct notifier_block *this, unsigned long event, */ if (event != NETDEV_UP) return NOTIFY_DONE; - return clip_device_event(this, NETDEV_CHANGE, in_dev->dev); + netdev_notifier_info_init(&info, in_dev->dev); + return clip_device_event(this, NETDEV_CHANGE, &info); } static struct notifier_block clip_dev_notifier = { diff --git a/net/atm/mpc.c b/net/atm/mpc.c index d4cc1be5c364..3af12755cd04 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -998,14 +998,12 @@ int msg_to_mpoad(struct k_message *mesg, struct mpoa_client *mpc) } static int mpoa_event_listener(struct notifier_block *mpoa_notifier, - unsigned long event, void *dev_ptr) + unsigned long event, void *ptr) { - struct net_device *dev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mpoa_client *mpc; struct lec_priv *priv; - dev = dev_ptr; - if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index e277e38f736b..4b4d2b779ec1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -111,9 +111,9 @@ again: * Handle device status changes. */ static int ax25_device_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; @@ -1974,7 +1974,7 @@ static struct packet_type ax25_packet_type __read_mostly = { }; static struct notifier_block ax25_dev_notifier = { - .notifier_call =ax25_device_event, + .notifier_call = ax25_device_event, }; static int __init ax25_init(void) diff --git a/net/ax25/sysctl_net_ax25.c b/net/ax25/sysctl_net_ax25.c index d5744b752511..919a5ce47515 100644 --- a/net/ax25/sysctl_net_ax25.c +++ b/net/ax25/sysctl_net_ax25.c @@ -29,7 +29,7 @@ static int min_proto[1], max_proto[] = { AX25_PROTO_MAX }; static int min_ds_timeout[1], max_ds_timeout[] = {65535000}; #endif -static const ctl_table ax25_param_table[] = { +static const struct ctl_table ax25_param_table[] = { { .procname = "ip_default_mode", .maxlen = sizeof(int), diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index acbac2a9c62f..489bb36f1b94 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -32,7 +32,6 @@ batman-adv-y += icmp_socket.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o batman-adv-y += originator.o -batman-adv-y += ring_buffer.o batman-adv-y += routing.o batman-adv-y += send.o batman-adv-y += soft-interface.o diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index f680ee101878..62da5278014a 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -19,7 +19,6 @@ #include "main.h" #include "translation-table.h" -#include "ring_buffer.h" #include "originator.h" #include "routing.h" #include "gateway_common.h" @@ -30,6 +29,49 @@ #include "network-coding.h" /** + * batadv_ring_buffer_set - update the ring buffer with the given value + * @lq_recv: pointer to the ring buffer + * @lq_index: index to store the value at + * @value: value to store in the ring buffer + */ +static void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, + uint8_t value) +{ + lq_recv[*lq_index] = value; + *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; +} + +/** + * batadv_ring_buffer_set - compute the average of all non-zero values stored + * in the given ring buffer + * @lq_recv: pointer to the ring buffer + * + * Returns computed average value. + */ +static uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) +{ + const uint8_t *ptr; + uint16_t count = 0, i = 0, sum = 0; + + ptr = lq_recv; + + while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) { + if (*ptr != 0) { + count++; + sum += *ptr; + } + + i++; + ptr++; + } + + if (count == 0) + return 0; + + return (uint8_t)(sum / count); +} + +/* * batadv_dup_status - duplicate status * @BATADV_NO_DUP: the packet is a duplicate * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for the @@ -48,12 +90,11 @@ static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const uint8_t *neigh_addr, struct batadv_orig_node *orig_node, - struct batadv_orig_node *orig_neigh, __be32 seqno) + struct batadv_orig_node *orig_neigh) { struct batadv_neigh_node *neigh_node; - neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, - ntohl(seqno)); + neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr); if (!neigh_node) goto out; @@ -428,18 +469,16 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, else skb_size = packet_len; - skb_size += ETH_HLEN + NET_IP_ALIGN; + skb_size += ETH_HLEN; - forw_packet_aggr->skb = dev_alloc_skb(skb_size); + forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); if (!forw_packet_aggr->skb) { if (!own_packet) atomic_inc(&bat_priv->batman_queue_left); kfree(forw_packet_aggr); goto out; } - skb_reserve(forw_packet_aggr->skb, ETH_HLEN + NET_IP_ALIGN); - - INIT_HLIST_NODE(&forw_packet_aggr->list); + skb_reserve(forw_packet_aggr->skb, ETH_HLEN); skb_buff = skb_put(forw_packet_aggr->skb, packet_len); forw_packet_aggr->packet_len = packet_len; @@ -605,6 +644,41 @@ static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, if_incoming, 0, batadv_iv_ogm_fwd_send_time()); } +/** + * batadv_iv_ogm_slide_own_bcast_window - bitshift own OGM broadcast windows for + * the given interface + * @hard_iface: the interface for which the windows have to be shifted + */ +static void +batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + struct batadv_orig_node *orig_node; + unsigned long *word; + uint32_t i; + size_t word_index; + uint8_t *w; + + for (i = 0; i < hash->size; i++) { + head = &hash->table[i]; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + spin_lock_bh(&orig_node->ogm_cnt_lock); + word_index = hard_iface->if_num * BATADV_NUM_WORDS; + word = &(orig_node->bcast_own[word_index]); + + batadv_bit_get_packet(bat_priv, word, 1, 0); + w = &orig_node->bcast_own_sum[hard_iface->if_num]; + *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); + spin_unlock_bh(&orig_node->ogm_cnt_lock); + } + rcu_read_unlock(); + } +} + static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); @@ -649,7 +723,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->gw_flags = BATADV_NO_FLAGS; } - batadv_slide_own_bcast_window(hard_iface); + batadv_iv_ogm_slide_own_bcast_window(hard_iface); batadv_iv_ogm_queue_add(bat_priv, hard_iface->bat_iv.ogm_buff, hard_iface->bat_iv.ogm_buff_len, hard_iface, 1, batadv_iv_ogm_emit_send_time(bat_priv)); @@ -685,7 +759,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && tmp_neigh_node->if_incoming == if_incoming && atomic_inc_not_zero(&tmp_neigh_node->refcount)) { - if (neigh_node) + if (WARN(neigh_node, "too many matching neigh_nodes")) batadv_neigh_node_free_ref(neigh_node); neigh_node = tmp_neigh_node; continue; @@ -711,8 +785,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, neigh_node = batadv_iv_ogm_neigh_new(if_incoming, ethhdr->h_source, - orig_node, orig_tmp, - batadv_ogm_packet->seqno); + orig_node, orig_tmp); batadv_orig_node_free_ref(orig_tmp); if (!neigh_node) @@ -844,8 +917,7 @@ static int batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, neigh_node = batadv_iv_ogm_neigh_new(if_incoming, orig_neigh_node->orig, orig_neigh_node, - orig_neigh_node, - batadv_ogm_packet->seqno); + orig_neigh_node); if (!neigh_node) goto out; @@ -1013,7 +1085,7 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, struct batadv_neigh_node *orig_neigh_router = NULL; int has_directlink_flag; int is_my_addr = 0, is_my_orig = 0, is_my_oldorig = 0; - int is_broadcast = 0, is_bidirect; + int is_bidirect; bool is_single_hop_neigh = false; bool is_from_best_next_hop = false; int sameseq, similar_ttl; @@ -1077,19 +1149,9 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, if (batadv_compare_eth(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr)) is_my_oldorig = 1; - - if (is_broadcast_ether_addr(ethhdr->h_source)) - is_broadcast = 1; } rcu_read_unlock(); - if (batadv_ogm_packet->header.version != BATADV_COMPAT_VERSION) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: incompatible batman version (%i)\n", - batadv_ogm_packet->header.version); - return; - } - if (is_my_addr) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: received my own broadcast (sender: %pM)\n", @@ -1097,13 +1159,6 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr, return; } - if (is_broadcast) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: ignoring all packets with broadcast source addr (sender: %pM)\n", - ethhdr->h_source); - return; - } - if (is_my_orig) { unsigned long *word; int offset; @@ -1312,7 +1367,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, skb->len + ETH_HLEN); packet_len = skb_headlen(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); packet_buff = skb->data; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index de27b3175cfd..e14531f1ce1c 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -180,7 +180,7 @@ static struct batadv_bla_claim */ static struct batadv_bla_backbone_gw * batadv_backbone_hash_find(struct batadv_priv *bat_priv, - uint8_t *addr, short vid) + uint8_t *addr, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->bla.backbone_hash; struct hlist_head *head; @@ -257,7 +257,7 @@ batadv_bla_del_backbone_claims(struct batadv_bla_backbone_gw *backbone_gw) * @claimtype: the type of the claim (CLAIM, UNCLAIM, ANNOUNCE, ...) */ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, - short vid, int claimtype) + unsigned short vid, int claimtype) { struct sk_buff *skb; struct ethhdr *ethhdr; @@ -307,7 +307,8 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, */ memcpy(ethhdr->h_source, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): CLAIM %pM on vid %d\n", mac, vid); + "bla_send_claim(): CLAIM %pM on vid %d\n", mac, + BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_UNCLAIM: /* unclaim frame @@ -316,7 +317,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): UNCLAIM %pM on vid %d\n", mac, - vid); + BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_ANNOUNCE: /* announcement frame @@ -325,7 +326,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_send_claim(): ANNOUNCE of %pM on vid %d\n", - ethhdr->h_source, vid); + ethhdr->h_source, BATADV_PRINT_VID(vid)); break; case BATADV_CLAIM_TYPE_REQUEST: /* request frame @@ -335,13 +336,15 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac, memcpy(hw_src, mac, ETH_ALEN); memcpy(ethhdr->h_dest, mac, ETH_ALEN); batadv_dbg(BATADV_DBG_BLA, bat_priv, - "bla_send_claim(): REQUEST of %pM to %pMon vid %d\n", - ethhdr->h_source, ethhdr->h_dest, vid); + "bla_send_claim(): REQUEST of %pM to %pM on vid %d\n", + ethhdr->h_source, ethhdr->h_dest, + BATADV_PRINT_VID(vid)); break; } - if (vid != -1) - skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid); + if (vid & BATADV_VLAN_HAS_TAG) + skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), + vid & VLAN_VID_MASK); skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, soft_iface); @@ -367,7 +370,7 @@ out: */ static struct batadv_bla_backbone_gw * batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, - short vid, bool own_backbone) + unsigned short vid, bool own_backbone) { struct batadv_bla_backbone_gw *entry; struct batadv_orig_node *orig_node; @@ -380,7 +383,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_get_backbone_gw(): not found (%pM, %d), creating new entry\n", - orig, vid); + orig, BATADV_PRINT_VID(vid)); entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) @@ -434,7 +437,7 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, uint8_t *orig, static void batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -456,7 +459,7 @@ batadv_bla_update_own_backbone_gw(struct batadv_priv *bat_priv, */ static void batadv_bla_answer_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, - short vid) + unsigned short vid) { struct hlist_head *head; struct batadv_hashtable *hash; @@ -547,7 +550,7 @@ static void batadv_bla_send_announce(struct batadv_priv *bat_priv, * @backbone_gw: the backbone gateway which claims it */ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const short vid, + const uint8_t *mac, const unsigned short vid, struct batadv_bla_backbone_gw *backbone_gw) { struct batadv_bla_claim *claim; @@ -572,7 +575,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, atomic_set(&claim->refcount, 2); batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim, @@ -591,7 +594,7 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): changing ownership for %pM, vid %d\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); claim->backbone_gw->crc ^= crc16(0, claim->addr, ETH_ALEN); batadv_backbone_gw_free_ref(claim->backbone_gw); @@ -611,7 +614,7 @@ claim_free_ref: * given mac address and vid. */ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, - const uint8_t *mac, const short vid) + const uint8_t *mac, const unsigned short vid) { struct batadv_bla_claim search_claim, *claim; @@ -622,7 +625,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, return; batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_del_claim(): %pM, vid %d\n", - mac, vid); + mac, BATADV_PRINT_VID(vid)); batadv_hash_remove(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim); @@ -637,7 +640,7 @@ static void batadv_bla_del_claim(struct batadv_priv *bat_priv, /* check for ANNOUNCE frame, return 1 if handled */ static int batadv_handle_announce(struct batadv_priv *bat_priv, uint8_t *an_addr, uint8_t *backbone_addr, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; uint16_t crc; @@ -658,12 +661,13 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %#.4x\n", - vid, backbone_gw->orig, crc); + BATADV_PRINT_VID(vid), backbone_gw->orig, crc); if (backbone_gw->crc != crc) { batadv_dbg(BATADV_DBG_BLA, backbone_gw->bat_priv, "handle_announce(): CRC FAILED for %pM/%d (my = %#.4x, sent = %#.4x)\n", - backbone_gw->orig, backbone_gw->vid, + backbone_gw->orig, + BATADV_PRINT_VID(backbone_gw->vid), backbone_gw->crc, crc); batadv_bla_send_request(backbone_gw); @@ -685,7 +689,7 @@ static int batadv_handle_announce(struct batadv_priv *bat_priv, static int batadv_handle_request(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, - struct ethhdr *ethhdr, short vid) + struct ethhdr *ethhdr, unsigned short vid) { /* check for REQUEST frame */ if (!batadv_compare_eth(backbone_addr, ethhdr->h_dest)) @@ -699,7 +703,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_request(): REQUEST vid %d (sent by %pM)...\n", - vid, ethhdr->h_source); + BATADV_PRINT_VID(vid), ethhdr->h_source); batadv_bla_answer_request(bat_priv, primary_if, vid); return 1; @@ -709,7 +713,7 @@ static int batadv_handle_request(struct batadv_priv *bat_priv, static int batadv_handle_unclaim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, - uint8_t *claim_addr, short vid) + uint8_t *claim_addr, unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -727,7 +731,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, /* this must be an UNCLAIM frame */ batadv_dbg(BATADV_DBG_BLA, bat_priv, "handle_unclaim(): UNCLAIM %pM on vid %d (sent by %pM)...\n", - claim_addr, vid, backbone_gw->orig); + claim_addr, BATADV_PRINT_VID(vid), backbone_gw->orig); batadv_bla_del_claim(bat_priv, claim_addr, vid); batadv_backbone_gw_free_ref(backbone_gw); @@ -738,7 +742,7 @@ static int batadv_handle_unclaim(struct batadv_priv *bat_priv, static int batadv_handle_claim(struct batadv_priv *bat_priv, struct batadv_hard_iface *primary_if, uint8_t *backbone_addr, uint8_t *claim_addr, - short vid) + unsigned short vid) { struct batadv_bla_backbone_gw *backbone_gw; @@ -861,14 +865,15 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, struct batadv_bla_claim_dst *bla_dst; uint16_t proto; int headlen; - short vid = -1; + unsigned short vid = BATADV_NO_FLAGS; int ret; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); if (ntohs(ethhdr->h_proto) == ETH_P_8021Q) { vhdr = (struct vlan_ethhdr *)ethhdr; vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; proto = ntohs(vhdr->h_vlan_encapsulated_proto); headlen = sizeof(*vhdr); } else { @@ -885,7 +890,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, return 0; /* pskb_may_pull() may have modified the pointers, get ethhdr again */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); arphdr = (struct arphdr *)((uint8_t *)ethhdr + headlen); /* Check whether the ARP frame carries a valid @@ -910,7 +915,8 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, if (ret == 1) batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): received a claim frame from another group. From: %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, vid, hw_src, hw_dst); + ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, + hw_dst); if (ret < 2) return ret; @@ -945,7 +951,7 @@ static int batadv_bla_process_claim(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_process_claim(): ERROR - this looks like a claim frame, but is useless. eth src %pM on vid %d ...(hw_src %pM, hw_dst %pM)\n", - ethhdr->h_source, vid, hw_src, hw_dst); + ethhdr->h_source, BATADV_PRINT_VID(vid), hw_src, hw_dst); return 1; } @@ -1362,7 +1368,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; struct batadv_bla_backbone_gw *backbone_gw; - short vid = -1; + unsigned short vid = BATADV_NO_FLAGS; if (!atomic_read(&orig_node->bat_priv->bridge_loop_avoidance)) return 0; @@ -1379,6 +1385,7 @@ int batadv_bla_is_backbone_gw(struct sk_buff *skb, vhdr = (struct vlan_ethhdr *)(skb->data + hdr_size); vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + vid |= BATADV_VLAN_HAS_TAG; } /* see if this originator is a backbone gw for this VLAN */ @@ -1428,15 +1435,15 @@ void batadv_bla_free(struct batadv_priv *bat_priv) * returns 1, otherwise it returns 0 and the caller shall further * process the skb. */ -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, - bool is_bcast) +int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; struct batadv_hard_iface *primary_if; int ret; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) @@ -1523,7 +1530,8 @@ out: * returns 1, otherwise it returns 0 and the caller shall further * process the skb. */ -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) +int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid) { struct ethhdr *ethhdr; struct batadv_bla_claim search_claim, *claim = NULL; @@ -1543,7 +1551,7 @@ int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid) if (batadv_bla_process_claim(bat_priv, primary_if, skb)) goto handled; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); if (unlikely(atomic_read(&bat_priv->bla.num_requests))) /* don't allow broadcasts while requests are in flight */ @@ -1627,8 +1635,8 @@ int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset) hlist_for_each_entry_rcu(claim, head, hash_entry) { is_own = batadv_compare_eth(claim->backbone_gw->orig, primary_addr); - seq_printf(seq, " * %pM on % 5d by %pM [%c] (%#.4x)\n", - claim->addr, claim->vid, + seq_printf(seq, " * %pM on %5d by %pM [%c] (%#.4x)\n", + claim->addr, BATADV_PRINT_VID(claim->vid), claim->backbone_gw->orig, (is_own ? 'x' : ' '), claim->backbone_gw->crc); @@ -1680,10 +1688,10 @@ int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset) if (is_own) continue; - seq_printf(seq, - " * %pM on % 5d % 4i.%03is (%#.4x)\n", - backbone_gw->orig, backbone_gw->vid, - secs, msecs, backbone_gw->crc); + seq_printf(seq, " * %pM on %5d %4i.%03is (%#.4x)\n", + backbone_gw->orig, + BATADV_PRINT_VID(backbone_gw->vid), secs, + msecs, backbone_gw->crc); } rcu_read_unlock(); } diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index dea2fbc5d98d..4b102e71e5bd 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -21,9 +21,10 @@ #define _NET_BATMAN_ADV_BLA_H_ #ifdef CONFIG_BATMAN_ADV_BLA -int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid, - bool is_bcast); -int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, short vid); +int batadv_bla_rx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid, bool is_bcast); +int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb, + unsigned short vid); int batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); @@ -42,13 +43,14 @@ void batadv_bla_free(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_BLA */ static inline int batadv_bla_rx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid, bool is_bcast) + struct sk_buff *skb, unsigned short vid, + bool is_bcast) { return 0; } static inline int batadv_bla_tx(struct batadv_priv *bat_priv, - struct sk_buff *skb, short vid) + struct sk_buff *skb, unsigned short vid) { return 0; } diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 239992021b1d..06345d401588 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -45,9 +45,9 @@ static void batadv_dat_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_dat_entry_free_ref - decrements the dat_entry refcounter and possibly + * batadv_dat_entry_free_ref - decrement the dat_entry refcounter and possibly * free it - * @dat_entry: the oentry to free + * @dat_entry: the entry to free */ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) { @@ -56,10 +56,10 @@ static void batadv_dat_entry_free_ref(struct batadv_dat_entry *dat_entry) } /** - * batadv_dat_to_purge - checks whether a dat_entry has to be purged or not + * batadv_dat_to_purge - check whether a dat_entry has to be purged or not * @dat_entry: the entry to check * - * Returns true if the entry has to be purged now, false otherwise + * Returns true if the entry has to be purged now, false otherwise. */ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) { @@ -75,8 +75,8 @@ static bool batadv_dat_to_purge(struct batadv_dat_entry *dat_entry) * returns a boolean value: true is the entry has to be deleted, * false otherwise * - * Loops over each entry in the DAT local storage and delete it if and only if - * the to_purge function passed as argument returns true + * Loops over each entry in the DAT local storage and deletes it if and only if + * the to_purge function passed as argument returns true. */ static void __batadv_dat_purge(struct batadv_priv *bat_priv, bool (*to_purge)(struct batadv_dat_entry *)) @@ -97,7 +97,7 @@ static void __batadv_dat_purge(struct batadv_priv *bat_priv, spin_lock_bh(list_lock); hlist_for_each_entry_safe(dat_entry, node_tmp, head, hash_entry) { - /* if an helper function has been passed as parameter, + /* if a helper function has been passed as parameter, * ask it if the entry has to be purged or not */ if (to_purge && !to_purge(dat_entry)) @@ -134,7 +134,7 @@ static void batadv_dat_purge(struct work_struct *work) * @node: node in the local table * @data2: second object to compare the node to * - * Returns 1 if the two entry are the same, 0 otherwise + * Returns 1 if the two entries are the same, 0 otherwise. */ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) { @@ -149,7 +149,7 @@ static int batadv_compare_dat(const struct hlist_node *node, const void *data2) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_src field in the ARP packet + * Returns the value of the hw_src field in the ARP packet. */ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) { @@ -166,7 +166,7 @@ static uint8_t *batadv_arp_hw_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_src field in the ARP packet + * Returns the value of the ip_src field in the ARP packet. */ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) { @@ -178,7 +178,7 @@ static __be32 batadv_arp_ip_src(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the hw_dst field in the ARP packet + * Returns the value of the hw_dst field in the ARP packet. */ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) { @@ -190,7 +190,7 @@ static uint8_t *batadv_arp_hw_dst(struct sk_buff *skb, int hdr_size) * @skb: ARP packet * @hdr_size: size of the possible header before the ARP packet * - * Returns the value of the ip_dst field in the ARP packet + * Returns the value of the ip_dst field in the ARP packet. */ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) { @@ -202,7 +202,7 @@ static __be32 batadv_arp_ip_dst(struct sk_buff *skb, int hdr_size) * @data: data to hash * @size: size of the hash table * - * Returns the selected index in the hash table for the given data + * Returns the selected index in the hash table for the given data. */ static uint32_t batadv_hash_dat(const void *data, uint32_t size) { @@ -224,12 +224,12 @@ static uint32_t batadv_hash_dat(const void *data, uint32_t size) } /** - * batadv_dat_entry_hash_find - looks for a given dat_entry in the local hash + * batadv_dat_entry_hash_find - look for a given dat_entry in the local hash * table * @bat_priv: the bat priv with all the soft interface information * @ip: search key * - * Returns the dat_entry if found, NULL otherwise + * Returns the dat_entry if found, NULL otherwise. */ static struct batadv_dat_entry * batadv_dat_entry_hash_find(struct batadv_priv *bat_priv, __be32 ip) @@ -343,9 +343,6 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, if (hdr_size == 0) return; - /* if the ARP packet is encapsulated in a batman packet, let's print - * some debug messages - */ unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; switch (unicast_4addr_packet->u.header.packet_type) { @@ -409,7 +406,8 @@ static void batadv_dbg_arp(struct batadv_priv *bat_priv, struct sk_buff *skb, * @candidate: orig_node under evaluation * @max_orig_node: last selected candidate * - * Returns true if the node has been elected as next candidate or false othrwise + * Returns true if the node has been elected as next candidate or false + * otherwise. */ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, int select, batadv_dat_addr_t tmp_max, @@ -472,7 +470,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, */ cands[select].type = BATADV_DAT_CANDIDATE_NOT_FOUND; - /* iterate over the originator list and find the node with closest + /* iterate over the originator list and find the node with the closest * dat_address which has not been selected yet */ for (i = 0; i < hash->size; i++) { @@ -480,7 +478,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - /* the dht space is a ring and addresses are unsigned */ + /* the dht space is a ring using unsigned addresses */ tmp_max = BATADV_DAT_ADDR_MAX - orig_node->dat_addr + ip_key; @@ -512,7 +510,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, } /** - * batadv_dat_select_candidates - selects the nodes which the DHT message has to + * batadv_dat_select_candidates - select the nodes which the DHT message has to * be sent to * @bat_priv: the bat priv with all the soft interface information * @ip_dst: ipv4 to look up in the DHT @@ -521,7 +519,7 @@ static void batadv_choose_next_candidate(struct batadv_priv *bat_priv, * closest values (from the LEFT, with wrap around if needed) then the hash * value of the key. ip_dst is the key. * - * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM + * Returns the candidate array of size BATADV_DAT_CANDIDATE_NUM. */ static struct batadv_dat_candidate * batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) @@ -558,10 +556,11 @@ batadv_dat_select_candidates(struct batadv_priv *bat_priv, __be32 ip_dst) * @ip: the DHT key * @packet_subtype: unicast4addr packet subtype to use * - * In this function the skb is copied by means of pskb_copy() and is sent as - * unicast packet to each of the selected candidates + * This function copies the skb with pskb_copy() and is sent as unicast packet + * to each of the selected candidates. * - * Returns true if the packet is sent to at least one candidate, false otherwise + * Returns true if the packet is sent to at least one candidate, false + * otherwise. */ static bool batadv_dat_send_data(struct batadv_priv *bat_priv, struct sk_buff *skb, __be32 ip, @@ -727,7 +726,7 @@ out: * @skb: packet to analyse * @hdr_size: size of the possible header before the ARP packet in the skb * - * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise + * Returns the ARP type if the skb contains a valid ARP packet, 0 otherwise. */ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -754,9 +753,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, arphdr = (struct arphdr *)(skb->data + hdr_size + ETH_HLEN); - /* Check whether the ARP packet carries a valid - * IP information - */ + /* check whether the ARP packet carries a valid IP information */ if (arphdr->ar_hrd != htons(ARPHRD_ETHER)) goto out; @@ -784,7 +781,7 @@ static uint16_t batadv_arp_get_type(struct batadv_priv *bat_priv, if (is_zero_ether_addr(hw_src) || is_multicast_ether_addr(hw_src)) goto out; - /* we don't care about the destination MAC address in ARP requests */ + /* don't care about the destination MAC address in ARP requests */ if (arphdr->ar_op != htons(ARPOP_REQUEST)) { hw_dst = batadv_arp_hw_dst(skb, hdr_size); if (is_zero_ether_addr(hw_dst) || @@ -804,8 +801,8 @@ out: * @skb: packet to check * * Returns true if the message has been sent to the dht candidates, false - * otherwise. In case of true the message has to be enqueued to permit the - * fallback + * otherwise. In case of a positive return value the message has to be enqueued + * to permit the fallback. */ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb) @@ -867,7 +864,7 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv, batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n"); ret = true; } else { - /* Send the request on the DHT */ + /* Send the request to the DHT */ ret = batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_GET); } @@ -884,7 +881,7 @@ out: * @skb: packet to check * @hdr_size: size of the encapsulation header * - * Returns true if the request has been answered, false otherwise + * Returns true if the request has been answered, false otherwise. */ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -924,10 +921,9 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv, if (!skb_new) goto out; - /* to preserve backwards compatibility, here the node has to answer - * using the same packet type it received for the request. This is due - * to that if a node is not using the 4addr packet format it may not - * support it. + /* To preserve backwards compatibility, the node has choose the outgoing + * format based on the incoming request packet type. The assumption is + * that a node not using the 4addr packet format doesn't support it. */ if (hdr_size == sizeof(struct batadv_unicast_4addr_packet)) err = batadv_unicast_4addr_send_skb(bat_priv, skb_new, @@ -977,7 +973,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, batadv_dat_entry_add(bat_priv, ip_dst, hw_dst); /* Send the ARP reply to the candidates for both the IP addresses that - * the node got within the ARP reply + * the node obtained from the ARP reply */ batadv_dat_send_data(bat_priv, skb, ip_src, BATADV_P_DAT_DHT_PUT); batadv_dat_send_data(bat_priv, skb, ip_dst, BATADV_P_DAT_DHT_PUT); @@ -987,7 +983,7 @@ void batadv_dat_snoop_outgoing_arp_reply(struct batadv_priv *bat_priv, * DAT storage only * @bat_priv: the bat priv with all the soft interface information * @skb: packet to check - * @hdr_size: siaze of the encapsulation header + * @hdr_size: size of the encapsulation header */ bool batadv_dat_snoop_incoming_arp_reply(struct batadv_priv *bat_priv, struct sk_buff *skb, int hdr_size) @@ -1031,11 +1027,11 @@ out: /** * batadv_dat_drop_broadcast_packet - check if an ARP request has to be dropped - * (because the node has already got the reply via DAT) or not + * (because the node has already obtained the reply via DAT) or not * @bat_priv: the bat priv with all the soft interface information * @forw_packet: the broadcast packet * - * Returns true if the node can drop the packet, false otherwise + * Returns true if the node can drop the packet, false otherwise. */ bool batadv_dat_drop_broadcast_packet(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 522243aff2f3..c478e6bcf89b 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -117,6 +117,58 @@ static int batadv_is_valid_iface(const struct net_device *net_dev) return 1; } +/** + * batadv_is_wifi_netdev - check if the given net_device struct is a wifi + * interface + * @net_device: the device to check + * + * Returns true if the net device is a 802.11 wireless device, false otherwise. + */ +static bool batadv_is_wifi_netdev(struct net_device *net_device) +{ +#ifdef CONFIG_WIRELESS_EXT + /* pre-cfg80211 drivers have to implement WEXT, so it is possible to + * check for wireless_handlers != NULL + */ + if (net_device->wireless_handlers) + return true; +#endif + + /* cfg80211 drivers have to set ieee80211_ptr */ + if (net_device->ieee80211_ptr) + return true; + + return false; +} + +/** + * batadv_is_wifi_iface - check if the given interface represented by ifindex + * is a wifi interface + * @ifindex: interface index to check + * + * Returns true if the interface represented by ifindex is a 802.11 wireless + * device, false otherwise. + */ +bool batadv_is_wifi_iface(int ifindex) +{ + struct net_device *net_device = NULL; + bool ret = false; + + if (ifindex == BATADV_NULL_IFINDEX) + goto out; + + net_device = dev_get_by_index(&init_net, ifindex); + if (!net_device) + goto out; + + ret = batadv_is_wifi_netdev(net_device); + +out: + if (net_device) + dev_put(net_device); + return ret; +} + static struct batadv_hard_iface * batadv_hardif_get_active(const struct net_device *soft_iface) { @@ -525,7 +577,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) dev_hold(net_dev); - hard_iface = kmalloc(sizeof(*hard_iface), GFP_ATOMIC); + hard_iface = kzalloc(sizeof(*hard_iface), GFP_ATOMIC); if (!hard_iface) goto release_dev; @@ -541,18 +593,16 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_WORK(&hard_iface->cleanup_work, batadv_hardif_remove_interface_finish); + hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; + if (batadv_is_wifi_netdev(net_dev)) + hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; + /* extra reference for return */ atomic_set(&hard_iface->refcount, 2); batadv_check_known_mac_addr(hard_iface->net_dev); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); - /* This can't be called via a bat_priv callback because - * we have no bat_priv yet. - */ - atomic_set(&hard_iface->bat_iv.ogm_seqno, 1); - hard_iface->bat_iv.ogm_buff = NULL; - return hard_iface; free_if: @@ -595,7 +645,7 @@ void batadv_hardif_remove_interfaces(void) static int batadv_hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *net_dev = ptr; + struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); struct batadv_hard_iface *hard_iface; struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; @@ -657,38 +707,6 @@ out: return NOTIFY_DONE; } -/* This function returns true if the interface represented by ifindex is a - * 802.11 wireless device - */ -bool batadv_is_wifi_iface(int ifindex) -{ - struct net_device *net_device = NULL; - bool ret = false; - - if (ifindex == BATADV_NULL_IFINDEX) - goto out; - - net_device = dev_get_by_index(&init_net, ifindex); - if (!net_device) - goto out; - -#ifdef CONFIG_WIRELESS_EXT - /* pre-cfg80211 drivers have to implement WEXT, so it is possible to - * check for wireless_handlers != NULL - */ - if (net_device->wireless_handlers) - ret = true; - else -#endif - /* cfg80211 drivers have to set ieee80211_ptr */ - if (net_device->ieee80211_ptr) - ret = true; -out: - if (net_device) - dev_put(net_device); - return ret; -} - struct notifier_block batadv_hard_if_notifier = { .notifier_call = batadv_hard_if_event, }; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 0ba6c899b2d3..b27508b8085c 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -177,13 +177,13 @@ static ssize_t batadv_socket_write(struct file *file, const char __user *buff, if (len >= sizeof(struct batadv_icmp_packet_rr)) packet_len = sizeof(struct batadv_icmp_packet_rr); - skb = dev_alloc_skb(packet_len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, packet_len + ETH_HLEN); if (!skb) { len = -ENOMEM; goto out; } - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); icmp_packet = (struct batadv_icmp_packet_rr *)skb_put(skb, packet_len); if (copy_from_user(icmp_packet, buff, packet_len)) { diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 51aafd669cbb..08125f3f6064 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -473,7 +473,6 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr) crc = crc32c(crc, data, len); consumed += len; } - skb_abort_seq_read(&st); return htonl(crc); } diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 59a0d6af15c8..5e9aebb7d56b 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -26,7 +26,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2013.2.0" +#define BATADV_SOURCE_VERSION "2013.3.0" #endif /* B.A.T.M.A.N. parameters */ @@ -76,6 +76,11 @@ #define BATADV_LOG_BUF_LEN 8192 /* has to be a power of 2 */ +/* number of packets to send for broadcasts on different interface types */ +#define BATADV_NUM_BCASTS_DEFAULT 1 +#define BATADV_NUM_BCASTS_WIRELESS 3 +#define BATADV_NUM_BCASTS_MAX 3 + /* msecs after which an ARP_REQUEST is sent in broadcast as fallback */ #define ARP_REQ_DELAY 250 /* numbers of originator to contact for any PUT/GET DHT operation */ @@ -157,6 +162,17 @@ enum batadv_uev_type { #include <linux/seq_file.h> #include "types.h" +/** + * batadv_vlan_flags - flags for the four MSB of any vlan ID field + * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not + */ +enum batadv_vlan_flags { + BATADV_VLAN_HAS_TAG = BIT(15), +}; + +#define BATADV_PRINT_VID(vid) (vid & BATADV_VLAN_HAS_TAG ? \ + (int)(vid & VLAN_VID_MASK) : -1) + extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index e84629ece9b7..a487d46e0aec 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1245,7 +1245,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, return; /* Set the mac header as if we actually sent the packet uncoded */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN); memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN); @@ -1359,18 +1359,17 @@ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, * buffer * @skb: data skb to forward * @neigh_node: next hop to forward packet to - * @ethhdr: pointer to the ethernet header inside the skb * * Returns true if the skb was consumed (encoded packet sent) or false otherwise */ bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr) + struct batadv_neigh_node *neigh_node) { const struct net_device *netdev = neigh_node->if_incoming->soft_iface; struct batadv_priv *bat_priv = netdev_priv(netdev); struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; + struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; @@ -1423,7 +1422,7 @@ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, { struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; @@ -1482,7 +1481,7 @@ out: void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb) { - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *ethhdr = eth_hdr(skb); if (batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return; @@ -1533,7 +1532,7 @@ batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, skb_reset_network_header(skb); /* Reconstruct original mac header */ - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr, ðhdr_tmp, sizeof(*ethhdr)); /* Select the correct unicast header information based on the location @@ -1677,7 +1676,7 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, return NET_RX_DROP; coded_packet = (struct batadv_coded_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* Verify frame is destined for us */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && @@ -1763,6 +1762,13 @@ int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset) /* For each orig_node in this bin */ rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + /* no need to print the orig node if it does not have + * network coding neighbors + */ + if (list_empty(&orig_node->in_coding_list) && + list_empty(&orig_node->out_coding_list)) + continue; + seq_printf(seq, "Node: %pM\n", orig_node->orig); seq_puts(seq, " Ingoing: "); diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 4fa6d0caddbd..85a4ec81ad50 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -36,8 +36,7 @@ void batadv_nc_purge_orig(struct batadv_priv *bat_priv, void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv); void batadv_nc_init_orig(struct batadv_orig_node *orig_node); bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr); + struct batadv_neigh_node *neigh_node); void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, struct sk_buff *skb); void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, @@ -87,8 +86,7 @@ static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node) } static inline bool batadv_nc_skb_forward(struct sk_buff *skb, - struct batadv_neigh_node *neigh_node, - struct ethhdr *ethhdr) + struct batadv_neigh_node *neigh_node) { return false; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index fad1a2093e15..f50553a7de62 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -92,7 +92,7 @@ batadv_orig_node_get_router(struct batadv_orig_node *orig_node) struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, uint32_t seqno) + const uint8_t *neigh_addr) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_neigh_node *neigh_node; @@ -110,8 +110,8 @@ batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, atomic_set(&neigh_node->refcount, 2); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Creating new neighbor %pM, initial seqno %d\n", - neigh_addr, seqno); + "Creating new neighbor %pM on interface %s\n", neigh_addr, + hard_iface->net_dev->name); out: return neigh_node; diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 734e5a3d8a5b..7887b84a9af4 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -31,7 +31,7 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv, const uint8_t *addr); struct batadv_neigh_node * batadv_neigh_node_new(struct batadv_hard_iface *hard_iface, - const uint8_t *neigh_addr, uint32_t seqno); + const uint8_t *neigh_addr); void batadv_neigh_node_free_ref(struct batadv_neigh_node *neigh_node); struct batadv_neigh_node * batadv_orig_node_get_router(struct batadv_orig_node *orig_node); diff --git a/net/batman-adv/ring_buffer.c b/net/batman-adv/ring_buffer.c deleted file mode 100644 index ccab0bbdbb59..000000000000 --- a/net/batman-adv/ring_buffer.c +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: - * - * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#include "main.h" -#include "ring_buffer.h" - -void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, - uint8_t value) -{ - lq_recv[*lq_index] = value; - *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; -} - -uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]) -{ - const uint8_t *ptr; - uint16_t count = 0, i = 0, sum = 0; - - ptr = lq_recv; - - while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) { - if (*ptr != 0) { - count++; - sum += *ptr; - } - - i++; - ptr++; - } - - if (count == 0) - return 0; - - return (uint8_t)(sum / count); -} diff --git a/net/batman-adv/ring_buffer.h b/net/batman-adv/ring_buffer.h deleted file mode 100644 index 3f92ae248e83..000000000000 --- a/net/batman-adv/ring_buffer.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright (C) 2007-2013 B.A.T.M.A.N. contributors: - * - * Marek Lindner - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - * 02110-1301, USA - */ - -#ifndef _NET_BATMAN_ADV_RING_BUFFER_H_ -#define _NET_BATMAN_ADV_RING_BUFFER_H_ - -void batadv_ring_buffer_set(uint8_t lq_recv[], uint8_t *lq_index, - uint8_t value); -uint8_t batadv_ring_buffer_avg(const uint8_t lq_recv[]); - -#endif /* _NET_BATMAN_ADV_RING_BUFFER_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index b27a4d792d15..2f0bd3ffe6e8 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -34,35 +34,6 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); -void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) -{ - struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct batadv_hashtable *hash = bat_priv->orig_hash; - struct hlist_head *head; - struct batadv_orig_node *orig_node; - unsigned long *word; - uint32_t i; - size_t word_index; - uint8_t *w; - - for (i = 0; i < hash->size; i++) { - head = &hash->table[i]; - - rcu_read_lock(); - hlist_for_each_entry_rcu(orig_node, head, hash_entry) { - spin_lock_bh(&orig_node->ogm_cnt_lock); - word_index = hard_iface->if_num * BATADV_NUM_WORDS; - word = &(orig_node->bcast_own[word_index]); - - batadv_bit_get_packet(bat_priv, word, 1, 0); - w = &orig_node->bcast_own_sum[hard_iface->if_num]; - *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); - spin_unlock_bh(&orig_node->ogm_cnt_lock); - } - rcu_read_unlock(); - } -} - static void _batadv_update_route(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node) @@ -256,7 +227,7 @@ bool batadv_check_management_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, header_len))) return false; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) @@ -314,7 +285,7 @@ static int batadv_recv_my_icmp_packet(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_ECHO_REPLY; icmp_packet->header.ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -362,7 +333,7 @@ static int batadv_recv_icmp_ttl_exceeded(struct batadv_priv *bat_priv, icmp_packet->msg_type = BATADV_TTL_EXCEEDED; icmp_packet->header.ttl = BATADV_TTL; - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -392,7 +363,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, hdr_size))) goto out; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) @@ -439,7 +410,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, icmp_packet->header.ttl--; /* route it */ - if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) + if (batadv_send_skb_to_orig(skb, orig_node, recv_if) != NET_XMIT_DROP) ret = NET_RX_SUCCESS; out: @@ -569,7 +540,7 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv, if (unlikely(!pskb_may_pull(skb, hdr_size))) return -ENODATA; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with unicast indication but broadcast recipient */ if (is_broadcast_ether_addr(ethhdr->h_dest)) @@ -803,8 +774,8 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL; struct batadv_neigh_node *neigh_node = NULL; struct batadv_unicast_packet *unicast_packet; - struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb); - int ret = NET_RX_DROP; + struct ethhdr *ethhdr = eth_hdr(skb); + int res, ret = NET_RX_DROP; struct sk_buff *new_skb; unicast_packet = (struct batadv_unicast_packet *)skb->data; @@ -864,16 +835,19 @@ static int batadv_route_unicast_packet(struct sk_buff *skb, /* decrement ttl */ unicast_packet->header.ttl--; - /* network code packet if possible */ - if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) { - ret = NET_RX_SUCCESS; - } else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) { - ret = NET_RX_SUCCESS; + res = batadv_send_skb_to_orig(skb, orig_node, recv_if); - /* Update stats counter */ + /* translate transmit result into receive result */ + if (res == NET_XMIT_SUCCESS) { + /* skb was transmitted and consumed */ batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, skb->len + ETH_HLEN); + + ret = NET_RX_SUCCESS; + } else if (res == NET_XMIT_POLICED) { + /* skb was buffered and consumed */ + ret = NET_RX_SUCCESS; } out: @@ -1165,7 +1139,7 @@ int batadv_recv_bcast_packet(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, hdr_size))) goto out; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* packet with broadcast indication but unicast recipient */ if (!is_broadcast_ether_addr(ethhdr->h_dest)) @@ -1265,7 +1239,7 @@ int batadv_recv_vis_packet(struct sk_buff *skb, return NET_RX_DROP; vis_packet = (struct batadv_vis_packet *)skb->data; - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); /* not for me */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h index 99eeafaba407..72a29bde2010 100644 --- a/net/batman-adv/routing.h +++ b/net/batman-adv/routing.h @@ -20,7 +20,6 @@ #ifndef _NET_BATMAN_ADV_ROUTING_H_ #define _NET_BATMAN_ADV_ROUTING_H_ -void batadv_slide_own_bcast_window(struct batadv_hard_iface *hard_iface); bool batadv_check_management_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, int header_len); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 263cfd1ccee7..e9ff8d801201 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -61,7 +61,7 @@ int batadv_send_skb_packet(struct sk_buff *skb, skb_reset_mac_header(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); memcpy(ethhdr->h_source, hard_iface->net_dev->dev_addr, ETH_ALEN); memcpy(ethhdr->h_dest, dst_addr, ETH_ALEN); ethhdr->h_proto = __constant_htons(ETH_P_BATMAN); @@ -96,26 +96,37 @@ send_skb_err: * host, NULL can be passed as recv_if and no interface alternating is * attempted. * - * Returns TRUE on success; FALSE otherwise. + * Returns NET_XMIT_SUCCESS on success, NET_XMIT_DROP on failure, or + * NET_XMIT_POLICED if the skb is buffered for later transmit. */ -bool batadv_send_skb_to_orig(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_hard_iface *recv_if) +int batadv_send_skb_to_orig(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if) { struct batadv_priv *bat_priv = orig_node->bat_priv; struct batadv_neigh_node *neigh_node; + int ret = NET_XMIT_DROP; /* batadv_find_router() increases neigh_nodes refcount if found. */ neigh_node = batadv_find_router(bat_priv, orig_node, recv_if); if (!neigh_node) - return false; + return ret; - /* route it */ - batadv_send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); + /* try to network code the packet, if it is received on an interface + * (i.e. being forwarded). If the packet originates from this node or if + * network coding fails, then send the packet as usual. + */ + if (recv_if && batadv_nc_skb_forward(skb, neigh_node)) { + ret = NET_XMIT_POLICED; + } else { + batadv_send_skb_packet(skb, neigh_node->if_incoming, + neigh_node->addr); + ret = NET_XMIT_SUCCESS; + } batadv_neigh_node_free_ref(neigh_node); - return true; + return ret; } void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface) @@ -152,8 +163,6 @@ _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, unsigned long send_time) { - INIT_HLIST_NODE(&forw_packet->list); - /* add new packet to packet list */ spin_lock_bh(&bat_priv->forw_bcast_list_lock); hlist_add_head(&forw_packet->list, &bat_priv->forw_bcast_list); @@ -260,6 +269,9 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) if (hard_iface->soft_iface != soft_iface) continue; + if (forw_packet->num_packets >= hard_iface->num_bcasts) + continue; + /* send a copy of the saved skb */ skb1 = skb_clone(forw_packet->skb, GFP_ATOMIC); if (skb1) @@ -271,7 +283,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) forw_packet->num_packets++; /* if we still have some more bcasts to send */ - if (forw_packet->num_packets < 3) { + if (forw_packet->num_packets < BATADV_NUM_BCASTS_MAX) { _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, msecs_to_jiffies(5)); return; diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 38e662f619ac..e7b17880fca4 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -23,9 +23,9 @@ int batadv_send_skb_packet(struct sk_buff *skb, struct batadv_hard_iface *hard_iface, const uint8_t *dst_addr); -bool batadv_send_skb_to_orig(struct sk_buff *skb, - struct batadv_orig_node *orig_node, - struct batadv_hard_iface *recv_if); +int batadv_send_skb_to_orig(struct sk_buff *skb, + struct batadv_orig_node *orig_node, + struct batadv_hard_iface *recv_if); void batadv_schedule_bat_ogm(struct batadv_hard_iface *hard_iface); int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, const struct sk_buff *skb, diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 819dfb006cdf..700d0b49742d 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -154,7 +154,7 @@ static int batadv_interface_tx(struct sk_buff *skb, 0x00, 0x00}; unsigned int header_len = 0; int data_len = skb->len, ret; - short vid __maybe_unused = -1; + unsigned short vid __maybe_unused = BATADV_NO_FLAGS; bool do_bcast = false; uint32_t seqno; unsigned long brd_delay = 1; @@ -303,7 +303,7 @@ void batadv_interface_rx(struct net_device *soft_iface, struct ethhdr *ethhdr; struct vlan_ethhdr *vhdr; struct batadv_header *batadv_header = (struct batadv_header *)skb->data; - short vid __maybe_unused = -1; + unsigned short vid __maybe_unused = BATADV_NO_FLAGS; __be16 ethertype = __constant_htons(ETH_P_BATMAN); bool is_bcast; @@ -316,7 +316,7 @@ void batadv_interface_rx(struct net_device *soft_iface, skb_pull_rcsum(skb, hdr_size); skb_reset_mac_header(skb); - ethhdr = (struct ethhdr *)skb_mac_header(skb); + ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 9e8748575845..429aeef3d8b2 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -163,10 +163,19 @@ batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } +/** + * batadv_tt_local_event - store a local TT event (ADD/DEL) + * @bat_priv: the bat priv with all the soft interface information + * @tt_local_entry: the TT entry involved in the event + * @event_flags: flags to store in the event structure + */ static void batadv_tt_local_event(struct batadv_priv *bat_priv, - const uint8_t *addr, uint8_t flags) + struct batadv_tt_local_entry *tt_local_entry, + uint8_t event_flags) { struct batadv_tt_change_node *tt_change_node, *entry, *safe; + struct batadv_tt_common_entry *common = &tt_local_entry->common; + uint8_t flags = common->flags | event_flags; bool event_removed = false; bool del_op_requested, del_op_entry; @@ -176,7 +185,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, return; tt_change_node->change.flags = flags; - memcpy(tt_change_node->change.addr, addr, ETH_ALEN); + memcpy(tt_change_node->change.addr, common->addr, ETH_ALEN); del_op_requested = flags & BATADV_TT_CLIENT_DEL; @@ -184,7 +193,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->tt.changes_list_lock); list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { - if (!batadv_compare_eth(entry->change.addr, addr)) + if (!batadv_compare_eth(entry->change.addr, common->addr)) continue; /* DEL+ADD in the same orig interval have no effect and can be @@ -332,7 +341,7 @@ void batadv_tt_local_add(struct net_device *soft_iface, const uint8_t *addr, } add_event: - batadv_tt_local_event(bat_priv, addr, tt_local->common.flags); + batadv_tt_local_event(bat_priv, tt_local, BATADV_NO_FLAGS); check_roaming: /* Check whether it is a roaming, but don't do anything if the roaming @@ -529,8 +538,7 @@ batadv_tt_local_set_pending(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry, uint16_t flags, const char *message) { - batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, - tt_local_entry->common.flags | flags); + batadv_tt_local_event(bat_priv, tt_local_entry, flags); /* The local client has to be marked as "pending to be removed" but has * to be kept in the table in order to send it in a full table @@ -584,8 +592,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv, /* if this client has been added right now, it is possible to * immediately purge it */ - batadv_tt_local_event(bat_priv, tt_local_entry->common.addr, - curr_flags | BATADV_TT_CLIENT_DEL); + batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); hlist_del_rcu(&tt_local_entry->common.hash_entry); batadv_tt_local_entry_free_ref(tt_local_entry); @@ -791,10 +798,25 @@ out: batadv_tt_orig_list_entry_free_ref(orig_entry); } -/* caller must hold orig_node refcount */ +/** + * batadv_tt_global_add - add a new TT global entry or update an existing one + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: the originator announcing the client + * @tt_addr: the mac address of the non-mesh client + * @flags: TT flags that have to be set for this non-mesh client + * @ttvn: the tt version number ever announcing this non-mesh client + * + * Add a new TT global entry for the given originator. If the entry already + * exists add a new reference to the given originator (a global entry can have + * references to multiple originators) and adjust the flags attribute to reflect + * the function argument. + * If a TT local entry exists for this non-mesh client remove it. + * + * The caller must hold orig_node refcount. + */ int batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *tt_addr, uint8_t flags, + const unsigned char *tt_addr, uint16_t flags, uint8_t ttvn) { struct batadv_tt_global_entry *tt_global_entry; @@ -1600,11 +1622,11 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = tt_query_size + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); tt_response = (struct batadv_tt_query_packet *)skb_put(skb, len); tt_response->ttvn = ttvn; @@ -1665,11 +1687,11 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, if (!tt_req_node) goto out; - skb = dev_alloc_skb(sizeof(*tt_request) + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, sizeof(*tt_request) + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); tt_req_len = sizeof(*tt_request); tt_request = (struct batadv_tt_query_packet *)skb_put(skb, tt_req_len); @@ -1691,7 +1713,7 @@ static int batadv_send_tt_request(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX); - if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, dst_orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: @@ -1715,7 +1737,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; uint8_t orig_ttvn, req_ttvn, ttvn; - int ret = false; + int res, ret = false; unsigned char *tt_buff; bool full_table; uint16_t tt_len, tt_tot; @@ -1762,11 +1784,11 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -1810,8 +1832,10 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - if (batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL)) + res = batadv_send_skb_to_orig(skb, res_dst_orig_node, NULL); + if (res != NET_XMIT_DROP) ret = true; + goto out; unlock: @@ -1878,11 +1902,11 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, tt_tot = tt_len / sizeof(struct batadv_tt_change); len = sizeof(*tt_response) + tt_len; - skb = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto unlock; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); packet_pos = skb_put(skb, len); tt_response = (struct batadv_tt_query_packet *)packet_pos; tt_response->ttvn = req_ttvn; @@ -1925,7 +1949,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv, batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = true; goto out; @@ -2212,11 +2236,11 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, if (!batadv_tt_check_roam_count(bat_priv, client)) goto out; - skb = dev_alloc_skb(sizeof(*roam_adv_packet) + ETH_HLEN + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!skb) goto out; - skb_reserve(skb, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(skb, ETH_HLEN); roam_adv_packet = (struct batadv_roam_adv_packet *)skb_put(skb, len); @@ -2238,7 +2262,7 @@ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, uint8_t *client, batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index ab8e683b402f..659a3bb759ce 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -33,7 +33,7 @@ void batadv_tt_global_add_orig(struct batadv_priv *bat_priv, const unsigned char *tt_buff, int tt_buff_len); int batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, - const unsigned char *addr, uint8_t flags, + const unsigned char *addr, uint16_t flags, uint8_t ttvn); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index aba8364c3689..b2c94e139319 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -61,6 +61,7 @@ struct batadv_hard_iface_bat_iv { * @if_status: status of the interface for batman-adv * @net_dev: pointer to the net_device * @frag_seqno: last fragment sequence number sent by this interface + * @num_bcasts: number of payload re-broadcasts on this interface (ARQ) * @hardif_obj: kobject of the per interface sysfs "mesh" directory * @refcount: number of contexts the object is used * @batman_adv_ptype: packet type describing packets that should be processed by @@ -76,6 +77,7 @@ struct batadv_hard_iface { char if_status; struct net_device *net_dev; atomic_t frag_seqno; + uint8_t num_bcasts; struct kobject *hardif_obj; atomic_t refcount; struct packet_type batman_adv_ptype; @@ -640,7 +642,7 @@ struct batadv_socket_packet { #ifdef CONFIG_BATMAN_ADV_BLA struct batadv_bla_backbone_gw { uint8_t orig[ETH_ALEN]; - short vid; + unsigned short vid; struct hlist_node hash_entry; struct batadv_priv *bat_priv; unsigned long lasttime; @@ -663,7 +665,7 @@ struct batadv_bla_backbone_gw { */ struct batadv_bla_claim { uint8_t addr[ETH_ALEN]; - short vid; + unsigned short vid; struct batadv_bla_backbone_gw *backbone_gw; unsigned long lasttime; struct hlist_node hash_entry; diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 0bb3b5982f94..dc8b5d4dd636 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -464,7 +464,7 @@ find_router: goto out; } - if (batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) != NET_XMIT_DROP) ret = 0; out: diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 1625e5793a89..4983340f1943 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -392,12 +392,12 @@ batadv_add_packet(struct batadv_priv *bat_priv, return NULL; len = sizeof(*packet) + vis_info_len; - info->skb_packet = dev_alloc_skb(len + ETH_HLEN + NET_IP_ALIGN); + info->skb_packet = netdev_alloc_skb_ip_align(NULL, len + ETH_HLEN); if (!info->skb_packet) { kfree(info); return NULL; } - skb_reserve(info->skb_packet, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(info->skb_packet, ETH_HLEN); packet = (struct batadv_vis_packet *)skb_put(info->skb_packet, len); kref_init(&info->refcount); @@ -697,7 +697,7 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node; struct batadv_vis_packet *packet; struct sk_buff *skb; - uint32_t i; + uint32_t i, res; packet = (struct batadv_vis_packet *)info->skb_packet->data; @@ -724,7 +724,8 @@ static void batadv_broadcast_vis_packet(struct batadv_priv *bat_priv, if (!skb) continue; - if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) + res = batadv_send_skb_to_orig(skb, orig_node, NULL); + if (res == NET_XMIT_DROP) kfree_skb(skb); } rcu_read_unlock(); @@ -748,7 +749,7 @@ static void batadv_unicast_vis_packet(struct batadv_priv *bat_priv, if (!skb) goto out; - if (!batadv_send_skb_to_orig(skb, orig_node, NULL)) + if (batadv_send_skb_to_orig(skb, orig_node, NULL) == NET_XMIT_DROP) kfree_skb(skb); out: @@ -854,13 +855,13 @@ int batadv_vis_init(struct batadv_priv *bat_priv) if (!bat_priv->vis.my_info) goto err; - len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE; - len += ETH_HLEN + NET_IP_ALIGN; - bat_priv->vis.my_info->skb_packet = dev_alloc_skb(len); + len = sizeof(*packet) + BATADV_MAX_VIS_PACKET_SIZE + ETH_HLEN; + bat_priv->vis.my_info->skb_packet = netdev_alloc_skb_ip_align(NULL, + len); if (!bat_priv->vis.my_info->skb_packet) goto free_info; - skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN + NET_IP_ALIGN); + skb_reserve(bat_priv->vis.my_info->skb_packet, ETH_HLEN); tmp_skb = bat_priv->vis.my_info->skb_packet; packet = (struct batadv_vis_packet *)skb_put(tmp_skb, sizeof(*packet)); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index db7de80b88a2..cc27297da5a9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -513,7 +513,10 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) hci_setup_event_mask(req); - if (hdev->hci_ver > BLUETOOTH_VER_1_1) + /* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read + * local supported commands HCI command. + */ + if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); if (lmp_ssp_capable(hdev)) { @@ -597,7 +600,15 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt) struct hci_dev *hdev = req->hdev; u8 p; - /* Only send HCI_Delete_Stored_Link_Key if it is supported */ + /* Some Broadcom based Bluetooth controllers do not support the + * Delete Stored Link Key command. They are clearly indicating its + * absence in the bit mask of supported commands. + * + * Check the supported commands and only if the the command is marked + * as supported send it. If not supported assume that the controller + * does not have actual support for stored link keys which makes this + * command redundant anyway. + */ if (hdev->commands[6] & 0x80) { struct hci_cp_delete_stored_link_key cp; @@ -751,7 +762,7 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) hdev->discovery.state = state; } -static void inquiry_cache_flush(struct hci_dev *hdev) +void hci_inquiry_cache_flush(struct hci_dev *hdev) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *p, *n; @@ -964,7 +975,7 @@ int hci_inquiry(void __user *arg) hci_dev_lock(hdev); if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) { - inquiry_cache_flush(hdev); + hci_inquiry_cache_flush(hdev); do_inquiry = 1; } hci_dev_unlock(hdev); @@ -1201,8 +1212,6 @@ static int hci_dev_do_close(struct hci_dev *hdev) { BT_DBG("%s %p", hdev->name, hdev); - cancel_work_sync(&hdev->le_scan); - cancel_delayed_work(&hdev->power_off); hci_req_cancel(hdev, ENODEV); @@ -1230,7 +1239,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work_sync(&hdev->le_scan_disable); hci_dev_lock(hdev); - inquiry_cache_flush(hdev); + hci_inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); @@ -1331,7 +1340,7 @@ int hci_dev_reset(__u16 dev) skb_queue_purge(&hdev->cmd_q); hci_dev_lock(hdev); - inquiry_cache_flush(hdev); + hci_inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); @@ -1991,80 +2000,59 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) return mgmt_device_unblocked(hdev, bdaddr, type); } -static void le_scan_param_req(struct hci_request *req, unsigned long opt) +static void inquiry_complete(struct hci_dev *hdev, u8 status) { - struct le_scan_params *param = (struct le_scan_params *) opt; - struct hci_cp_le_set_scan_param cp; - - memset(&cp, 0, sizeof(cp)); - cp.type = param->type; - cp.interval = cpu_to_le16(param->interval); - cp.window = cpu_to_le16(param->window); + if (status) { + BT_ERR("Failed to start inquiry: status %d", status); - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); -} - -static void le_scan_enable_req(struct hci_request *req, unsigned long opt) -{ - struct hci_cp_le_set_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_ENABLE; - cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + return; + } } -static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, - u16 window, int timeout) +static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) { - long timeo = msecs_to_jiffies(3000); - struct le_scan_params param; + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; + struct hci_request req; + struct hci_cp_inquiry cp; int err; - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - return -EINPROGRESS; - - param.type = type; - param.interval = interval; - param.window = window; - - hci_req_lock(hdev); - - err = __hci_req_sync(hdev, le_scan_param_req, (unsigned long) ¶m, - timeo); - if (!err) - err = __hci_req_sync(hdev, le_scan_enable_req, 0, timeo); - - hci_req_unlock(hdev); + if (status) { + BT_ERR("Failed to disable LE scanning: status %d", status); + return; + } - if (err < 0) - return err; + switch (hdev->discovery.type) { + case DISCOV_TYPE_LE: + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + hci_dev_unlock(hdev); + break; - queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, - timeout); + case DISCOV_TYPE_INTERLEAVED: + hci_req_init(&req, hdev); - return 0; -} + memset(&cp, 0, sizeof(cp)); + memcpy(&cp.lap, lap, sizeof(cp.lap)); + cp.length = DISCOV_INTERLEAVED_INQUIRY_LEN; + hci_req_add(&req, HCI_OP_INQUIRY, sizeof(cp), &cp); -int hci_cancel_le_scan(struct hci_dev *hdev) -{ - BT_DBG("%s", hdev->name); + hci_dev_lock(hdev); - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - return -EALREADY; + hci_inquiry_cache_flush(hdev); - if (cancel_delayed_work(&hdev->le_scan_disable)) { - struct hci_cp_le_set_scan_enable cp; + err = hci_req_run(&req, inquiry_complete); + if (err) { + BT_ERR("Inquiry request failed: err %d", err); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + } - /* Send HCI command to disable LE Scan */ - memset(&cp, 0, sizeof(cp)); - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + hci_dev_unlock(hdev); + break; } - - return 0; } static void le_scan_disable_work(struct work_struct *work) @@ -2072,46 +2060,20 @@ static void le_scan_disable_work(struct work_struct *work) struct hci_dev *hdev = container_of(work, struct hci_dev, le_scan_disable.work); struct hci_cp_le_set_scan_enable cp; + struct hci_request req; + int err; BT_DBG("%s", hdev->name); - memset(&cp, 0, sizeof(cp)); - - hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); -} - -static void le_scan_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, le_scan); - struct le_scan_params *param = &hdev->le_scan_params; - - BT_DBG("%s", hdev->name); - - hci_do_le_scan(hdev, param->type, param->interval, param->window, - param->timeout); -} - -int hci_le_scan(struct hci_dev *hdev, u8 type, u16 interval, u16 window, - int timeout) -{ - struct le_scan_params *param = &hdev->le_scan_params; - - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) - return -ENOTSUPP; - - if (work_busy(&hdev->le_scan)) - return -EINPROGRESS; - - param->type = type; - param->interval = interval; - param->window = window; - param->timeout = timeout; + hci_req_init(&req, hdev); - queue_work(system_long_wq, &hdev->le_scan); + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_DISABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - return 0; + err = hci_req_run(&req, le_scan_disable_work_complete); + if (err) + BT_ERR("Disable LE scanning request failed: err %d", err); } /* Alloc HCI device */ @@ -2148,7 +2110,6 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); - INIT_WORK(&hdev->le_scan, le_scan_work); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); @@ -2207,10 +2168,6 @@ int hci_register_dev(struct hci_dev *hdev) BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); - write_lock(&hci_dev_list_lock); - list_add(&hdev->list, &hci_dev_list); - write_unlock(&hci_dev_list_lock); - hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, 1, hdev->name); if (!hdev->workqueue) { @@ -2245,6 +2202,10 @@ int hci_register_dev(struct hci_dev *hdev) if (hdev->dev_type != HCI_AMP) set_bit(HCI_AUTO_OFF, &hdev->dev_flags); + write_lock(&hci_dev_list_lock); + list_add(&hdev->list, &hci_dev_list); + write_unlock(&hci_dev_list_lock); + hci_notify(hdev, HCI_DEV_REG); hci_dev_hold(hdev); @@ -2257,9 +2218,6 @@ err_wqueue: destroy_workqueue(hdev->req_workqueue); err: ida_simple_remove(&hci_index_ida, hdev->id); - write_lock(&hci_dev_list_lock); - list_del(&hdev->list); - write_unlock(&hci_dev_list_lock); return error; } @@ -3441,8 +3399,16 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) */ if (hdev->sent_cmd) { req_complete = bt_cb(hdev->sent_cmd)->req.complete; - if (req_complete) + + if (req_complete) { + /* We must set the complete callback to NULL to + * avoid calling the callback more than once if + * this function gets called again. + */ + bt_cb(hdev->sent_cmd)->req.complete = NULL; + goto call_complete; + } } /* Remove all pending commands belonging to this request */ @@ -3550,36 +3516,6 @@ static void hci_cmd_work(struct work_struct *work) } } -int hci_do_inquiry(struct hci_dev *hdev, u8 length) -{ - /* General inquiry access code (GIAC) */ - u8 lap[3] = { 0x33, 0x8b, 0x9e }; - struct hci_cp_inquiry cp; - - BT_DBG("%s", hdev->name); - - if (test_bit(HCI_INQUIRY, &hdev->flags)) - return -EINPROGRESS; - - inquiry_cache_flush(hdev); - - memset(&cp, 0, sizeof(cp)); - memcpy(&cp.lap, lap, sizeof(cp.lap)); - cp.length = length; - - return hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); -} - -int hci_cancel_inquiry(struct hci_dev *hdev) -{ - BT_DBG("%s", hdev->name); - - if (!test_bit(HCI_INQUIRY, &hdev->flags)) - return -EALREADY; - - return hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL); -} - u8 bdaddr_to_le(u8 bdaddr_type) { switch (bdaddr_type) { diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b93cd2eb5d58..0437200d92f4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -40,21 +40,13 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%2.2x", hdev->name, status); - if (status) { - hci_dev_lock(hdev); - mgmt_stop_discovery_failed(hdev, status); - hci_dev_unlock(hdev); + if (status) return; - } clear_bit(HCI_INQUIRY, &hdev->flags); smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */ wake_up_bit(&hdev->flags, HCI_INQUIRY); - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - hci_conn_check_pending(hdev); } @@ -937,20 +929,6 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); } -static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) -{ - __u8 status = *((__u8 *) skb->data); - - BT_DBG("%s status 0x%2.2x", hdev->name, status); - - if (status) { - hci_dev_lock(hdev); - mgmt_start_discovery_failed(hdev, status); - hci_dev_unlock(hdev); - return; - } -} - static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) { @@ -963,41 +941,16 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, if (!cp) return; + if (status) + return; + switch (cp->enable) { case LE_SCAN_ENABLE: - if (status) { - hci_dev_lock(hdev); - mgmt_start_discovery_failed(hdev, status); - hci_dev_unlock(hdev); - return; - } - set_bit(HCI_LE_SCAN, &hdev->dev_flags); - - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_FINDING); - hci_dev_unlock(hdev); break; case LE_SCAN_DISABLE: - if (status) { - hci_dev_lock(hdev); - mgmt_stop_discovery_failed(hdev, status); - hci_dev_unlock(hdev); - return; - } - clear_bit(HCI_LE_SCAN, &hdev->dev_flags); - - if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED && - hdev->discovery.state == DISCOVERY_FINDING) { - mgmt_interleaved_discovery(hdev); - } else { - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); - } - break; default: @@ -1077,18 +1030,10 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) if (status) { hci_conn_check_pending(hdev); - hci_dev_lock(hdev); - if (test_bit(HCI_MGMT, &hdev->dev_flags)) - mgmt_start_discovery_failed(hdev, status); - hci_dev_unlock(hdev); return; } set_bit(HCI_INQUIRY, &hdev->flags); - - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_FINDING); - hci_dev_unlock(hdev); } static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) @@ -2298,10 +2243,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_user_passkey_neg_reply(hdev, skb); break; - case HCI_OP_LE_SET_SCAN_PARAM: - hci_cc_le_set_scan_param(hdev, skb); - break; - case HCI_OP_LE_SET_ADV_ENABLE: hci_cc_le_set_adv_enable(hdev, skb); break; @@ -2670,7 +2611,7 @@ static void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s", hdev->name); - if (!test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) + if (!test_bit(HCI_MGMT, &hdev->dev_flags)) return; hci_dev_lock(hdev); @@ -2746,7 +2687,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_drop(conn); } - if (test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) + if (test_bit(HCI_MGMT, &hdev->dev_flags)) hci_add_link_key(hdev, conn, 1, &ev->bdaddr, ev->link_key, ev->key_type, pin_len); diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 2e658ade4454..d38ab1527006 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -76,25 +76,19 @@ static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo ci->flags = session->flags; ci->state = BT_CONNECTED; - ci->vendor = 0x0000; - ci->product = 0x0000; - ci->version = 0x0000; - if (session->input) { ci->vendor = session->input->id.vendor; ci->product = session->input->id.product; ci->version = session->input->id.version; if (session->input->name) - strncpy(ci->name, session->input->name, 128); + strlcpy(ci->name, session->input->name, 128); else - strncpy(ci->name, "HID Boot Device", 128); - } - - if (session->hid) { + strlcpy(ci->name, "HID Boot Device", 128); + } else if (session->hid) { ci->vendor = session->hid->vendor; ci->product = session->hid->product; ci->version = session->hid->version; - strncpy(ci->name, session->hid->name, 128); + strlcpy(ci->name, session->hid->name, 128); } } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 68843a28a7af..8c3499bec893 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -504,8 +504,10 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) if (conn->hcon->type == LE_LINK) { /* LE connection */ chan->omtu = L2CAP_DEFAULT_MTU; - chan->scid = L2CAP_CID_LE_DATA; - chan->dcid = L2CAP_CID_LE_DATA; + if (chan->dcid == L2CAP_CID_ATT) + chan->scid = L2CAP_CID_ATT; + else + chan->scid = l2cap_alloc_cid(conn); } else { /* Alloc CID for connection-oriented socket */ chan->scid = l2cap_alloc_cid(conn); @@ -543,6 +545,8 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) l2cap_chan_hold(chan); + hci_conn_hold(conn->hcon); + list_add(&chan->list, &conn->chan_l); } @@ -1338,17 +1342,21 @@ static struct l2cap_chan *l2cap_global_chan_by_scid(int state, u16 cid, static void l2cap_le_conn_ready(struct l2cap_conn *conn) { - struct sock *parent, *sk; + struct sock *parent; struct l2cap_chan *chan, *pchan; BT_DBG(""); /* Check if we have socket listening on cid */ - pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_LE_DATA, + pchan = l2cap_global_chan_by_scid(BT_LISTEN, L2CAP_CID_ATT, conn->src, conn->dst); if (!pchan) return; + /* Client ATT sockets should override the server one */ + if (__l2cap_get_chan_by_dcid(conn, L2CAP_CID_ATT)) + return; + parent = pchan->sk; lock_sock(parent); @@ -1357,17 +1365,12 @@ static void l2cap_le_conn_ready(struct l2cap_conn *conn) if (!chan) goto clean; - sk = chan->sk; - - hci_conn_hold(conn->hcon); - conn->hcon->disc_timeout = HCI_DISCONN_TIMEOUT; - - bacpy(&bt_sk(sk)->src, conn->src); - bacpy(&bt_sk(sk)->dst, conn->dst); + chan->dcid = L2CAP_CID_ATT; - l2cap_chan_add(conn, chan); + bacpy(&bt_sk(chan->sk)->src, conn->src); + bacpy(&bt_sk(chan->sk)->dst, conn->dst); - l2cap_chan_ready(chan); + __l2cap_chan_add(conn, chan); clean: release_sock(parent); @@ -1380,14 +1383,17 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) BT_DBG("conn %p", conn); - if (!hcon->out && hcon->type == LE_LINK) - l2cap_le_conn_ready(conn); - + /* For outgoing pairing which doesn't necessarily have an + * associated socket (e.g. mgmt_pair_device). + */ if (hcon->out && hcon->type == LE_LINK) smp_conn_security(hcon, hcon->pending_sec_level); mutex_lock(&conn->chan_lock); + if (hcon->type == LE_LINK) + l2cap_le_conn_ready(conn); + list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); @@ -1792,7 +1798,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, auth_type = l2cap_get_auth_type(chan); - if (chan->dcid == L2CAP_CID_LE_DATA) + if (bdaddr_type_is_le(dst_type)) hcon = hci_connect(hdev, LE_LINK, dst, dst_type, chan->sec_level, auth_type); else @@ -1811,16 +1817,10 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, goto done; } - if (hcon->type == LE_LINK) { - err = 0; - - if (!list_empty(&conn->chan_l)) { - err = -EBUSY; - hci_conn_drop(hcon); - } - - if (err) - goto done; + if (cid && __l2cap_get_chan_by_dcid(conn, cid)) { + hci_conn_drop(hcon); + err = -EBUSY; + goto done; } /* Update source addr of the socket */ @@ -1830,6 +1830,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, l2cap_chan_add(conn, chan); l2cap_chan_lock(chan); + /* l2cap_chan_add takes its own ref so we can drop this one */ + hci_conn_drop(hcon); + l2cap_state_change(chan, BT_CONNECT); __set_chan_timer(chan, sk->sk_sndtimeo); @@ -3751,8 +3754,6 @@ static struct l2cap_chan *l2cap_connect(struct l2cap_conn *conn, sk = chan->sk; - hci_conn_hold(conn->hcon); - bacpy(&bt_sk(sk)->src, conn->src); bacpy(&bt_sk(sk)->dst, conn->dst); chan->psm = psm; @@ -5292,6 +5293,51 @@ static inline int l2cap_le_sig_cmd(struct l2cap_conn *conn, } } +static inline void l2cap_le_sig_channel(struct l2cap_conn *conn, + struct sk_buff *skb) +{ + u8 *data = skb->data; + int len = skb->len; + struct l2cap_cmd_hdr cmd; + int err; + + l2cap_raw_recv(conn, skb); + + while (len >= L2CAP_CMD_HDR_SIZE) { + u16 cmd_len; + memcpy(&cmd, data, L2CAP_CMD_HDR_SIZE); + data += L2CAP_CMD_HDR_SIZE; + len -= L2CAP_CMD_HDR_SIZE; + + cmd_len = le16_to_cpu(cmd.len); + + BT_DBG("code 0x%2.2x len %d id 0x%2.2x", cmd.code, cmd_len, + cmd.ident); + + if (cmd_len > len || !cmd.ident) { + BT_DBG("corrupted command"); + break; + } + + err = l2cap_le_sig_cmd(conn, &cmd, data); + if (err) { + struct l2cap_cmd_rej_unk rej; + + BT_ERR("Wrong link type (%d)", err); + + /* FIXME: Map err to a valid reason */ + rej.reason = __constant_cpu_to_le16(L2CAP_REJ_NOT_UNDERSTOOD); + l2cap_send_cmd(conn, cmd.ident, L2CAP_COMMAND_REJ, + sizeof(rej), &rej); + } + + data += cmd_len; + len -= cmd_len; + } + + kfree_skb(skb); +} + static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { @@ -5318,11 +5364,7 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, break; } - if (conn->hcon->type == LE_LINK) - err = l2cap_le_sig_cmd(conn, &cmd, data); - else - err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data); - + err = l2cap_bredr_sig_cmd(conn, &cmd, cmd_len, data); if (err) { struct l2cap_cmd_rej_unk rej; @@ -6356,16 +6398,13 @@ static void l2cap_att_channel(struct l2cap_conn *conn, { struct l2cap_chan *chan; - chan = l2cap_global_chan_by_scid(0, L2CAP_CID_LE_DATA, + chan = l2cap_global_chan_by_scid(BT_CONNECTED, L2CAP_CID_ATT, conn->src, conn->dst); if (!chan) goto drop; BT_DBG("chan %p, len %d", chan, skb->len); - if (chan->state != BT_BOUND && chan->state != BT_CONNECTED) - goto drop; - if (chan->imtu < skb->len) goto drop; @@ -6395,6 +6434,8 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) switch (cid) { case L2CAP_CID_LE_SIGNALING: + l2cap_le_sig_channel(conn, skb); + break; case L2CAP_CID_SIGNALING: l2cap_sig_channel(conn, skb); break; @@ -6405,7 +6446,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) l2cap_conless_channel(conn, psm, skb); break; - case L2CAP_CID_LE_DATA: + case L2CAP_CID_ATT: l2cap_att_channel(conn, skb); break; @@ -6531,7 +6572,7 @@ int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) continue; } - if (chan->scid == L2CAP_CID_LE_DATA) { + if (chan->scid == L2CAP_CID_ATT) { if (!status && encrypt) { chan->sec_level = hcon->sec_level; l2cap_chan_ready(chan); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 36fed40c162c..0098af80b213 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -466,7 +466,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, static bool l2cap_valid_mtu(struct l2cap_chan *chan, u16 mtu) { switch (chan->scid) { - case L2CAP_CID_LE_DATA: + case L2CAP_CID_ATT: if (mtu < L2CAP_LE_MIN_MTU) return false; break; @@ -630,7 +630,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, conn = chan->conn; /*change security for LE channels */ - if (chan->scid == L2CAP_CID_LE_DATA) { + if (chan->scid == L2CAP_CID_ATT) { if (!conn->hcon->out) { err = -EINVAL; break; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f8ecbc70293d..fedc5399d465 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -102,18 +102,6 @@ static const u16 mgmt_events[] = { MGMT_EV_PASSKEY_NOTIFY, }; -/* - * These LE scan and inquiry parameters were chosen according to LE General - * Discovery Procedure specification. - */ -#define LE_SCAN_WIN 0x12 -#define LE_SCAN_INT 0x12 -#define LE_SCAN_TIMEOUT_LE_ONLY msecs_to_jiffies(10240) -#define LE_SCAN_TIMEOUT_BREDR_LE msecs_to_jiffies(5120) - -#define INQUIRY_LEN_BREDR 0x08 /* TGAP(100) */ -#define INQUIRY_LEN_BREDR_LE 0x04 /* TGAP(100)/2 */ - #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) #define hdev_is_powered(hdev) (test_bit(HCI_UP, &hdev->flags) && \ @@ -1748,8 +1736,6 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, hci_link_keys_clear(hdev); - set_bit(HCI_LINK_KEYS, &hdev->dev_flags); - if (cp->debug_keys) set_bit(HCI_DEBUG_KEYS, &hdev->dev_flags); else @@ -2633,28 +2619,72 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev, return err; } -int mgmt_interleaved_discovery(struct hci_dev *hdev) +static int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) { + struct pending_cmd *cmd; + u8 type; int err; - BT_DBG("%s", hdev->name); + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_lock(hdev); + cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev); + if (!cmd) + return -ENOENT; - err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR_LE); - if (err < 0) - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + type = hdev->discovery.type; - hci_dev_unlock(hdev); + err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), + &type, sizeof(type)); + mgmt_pending_remove(cmd); return err; } +static void start_discovery_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status %d", status); + + if (status) { + hci_dev_lock(hdev); + mgmt_start_discovery_failed(hdev, status); + hci_dev_unlock(hdev); + return; + } + + hci_dev_lock(hdev); + hci_discovery_set_state(hdev, DISCOVERY_FINDING); + hci_dev_unlock(hdev); + + switch (hdev->discovery.type) { + case DISCOV_TYPE_LE: + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, + DISCOV_LE_TIMEOUT); + break; + + case DISCOV_TYPE_INTERLEAVED: + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, + DISCOV_INTERLEAVED_TIMEOUT); + break; + + case DISCOV_TYPE_BREDR: + break; + + default: + BT_ERR("Invalid discovery type %d", hdev->discovery.type); + } +} + static int start_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_start_discovery *cp = data; struct pending_cmd *cmd; + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; + struct hci_cp_inquiry inq_cp; + struct hci_request req; + /* General inquiry access code (GIAC) */ + u8 lap[3] = { 0x33, 0x8b, 0x9e }; int err; BT_DBG("%s", hdev->name); @@ -2687,6 +2717,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, hdev->discovery.type = cp->type; + hci_req_init(&req, hdev); + switch (hdev->discovery.type) { case DISCOV_TYPE_BREDR: if (!lmp_bredr_capable(hdev)) { @@ -2696,10 +2728,23 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_do_inquiry(hdev, INQUIRY_LEN_BREDR); + if (test_bit(HCI_INQUIRY, &hdev->flags)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_BUSY); + mgmt_pending_remove(cmd); + goto failed; + } + + hci_inquiry_cache_flush(hdev); + + memset(&inq_cp, 0, sizeof(inq_cp)); + memcpy(&inq_cp.lap, lap, sizeof(inq_cp.lap)); + inq_cp.length = DISCOV_BREDR_INQUIRY_LEN; + hci_req_add(&req, HCI_OP_INQUIRY, sizeof(inq_cp), &inq_cp); break; case DISCOV_TYPE_LE: + case DISCOV_TYPE_INTERLEAVED: if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, MGMT_STATUS_NOT_SUPPORTED); @@ -2707,20 +2752,40 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, - LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY); - break; - - case DISCOV_TYPE_INTERLEAVED: - if (!lmp_host_le_capable(hdev) || !lmp_bredr_capable(hdev)) { + if (hdev->discovery.type == DISCOV_TYPE_INTERLEAVED && + !lmp_bredr_capable(hdev)) { err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, MGMT_STATUS_NOT_SUPPORTED); mgmt_pending_remove(cmd); goto failed; } - err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, - LE_SCAN_WIN, LE_SCAN_TIMEOUT_BREDR_LE); + if (test_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_REJECTED); + mgmt_pending_remove(cmd); + goto failed; + } + + if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) { + err = cmd_status(sk, hdev->id, MGMT_OP_START_DISCOVERY, + MGMT_STATUS_BUSY); + mgmt_pending_remove(cmd); + goto failed; + } + + memset(¶m_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_ACTIVE; + param_cp.interval = cpu_to_le16(DISCOV_LE_SCAN_INT); + param_cp.window = cpu_to_le16(DISCOV_LE_SCAN_WIN); + hci_req_add(&req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + ¶m_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); break; default: @@ -2730,6 +2795,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev, goto failed; } + err = hci_req_run(&req, start_discovery_complete); if (err < 0) mgmt_pending_remove(cmd); else @@ -2740,6 +2806,39 @@ failed: return err; } +static int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + int err; + + cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev); + if (!cmd) + return -ENOENT; + + err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), + &hdev->discovery.type, sizeof(hdev->discovery.type)); + mgmt_pending_remove(cmd); + + return err; +} + +static void stop_discovery_complete(struct hci_dev *hdev, u8 status) +{ + BT_DBG("status %d", status); + + hci_dev_lock(hdev); + + if (status) { + mgmt_stop_discovery_failed(hdev, status); + goto unlock; + } + + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + +unlock: + hci_dev_unlock(hdev); +} + static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -2747,6 +2846,8 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, struct pending_cmd *cmd; struct hci_cp_remote_name_req_cancel cp; struct inquiry_entry *e; + struct hci_request req; + struct hci_cp_le_set_scan_enable enable_cp; int err; BT_DBG("%s", hdev->name); @@ -2773,12 +2874,20 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } + hci_req_init(&req, hdev); + switch (hdev->discovery.state) { case DISCOVERY_FINDING: - if (test_bit(HCI_INQUIRY, &hdev->flags)) - err = hci_cancel_inquiry(hdev); - else - err = hci_cancel_le_scan(hdev); + if (test_bit(HCI_INQUIRY, &hdev->flags)) { + hci_req_add(&req, HCI_OP_INQUIRY_CANCEL, 0, NULL); + } else { + cancel_delayed_work(&hdev->le_scan_disable); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_DISABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, + sizeof(enable_cp), &enable_cp); + } break; @@ -2796,16 +2905,22 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data, } bacpy(&cp.bdaddr, &e->data.bdaddr); - err = hci_send_cmd(hdev, HCI_OP_REMOTE_NAME_REQ_CANCEL, - sizeof(cp), &cp); + hci_req_add(&req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), + &cp); break; default: BT_DBG("unknown discovery state %u", hdev->discovery.state); - err = -EFAULT; + + mgmt_pending_remove(cmd); + err = cmd_complete(sk, hdev->id, MGMT_OP_STOP_DISCOVERY, + MGMT_STATUS_FAILED, &mgmt_cp->type, + sizeof(mgmt_cp->type)); + goto unlock; } + err = hci_req_run(&req, stop_discovery_complete); if (err < 0) mgmt_pending_remove(cmd); else @@ -4063,6 +4178,9 @@ int mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, struct mgmt_ev_device_found *ev = (void *) buf; size_t ev_size; + if (!hci_discovery_active(hdev)) + return -EPERM; + /* Leave 5 bytes for a potential CoD field */ if (sizeof(*ev) + eir_len + 5 > sizeof(buf)) return -EINVAL; @@ -4114,43 +4232,6 @@ int mgmt_remote_name(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, sizeof(*ev) + eir_len, NULL); } -int mgmt_start_discovery_failed(struct hci_dev *hdev, u8 status) -{ - struct pending_cmd *cmd; - u8 type; - int err; - - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - - cmd = mgmt_pending_find(MGMT_OP_START_DISCOVERY, hdev); - if (!cmd) - return -ENOENT; - - type = hdev->discovery.type; - - err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), - &type, sizeof(type)); - mgmt_pending_remove(cmd); - - return err; -} - -int mgmt_stop_discovery_failed(struct hci_dev *hdev, u8 status) -{ - struct pending_cmd *cmd; - int err; - - cmd = mgmt_pending_find(MGMT_OP_STOP_DISCOVERY, hdev); - if (!cmd) - return -ENOENT; - - err = cmd_complete(cmd->sk, hdev->id, cmd->opcode, mgmt_status(status), - &hdev->discovery.type, sizeof(hdev->discovery.type)); - mgmt_pending_remove(cmd); - - return err; -} - int mgmt_discovering(struct hci_dev *hdev, u8 discovering) { struct mgmt_ev_discovering ev; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 967312803e41..69363bd37f64 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -22,6 +22,9 @@ #include <asm/uaccess.h> #include "br_private.h" +#define COMMON_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \ + NETIF_F_GSO_MASK | NETIF_F_HW_CSUM) + /* net device transmit always called with BH disabled */ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -55,10 +58,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); if (is_broadcast_ether_addr(dest)) - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); else if (is_multicast_ether_addr(dest)) { if (unlikely(netpoll_tx_running(dev))) { - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); goto out; } if (br_multicast_rcv(br, NULL, skb)) { @@ -67,14 +70,15 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) } mdst = br_mdb_get(br, skb, vid); - if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br)) br_multicast_deliver(mdst, skb); else - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, false); } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) br_deliver(dst->dst, skb); else - br_flood_deliver(br, skb); + br_flood_deliver(br, skb, true); out: rcu_read_unlock(); @@ -346,12 +350,10 @@ void br_dev_setup(struct net_device *dev) dev->tx_queue_len = 0; dev->priv_flags = IFF_EBRIDGE; - dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | - NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX; - dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX; + dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL | + NETIF_F_HW_VLAN_CTAG_TX; + dev->hw_features = COMMON_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; + dev->vlan_features = COMMON_FEATURES; br->dev = dev; spin_lock_init(&br->lock); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ebfa4443c69b..60aca9109a50 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -707,6 +707,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } } + if (is_zero_ether_addr(addr)) { + pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n"); + return -EINVAL; + } + p = br_port_get_rtnl(dev); if (p == NULL) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 092b20e4ee4c..4b81b1471789 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -174,7 +174,8 @@ out: static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb0, void (*__packet_hook)(const struct net_bridge_port *p, - struct sk_buff *skb)) + struct sk_buff *skb), + bool unicast) { struct net_bridge_port *p; struct net_bridge_port *prev; @@ -182,6 +183,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { + /* Do not flood unicast traffic to ports that turn it off */ + if (unicast && !(p->flags & BR_FLOOD)) + continue; prev = maybe_deliver(prev, p, skb, __packet_hook); if (IS_ERR(prev)) goto out; @@ -203,16 +207,16 @@ out: /* called with rcu_read_lock */ -void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast) { - br_flood(br, skb, NULL, __br_deliver); + br_flood(br, skb, NULL, __br_deliver, unicast); } /* called under bridge lock */ void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, - struct sk_buff *skb2) + struct sk_buff *skb2, bool unicast) { - br_flood(br, skb, skb2, __br_forward); + br_flood(br, skb, skb2, __br_forward, unicast); } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4cdba60926ff..5623be6b9ecd 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -221,7 +221,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; - p->flags = 0; + p->flags = BR_LEARNING | BR_FLOOD; br_init_port(p); p->state = BR_STATE_DISABLED; br_stp_port_timer_init(p); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 828e2bcc1f52..8c561c0aa636 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -65,6 +65,7 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_fdb_entry *dst; struct net_bridge_mdb_entry *mdst; struct sk_buff *skb2; + bool unicast = true; u16 vid = 0; if (!p || p->state == BR_STATE_DISABLED) @@ -75,7 +76,8 @@ int br_handle_frame_finish(struct sk_buff *skb) /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; - br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); + if (p->flags & BR_LEARNING) + br_fdb_update(br, p, eth_hdr(skb)->h_source, vid); if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && br_multicast_rcv(br, p, skb)) @@ -94,11 +96,13 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; - if (is_broadcast_ether_addr(dest)) + if (is_broadcast_ether_addr(dest)) { skb2 = skb; - else if (is_multicast_ether_addr(dest)) { + unicast = false; + } else if (is_multicast_ether_addr(dest)) { mdst = br_mdb_get(br, skb, vid); - if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) { + if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && + br_multicast_querier_exists(br)) { if ((mdst && mdst->mglist) || br_multicast_is_router(br)) skb2 = skb; @@ -109,6 +113,7 @@ int br_handle_frame_finish(struct sk_buff *skb) } else skb2 = skb; + unicast = false; br->dev->stats.multicast++; } else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) { @@ -122,7 +127,7 @@ int br_handle_frame_finish(struct sk_buff *skb) dst->used = jiffies; br_forward(dst->dst, skb, skb2); } else - br_flood_forward(br, skb, skb2); + br_flood_forward(br, skb, skb2, unicast); } if (skb2) @@ -142,7 +147,8 @@ static int br_handle_local_finish(struct sk_buff *skb) u16 vid = 0; br_vlan_get_tag(skb, &vid); - br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); + if (p->flags & BR_LEARNING) + br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid); return 0; /* process further */ } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 19942e38fd2d..0daae3ec2355 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -447,7 +447,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) call_rcu_bh(&p->rcu, br_multicast_free_pg); err = 0; - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->mglist && mp->timer_armed && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); break; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d6448e35e027..61c5e819380e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -23,6 +23,7 @@ #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/timer.h> +#include <linux/inetdevice.h> #include <net/ip.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> @@ -269,7 +270,7 @@ static void br_multicast_del_pg(struct net_bridge *br, del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->mglist && mp->timer_armed && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); @@ -381,7 +382,8 @@ static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br, iph->frag_off = htons(IP_DF); iph->ttl = 1; iph->protocol = IPPROTO_IGMP; - iph->saddr = 0; + iph->saddr = br->multicast_query_use_ifaddr ? + inet_select_addr(br->dev, 0, RT_SCOPE_LINK) : 0; iph->daddr = htonl(INADDR_ALLHOSTS_GROUP); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; @@ -616,6 +618,7 @@ rehash: mp->br = br; mp->addr = *group; + setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp); @@ -655,7 +658,6 @@ static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; - unsigned long now = jiffies; int err; spin_lock(&br->multicast_lock); @@ -670,7 +672,6 @@ static int br_multicast_add_group(struct net_bridge *br, if (!port) { mp->mglist = true; - mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } @@ -678,7 +679,7 @@ static int br_multicast_add_group(struct net_bridge *br, (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { if (p->port == port) - goto found; + goto out; if ((unsigned long)p->port < (unsigned long)port) break; } @@ -689,8 +690,6 @@ static int br_multicast_add_group(struct net_bridge *br, rcu_assign_pointer(*pp, p); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); -found: - mod_timer(&p->timer, now + br->multicast_membership_interval); out: err = 0; @@ -1015,8 +1014,18 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, } #endif +static void br_multicast_update_querier_timer(struct net_bridge *br, + unsigned long max_delay) +{ + if (!timer_pending(&br->multicast_querier_timer)) + br->multicast_querier_delay_time = jiffies + max_delay; + + mod_timer(&br->multicast_querier_timer, + jiffies + br->multicast_querier_interval); +} + /* - * Add port to rotuer_list + * Add port to router_list * list is maintained ordered by pointer value * and locked by br->multicast_lock and RCU */ @@ -1065,11 +1074,11 @@ timer: static void br_multicast_query_received(struct net_bridge *br, struct net_bridge_port *port, - int saddr) + int saddr, + unsigned long max_delay) { if (saddr) - mod_timer(&br->multicast_querier_timer, - jiffies + br->multicast_querier_interval); + br_multicast_update_querier_timer(br, max_delay); else if (timer_pending(&br->multicast_querier_timer)) return; @@ -1097,8 +1106,6 @@ static int br_ip4_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - br_multicast_query_received(br, port, !!iph->saddr); - group = ih->group; if (skb->len == sizeof(*ih)) { @@ -1122,6 +1129,8 @@ static int br_ip4_multicast_query(struct net_bridge *br, IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1; } + br_multicast_query_received(br, port, !!iph->saddr, max_delay); + if (!group) goto out; @@ -1130,6 +1139,9 @@ static int br_ip4_multicast_query(struct net_bridge *br, if (!mp) goto out; + mod_timer(&mp->timer, now + br->multicast_membership_interval); + mp->timer_armed = true; + max_delay *= br->multicast_last_member_count; if (mp->mglist && @@ -1174,8 +1186,6 @@ static int br_ip6_multicast_query(struct net_bridge *br, (port && port->state == BR_STATE_DISABLED)) goto out; - br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr)); - if (skb->len == sizeof(*mld)) { if (!pskb_may_pull(skb, sizeof(*mld))) { err = -EINVAL; @@ -1196,6 +1206,9 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; } + br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr), + max_delay); + if (!group) goto out; @@ -1204,6 +1217,9 @@ static int br_ip6_multicast_query(struct net_bridge *br, if (!mp) goto out; + mod_timer(&mp->timer, now + br->multicast_membership_interval); + mp->timer_armed = true; + max_delay *= br->multicast_last_member_count; if (mp->mglist && (timer_pending(&mp->timer) ? @@ -1247,6 +1263,32 @@ static void br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; + if (br->multicast_querier && + !timer_pending(&br->multicast_querier_timer)) { + __br_multicast_send_query(br, port, &mp->addr); + + time = jiffies + br->multicast_last_member_count * + br->multicast_last_member_interval; + mod_timer(port ? &port->multicast_query_timer : + &br->multicast_query_timer, time); + + for (p = mlock_dereference(mp->ports, br); + p != NULL; + p = mlock_dereference(p->next, br)) { + if (p->port != port) + continue; + + if (!hlist_unhashed(&p->mglist) && + (timer_pending(&p->timer) ? + time_after(p->timer.expires, time) : + try_to_del_timer_sync(&p->timer) >= 0)) { + mod_timer(&p->timer, time); + } + + break; + } + } + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { struct net_bridge_port_group __rcu **pp; @@ -1262,7 +1304,7 @@ static void br_multicast_leave_group(struct net_bridge *br, call_rcu_bh(&p->rcu, br_multicast_free_pg); br_mdb_notify(br->dev, port, group, RTM_DELMDB); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->mglist && mp->timer_armed && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); } @@ -1274,30 +1316,12 @@ static void br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (mp->mglist && + if (mp->mglist && mp->timer_armed && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { mod_timer(&mp->timer, time); } - - goto out; - } - - for (p = mlock_dereference(mp->ports, br); - p != NULL; - p = mlock_dereference(p->next, br)) { - if (p->port != port) - continue; - - if (!hlist_unhashed(&p->mglist) && - (timer_pending(&p->timer) ? - time_after(p->timer.expires, time) : - try_to_del_timer_sync(&p->timer) >= 0)) { - mod_timer(&p->timer, time); - } - - break; } out: @@ -1619,6 +1643,7 @@ void br_multicast_init(struct net_bridge *br) br->multicast_router = 1; br->multicast_querier = 0; + br->multicast_query_use_ifaddr = 0; br->multicast_last_member_count = 2; br->multicast_startup_query_count = 2; @@ -1629,6 +1654,8 @@ void br_multicast_init(struct net_bridge *br) br->multicast_querier_interval = 255 * HZ; br->multicast_membership_interval = 260 * HZ; + br->multicast_querier_delay_time = 0; + spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, br_multicast_local_router_expired, 0); @@ -1672,6 +1699,7 @@ void br_multicast_stop(struct net_bridge *br) hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); + mp->timer_armed = false; call_rcu_bh(&mp->rcu, br_multicast_free_group); } } @@ -1816,6 +1844,8 @@ unlock: int br_multicast_set_querier(struct net_bridge *br, unsigned long val) { + unsigned long max_delay; + val = !!val; spin_lock_bh(&br->multicast_lock); @@ -1823,8 +1853,14 @@ int br_multicast_set_querier(struct net_bridge *br, unsigned long val) goto unlock; br->multicast_querier = val; - if (val) - br_multicast_start_querier(br); + if (!val) + goto unlock; + + max_delay = br->multicast_query_response_interval; + if (!timer_pending(&br->multicast_querier_timer)) + br->multicast_querier_delay_time = jiffies + max_delay; + + br_multicast_start_querier(br); unlock: spin_unlock_bh(&br->multicast_lock); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1ed75bfd8d1d..f87736270eaa 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -992,7 +992,7 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = { #ifdef CONFIG_SYSCTL static -int brnf_sysctl_call_tables(ctl_table * ctl, int write, +int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, void __user * buffer, size_t * lenp, loff_t * ppos) { int ret; @@ -1004,7 +1004,7 @@ int brnf_sysctl_call_tables(ctl_table * ctl, int write, return ret; } -static ctl_table brnf_table[] = { +static struct ctl_table brnf_table[] = { { .procname = "bridge-nf-call-arptables", .data = &brnf_call_arptables, diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8e3abf564798..1fc30abd3a52 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -30,6 +30,8 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + nla_total_size(1) /* IFLA_BRPORT_FAST_LEAVE */ + + nla_total_size(1) /* IFLA_BRPORT_LEARNING */ + + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + 0; } @@ -56,7 +58,9 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || - nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE))) + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || + nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || + nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD))) return -EMSGSIZE; return 0; @@ -281,6 +285,8 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, + [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, + [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -328,6 +334,8 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); + br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); + br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 1644b3e1f947..3a3f371b2841 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -31,7 +31,7 @@ struct notifier_block br_device_notifier = { */ static int br_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net_bridge_port *p; struct net_bridge *br; bool changed_addr; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d2c043a857b6..2f7da41851bf 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -112,6 +112,7 @@ struct net_bridge_mdb_entry struct timer_list timer; struct br_ip addr; bool mglist; + bool timer_armed; }; struct net_bridge_mdb_htable @@ -157,6 +158,8 @@ struct net_bridge_port #define BR_ROOT_BLOCK 0x00000004 #define BR_MULTICAST_FAST_LEAVE 0x00000008 #define BR_ADMIN_COST 0x00000010 +#define BR_LEARNING 0x00000020 +#define BR_FLOOD 0x00000040 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; @@ -249,6 +252,7 @@ struct net_bridge u8 multicast_disabled:1; u8 multicast_querier:1; + u8 multicast_query_use_ifaddr:1; u32 hash_elasticity; u32 hash_max; @@ -263,6 +267,7 @@ struct net_bridge unsigned long multicast_query_interval; unsigned long multicast_query_response_interval; unsigned long multicast_startup_query_interval; + unsigned long multicast_querier_delay_time; spinlock_t multicast_lock; struct net_bridge_mdb_htable __rcu *mdb; @@ -411,9 +416,10 @@ extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); extern int br_forward_finish(struct sk_buff *skb); -extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); +extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, + bool unicast); extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, - struct sk_buff *skb2); + struct sk_buff *skb2, bool unicast); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); @@ -496,6 +502,13 @@ static inline bool br_multicast_is_router(struct net_bridge *br) (br->multicast_router == 1 && timer_pending(&br->multicast_router_timer)); } + +static inline bool br_multicast_querier_exists(struct net_bridge *br) +{ + return time_is_before_jiffies(br->multicast_querier_delay_time) && + (br->multicast_querier || + timer_pending(&br->multicast_querier_timer)); +} #else static inline int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port, @@ -552,6 +565,10 @@ static inline bool br_multicast_is_router(struct net_bridge *br) { return 0; } +static inline bool br_multicast_querier_exists(struct net_bridge *br) +{ + return false; +} static inline void br_mdb_init(void) { } diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 8baa9c08e1a4..394bb96b6087 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -375,6 +375,31 @@ static ssize_t store_multicast_snooping(struct device *d, static DEVICE_ATTR(multicast_snooping, S_IRUGO | S_IWUSR, show_multicast_snooping, store_multicast_snooping); +static ssize_t show_multicast_query_use_ifaddr(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct net_bridge *br = to_bridge(d); + return sprintf(buf, "%d\n", br->multicast_query_use_ifaddr); +} + +static int set_query_use_ifaddr(struct net_bridge *br, unsigned long val) +{ + br->multicast_query_use_ifaddr = !!val; + return 0; +} + +static ssize_t +store_multicast_query_use_ifaddr(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return store_bridge_parm(d, buf, len, set_query_use_ifaddr); +} +static DEVICE_ATTR(multicast_query_use_ifaddr, S_IRUGO | S_IWUSR, + show_multicast_query_use_ifaddr, + store_multicast_query_use_ifaddr); + static ssize_t show_multicast_querier(struct device *d, struct device_attribute *attr, char *buf) @@ -734,6 +759,7 @@ static struct attribute *bridge_attrs[] = { &dev_attr_multicast_router.attr, &dev_attr_multicast_snooping.attr, &dev_attr_multicast_querier.attr, + &dev_attr_multicast_query_use_ifaddr.attr, &dev_attr_hash_elasticity.attr, &dev_attr_hash_max.attr, &dev_attr_multicast_last_member_count.attr, diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index a1ef1b6e14dc..2a2cdb756d51 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -158,6 +158,8 @@ static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); +BRPORT_ATTR_FLAG(learning, BR_LEARNING); +BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -195,6 +197,8 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_hairpin_mode, &brport_attr_bpdu_guard, &brport_attr_root_block, + &brport_attr_learning, + &brport_attr_unicast_flood, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, &brport_attr_multicast_fast_leave, diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index df0364aa12d5..518093802d1d 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -271,6 +271,12 @@ static int ebt_ulog_tg_check(const struct xt_tgchk_param *par) { struct ebt_ulog_info *uloginfo = par->targinfo; + if (!par->net->xt.ebt_ulog_warn_deprecated) { + pr_info("ebt_ulog is deprecated and it will be removed soon, " + "use ebt_nflog instead\n"); + par->net->xt.ebt_ulog_warn_deprecated = true; + } + if (uloginfo->nlgroup > 31) return -EINVAL; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 3d110c4fc787..ac7802428384 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1339,7 +1339,7 @@ static inline int ebt_make_matchname(const struct ebt_entry_match *m, /* ebtables expects 32 bytes long names but xt_match names are 29 bytes long. Copy 29 bytes and fill remaining bytes with zeroes. */ - strncpy(name, m->u.match->name, sizeof(name)); + strlcpy(name, m->u.match->name, sizeof(name)); if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; @@ -1351,7 +1351,7 @@ static inline int ebt_make_watchername(const struct ebt_entry_watcher *w, char __user *hlp = ubase + ((char *)w - base); char name[EBT_FUNCTION_MAXNAMELEN] = {}; - strncpy(name, w->u.watcher->name, sizeof(name)); + strlcpy(name, w->u.watcher->name, sizeof(name)); if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; @@ -1377,7 +1377,7 @@ ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase) ret = EBT_WATCHER_ITERATE(e, ebt_make_watchername, base, ubase); if (ret != 0) return ret; - strncpy(name, t->u.target->name, sizeof(name)); + strlcpy(name, t->u.target->name, sizeof(name)); if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN)) return -EFAULT; return 0; diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 1f9ece1a9c34..4dca159435cf 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -352,9 +352,9 @@ EXPORT_SYMBOL(caif_enroll_dev); /* notify Caif of device events */ static int caif_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct caif_device_entry *caifd = NULL; struct caif_dev_common *caifdev; struct cfcnfg *cfg; diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index 942e00a425fd..75ed04b78fa4 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -121,9 +121,9 @@ static struct packet_type caif_usb_type __read_mostly = { }; static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct caif_dev_common common; struct cflayer *layer, *link_support; struct usbnet *usbnet; diff --git a/net/can/af_can.c b/net/can/af_can.c index c4e50852c9f4..3ab8dd2e1282 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -794,9 +794,9 @@ EXPORT_SYMBOL(can_proto_unregister); * af_can notifier to create/remove CAN netdevice specific structs */ static int can_notifier(struct notifier_block *nb, unsigned long msg, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dev_rcv_lists *d; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/can/bcm.c b/net/can/bcm.c index 8f113e6ff327..46f20bfafc0e 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1350,9 +1350,9 @@ static int bcm_sendmsg(struct kiocb *iocb, struct socket *sock, * notification handler for netdevice status changes */ static int bcm_notifier(struct notifier_block *nb, unsigned long msg, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier); struct sock *sk = &bo->sk; struct bcm_op *op; diff --git a/net/can/gw.c b/net/can/gw.c index 3ee690e8c7d3..2f291f961a17 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -445,9 +445,9 @@ static inline void cgw_unregister_filter(struct cgw_job *gwj) } static int cgw_notifier(struct notifier_block *nb, - unsigned long msg, void *data) + unsigned long msg, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/can/raw.c b/net/can/raw.c index 1085e65f848e..641e1c895123 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -239,9 +239,9 @@ static int raw_enable_allfilters(struct net_device *dev, struct sock *sk) } static int raw_notifier(struct notifier_block *nb, - unsigned long msg, void *data) + unsigned long msg, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); struct sock *sk = &ro->sk; diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c index 925ca583c09c..8c93fa8d81bc 100644 --- a/net/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -39,6 +39,11 @@ static int should_authenticate(struct ceph_auth_client *ac) return xi->starting; } +static int build_request(struct ceph_auth_client *ac, void *buf, void *end) +{ + return 0; +} + /* * the generic auth code decode the global_id, and we carry no actual * authenticate state, so nothing happens here. @@ -106,6 +111,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .destroy = destroy, .is_authenticated = is_authenticated, .should_authenticate = should_authenticate, + .build_request = build_request, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, .destroy_authorizer = ceph_auth_none_destroy_authorizer, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3a246a6cab47..dd47889adc4a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -733,12 +733,14 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, object_size = le32_to_cpu(layout->fl_object_size); object_base = off - objoff; - if (truncate_size <= object_base) { - truncate_size = 0; - } else { - truncate_size -= object_base; - if (truncate_size > object_size) - truncate_size = object_size; + if (!(truncate_seq == 1 && truncate_size == -1ULL)) { + if (truncate_size <= object_base) { + truncate_size = 0; + } else { + truncate_size -= object_base; + if (truncate_size > object_size) + truncate_size = object_size; + } } osd_req_op_extent_init(req, 0, opcode, objoff, objlen, @@ -1174,6 +1176,7 @@ static void __register_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { dout("__register_linger_request %p\n", req); + ceph_osdc_get_request(req); list_add_tail(&req->r_linger_item, &osdc->req_linger); if (req->r_osd) list_add_tail(&req->r_linger_osd, @@ -1196,6 +1199,7 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, if (list_empty(&req->r_osd_item)) req->r_osd = NULL; } + ceph_osdc_put_request(req); } void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, @@ -1203,9 +1207,8 @@ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, { mutex_lock(&osdc->request_mutex); if (req->r_linger) { - __unregister_linger_request(osdc, req); req->r_linger = 0; - ceph_osdc_put_request(req); + __unregister_linger_request(osdc, req); } mutex_unlock(&osdc->request_mutex); } @@ -1217,11 +1220,6 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, if (!req->r_linger) { dout("set_request_linger %p\n", req); req->r_linger = 1; - /* - * caller is now responsible for calling - * unregister_linger_request - */ - ceph_osdc_get_request(req); } } EXPORT_SYMBOL(ceph_osdc_set_request_linger); @@ -1339,10 +1337,6 @@ static void __send_request(struct ceph_osd_client *osdc, ceph_msg_get(req->r_request); /* send consumes a ref */ - /* Mark the request unsafe if this is the first timet's being sent. */ - - if (!req->r_sent && req->r_unsafe_callback) - req->r_unsafe_callback(req, true); req->r_sent = req->r_osd->o_incarnation; ceph_con_send(&req->r_osd->o_con, req->r_request); @@ -1433,8 +1427,6 @@ static void handle_osds_timeout(struct work_struct *work) static void complete_request(struct ceph_osd_request *req) { - if (req->r_unsafe_callback) - req->r_unsafe_callback(req, false); complete_all(&req->r_safe_completion); /* fsync waiter */ } @@ -1526,6 +1518,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, for (i = 0; i < numops; i++) req->r_reply_op_result[i] = ceph_decode_32(&p); + already_completed = req->r_got_reply; + if (!req->r_got_reply) { req->r_result = result; @@ -1556,19 +1550,23 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, ((flags & CEPH_OSD_FLAG_WRITE) == 0)) __unregister_request(osdc, req); - already_completed = req->r_completed; - req->r_completed = 1; mutex_unlock(&osdc->request_mutex); - if (already_completed) - goto done; - if (req->r_callback) - req->r_callback(req, msg); - else - complete_all(&req->r_completion); + if (!already_completed) { + if (req->r_unsafe_callback && + result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK)) + req->r_unsafe_callback(req, true); + if (req->r_callback) + req->r_callback(req, msg); + else + complete_all(&req->r_completion); + } - if (flags & CEPH_OSD_FLAG_ONDISK) + if (flags & CEPH_OSD_FLAG_ONDISK) { + if (req->r_unsafe_callback && already_completed) + req->r_unsafe_callback(req, false); complete_request(req); + } done: dout("req=%p req->r_linger=%d\n", req, req->r_linger); @@ -1633,8 +1631,10 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("%p tid %llu restart on osd%d\n", req, req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); + ceph_osdc_get_request(req); __unregister_request(osdc, req); __register_linger_request(osdc, req); + ceph_osdc_put_request(req); continue; } @@ -2123,7 +2123,6 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, __register_request(osdc, req); req->r_sent = 0; req->r_got_reply = 0; - req->r_completed = 0; rc = __map_request(osdc, req, 0); if (rc < 0) { if (nofail) { @@ -2456,8 +2455,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, ceph_msg_revoke_incoming(req->r_reply); if (front > req->r_reply->front.iov_len) { - pr_warning("get_reply front %d > preallocated %d\n", - front, (int)req->r_reply->front.iov_len); + pr_warning("get_reply front %d > preallocated %d (%u#%llu)\n", + front, (int)req->r_reply->front.iov_len, + (unsigned int)con->peer_name.type, + le64_to_cpu(con->peer_name.num)); m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS, false); if (!m) goto out; diff --git a/net/core/datagram.c b/net/core/datagram.c index b71423db7785..8ab48cd89559 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -56,6 +56,7 @@ #include <net/sock.h> #include <net/tcp_states.h> #include <trace/events/skb.h> +#include <net/busy_poll.h> /* * Is a socket 'connection oriented' ? @@ -207,6 +208,10 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags, } spin_unlock_irqrestore(&queue->lock, cpu_flags); + if (sk_can_busy_loop(sk) && + sk_busy_loop(sk, flags & MSG_DONTWAIT)) + continue; + /* User doesn't want to wait */ error = -EAGAIN; if (!timeo) diff --git a/net/core/dev.c b/net/core/dev.c index faebb398fb46..26755dd40daa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -129,6 +129,8 @@ #include <linux/inetdevice.h> #include <linux/cpu_rmap.h> #include <linux/static_key.h> +#include <linux/hashtable.h> +#include <linux/vmalloc.h> #include "net-sysfs.h" @@ -166,6 +168,12 @@ static struct list_head offload_base __read_mostly; DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +/* protects napi_hash addition/deletion and napi_gen_id */ +static DEFINE_SPINLOCK(napi_hash_lock); + +static unsigned int napi_gen_id; +static DEFINE_HASHTABLE(napi_hash, 8); + seqcount_t devnet_rename_seq; static inline void dev_base_seq_inc(struct net *net) @@ -1232,9 +1240,7 @@ static int __dev_open(struct net_device *dev) * If we don't do this there is a chance ndo_poll_controller * or ndo_poll may be running while we open the device */ - ret = netpoll_rx_disable(dev); - if (ret) - return ret; + netpoll_rx_disable(dev); ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); @@ -1343,9 +1349,7 @@ static int __dev_close(struct net_device *dev) LIST_HEAD(single); /* Temporarily disable netpoll until the interface is down */ - retval = netpoll_rx_disable(dev); - if (retval) - return retval; + netpoll_rx_disable(dev); list_add(&dev->unreg_list, &single); retval = __dev_close_many(&single); @@ -1387,14 +1391,11 @@ static int dev_close_many(struct list_head *head) */ int dev_close(struct net_device *dev) { - int ret = 0; if (dev->flags & IFF_UP) { LIST_HEAD(single); /* Block netpoll rx while the interface is going down */ - ret = netpoll_rx_disable(dev); - if (ret) - return ret; + netpoll_rx_disable(dev); list_add(&dev->unreg_list, &single); dev_close_many(&single); @@ -1402,7 +1403,7 @@ int dev_close(struct net_device *dev) netpoll_rx_enable(dev); } - return ret; + return 0; } EXPORT_SYMBOL(dev_close); @@ -1432,6 +1433,14 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); +static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, + struct net_device *dev) +{ + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, dev); + return nb->notifier_call(nb, val, &info); +} static int dev_boot_phase = 1; @@ -1464,7 +1473,7 @@ int register_netdevice_notifier(struct notifier_block *nb) goto unlock; for_each_net(net) { for_each_netdev(net, dev) { - err = nb->notifier_call(nb, NETDEV_REGISTER, dev); + err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev); err = notifier_to_errno(err); if (err) goto rollback; @@ -1472,7 +1481,7 @@ int register_netdevice_notifier(struct notifier_block *nb) if (!(dev->flags & IFF_UP)) continue; - nb->notifier_call(nb, NETDEV_UP, dev); + call_netdevice_notifier(nb, NETDEV_UP, dev); } } @@ -1488,10 +1497,11 @@ rollback: goto outroll; if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } @@ -1529,10 +1539,11 @@ int unregister_netdevice_notifier(struct notifier_block *nb) for_each_net(net) { for_each_netdev(net, dev) { if (dev->flags & IFF_UP) { - nb->notifier_call(nb, NETDEV_GOING_DOWN, dev); - nb->notifier_call(nb, NETDEV_DOWN, dev); + call_netdevice_notifier(nb, NETDEV_GOING_DOWN, + dev); + call_netdevice_notifier(nb, NETDEV_DOWN, dev); } - nb->notifier_call(nb, NETDEV_UNREGISTER, dev); + call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev); } } unlock: @@ -1542,6 +1553,25 @@ unlock: EXPORT_SYMBOL(unregister_netdevice_notifier); /** + * call_netdevice_notifiers_info - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @info: notifier information data + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). + */ + +int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev, + struct netdev_notifier_info *info) +{ + ASSERT_RTNL(); + netdev_notifier_info_init(info, dev); + return raw_notifier_call_chain(&netdev_chain, val, info); +} +EXPORT_SYMBOL(call_netdevice_notifiers_info); + +/** * call_netdevice_notifiers - call all network notifier blocks * @val: value passed unmodified to notifier function * @dev: net_device pointer passed unmodified to notifier function @@ -1552,8 +1582,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - ASSERT_RTNL(); - return raw_notifier_call_chain(&netdev_chain, val, dev); + struct netdev_notifier_info info; + + return call_netdevice_notifiers_info(val, dev, &info); } EXPORT_SYMBOL(call_netdevice_notifiers); @@ -1655,23 +1686,19 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) } } - skb_orphan(skb); - if (unlikely(!is_skb_forwardable(dev, skb))) { atomic_long_inc(&dev->rx_dropped); kfree_skb(skb); return NET_RX_DROP; } - skb->skb_iif = 0; - skb->dev = dev; - skb_dst_drop(skb); - skb->tstamp.tv64 = 0; - skb->pkt_type = PACKET_HOST; + skb_scrub_packet(skb); skb->protocol = eth_type_trans(skb, dev); - skb->mark = 0; - secpath_reset(skb); - nf_reset(skb); - nf_reset_trace(skb); + + /* eth_type_trans() can set pkt_type. + * clear pkt_type _after_ calling eth_type_trans() + */ + skb->pkt_type = PACKET_HOST; + return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -1736,7 +1763,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb2); if (skb_network_header(skb2) < skb2->data || - skb2->network_header > skb2->tail) { + skb_network_header(skb2) > skb_tail_pointer(skb2)) { net_crit_ratelimited("protocol %04x is buggy, dev %s\n", ntohs(skb2->protocol), dev->name); @@ -2454,10 +2481,10 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) } static netdev_features_t harmonize_features(struct sk_buff *skb, - __be16 protocol, netdev_features_t features) + netdev_features_t features) { if (skb->ip_summed != CHECKSUM_NONE && - !can_checksum_protocol(features, protocol)) { + !can_checksum_protocol(features, skb_network_protocol(skb))) { features &= ~NETIF_F_ALL_CSUM; } else if (illegal_highdma(skb->dev, skb)) { features &= ~NETIF_F_SG; @@ -2478,20 +2505,18 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } else if (!vlan_tx_tag_present(skb)) { - return harmonize_features(skb, protocol, features); + return harmonize_features(skb, features); } features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX); - if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) { - return harmonize_features(skb, protocol, features); - } else { + if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; - return harmonize_features(skb, protocol, features); - } + + return harmonize_features(skb, features); } EXPORT_SYMBOL(netif_skb_features); @@ -3099,6 +3124,46 @@ static int rps_ipi_queued(struct softnet_data *sd) return 0; } +#ifdef CONFIG_NET_FLOW_LIMIT +int netdev_flow_limit_table_len __read_mostly = (1 << 12); +#endif + +static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen) +{ +#ifdef CONFIG_NET_FLOW_LIMIT + struct sd_flow_limit *fl; + struct softnet_data *sd; + unsigned int old_flow, new_flow; + + if (qlen < (netdev_max_backlog >> 1)) + return false; + + sd = &__get_cpu_var(softnet_data); + + rcu_read_lock(); + fl = rcu_dereference(sd->flow_limit); + if (fl) { + new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1); + old_flow = fl->history[fl->history_head]; + fl->history[fl->history_head] = new_flow; + + fl->history_head++; + fl->history_head &= FLOW_LIMIT_HISTORY - 1; + + if (likely(fl->buckets[old_flow])) + fl->buckets[old_flow]--; + + if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) { + fl->count++; + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); +#endif + return false; +} + /* * enqueue_to_backlog is called to queue an skb to a per CPU backlog * queue (may be a remote CPU queue). @@ -3108,13 +3173,15 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu, { struct softnet_data *sd; unsigned long flags; + unsigned int qlen; sd = &per_cpu(softnet_data, cpu); local_irq_save(flags); rps_lock(sd); - if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) { + qlen = skb_queue_len(&sd->input_pkt_queue); + if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) { if (skb_queue_len(&sd->input_pkt_queue)) { enqueue: __skb_queue_tail(&sd->input_pkt_queue, skb); @@ -3513,8 +3580,15 @@ ncls: } } - if (vlan_tx_nonzero_tag_present(skb)) - skb->pkt_type = PACKET_OTHERHOST; + if (unlikely(vlan_tx_tag_present(skb))) { + if (vlan_tx_tag_get_id(skb)) + skb->pkt_type = PACKET_OTHERHOST; + /* Note: we might in the future use prio bits + * and set skb->priority like in vlan_do_receive() + * For the time being, just ignore Priority Code Point + */ + skb->vlan_tci = 0; + } /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; @@ -3862,7 +3936,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; - if (skb->mac_header == skb->tail && + if (skb_mac_header(skb) == skb_tail_pointer(skb) && pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); @@ -4106,6 +4180,58 @@ void napi_complete(struct napi_struct *n) } EXPORT_SYMBOL(napi_complete); +/* must be called under rcu_read_lock(), as we dont take a reference */ +struct napi_struct *napi_by_id(unsigned int napi_id) +{ + unsigned int hash = napi_id % HASH_SIZE(napi_hash); + struct napi_struct *napi; + + hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node) + if (napi->napi_id == napi_id) + return napi; + + return NULL; +} +EXPORT_SYMBOL_GPL(napi_by_id); + +void napi_hash_add(struct napi_struct *napi) +{ + if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) { + + spin_lock(&napi_hash_lock); + + /* 0 is not a valid id, we also skip an id that is taken + * we expect both events to be extremely rare + */ + napi->napi_id = 0; + while (!napi->napi_id) { + napi->napi_id = ++napi_gen_id; + if (napi_by_id(napi->napi_id)) + napi->napi_id = 0; + } + + hlist_add_head_rcu(&napi->napi_hash_node, + &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]); + + spin_unlock(&napi_hash_lock); + } +} +EXPORT_SYMBOL_GPL(napi_hash_add); + +/* Warning : caller is responsible to make sure rcu grace period + * is respected before freeing memory containing @napi + */ +void napi_hash_del(struct napi_struct *napi) +{ + spin_lock(&napi_hash_lock); + + if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) + hlist_del_rcu(&napi->napi_hash_node); + + spin_unlock(&napi_hash_lock); +} +EXPORT_SYMBOL_GPL(napi_hash_del); + void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { @@ -4404,7 +4530,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, else list_add_tail_rcu(&upper->list, &dev->upper_dev_list); dev_hold(upper_dev); - + call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); return 0; } @@ -4464,6 +4590,7 @@ void netdev_upper_dev_unlink(struct net_device *dev, list_del_rcu(&upper->list); dev_put(upper_dev); kfree_rcu(upper, rcu); + call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -4734,8 +4861,13 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags) } if (dev->flags & IFF_UP && - (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) - call_netdevice_notifiers(NETDEV_CHANGE, dev); + (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { + struct netdev_notifier_change_info change_info; + + change_info.flags_changed = changes; + call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + &change_info.info); + } } /** @@ -5158,17 +5290,28 @@ static void netdev_init_one_queue(struct net_device *dev, #endif } +static void netif_free_tx_queues(struct net_device *dev) +{ + if (is_vmalloc_addr(dev->_tx)) + vfree(dev->_tx); + else + kfree(dev->_tx); +} + static int netif_alloc_netdev_queues(struct net_device *dev) { unsigned int count = dev->num_tx_queues; struct netdev_queue *tx; + size_t sz = count * sizeof(*tx); - BUG_ON(count < 1); - - tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL); - if (!tx) - return -ENOMEM; + BUG_ON(count < 1 || count > 0xffff); + tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + if (!tx) { + tx = vzalloc(sz); + if (!tx) + return -ENOMEM; + } dev->_tx = tx; netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); @@ -5269,6 +5412,10 @@ int register_netdevice(struct net_device *dev) */ dev->hw_enc_features |= NETIF_F_SG; + /* Make NETIF_F_SG inheritable to MPLS. + */ + dev->mpls_features |= NETIF_F_SG; + ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = notifier_to_errno(ret); if (ret) @@ -5712,7 +5859,7 @@ free_all: free_pcpu: free_percpu(dev->pcpu_refcnt); - kfree(dev->_tx); + netif_free_tx_queues(dev); #ifdef CONFIG_RPS kfree(dev->_rx); #endif @@ -5737,7 +5884,7 @@ void free_netdev(struct net_device *dev) release_net(dev_net(dev)); - kfree(dev->_tx); + netif_free_tx_queues(dev); #ifdef CONFIG_RPS kfree(dev->_rx); #endif @@ -6048,7 +6195,7 @@ netdev_features_t netdev_increment_features(netdev_features_t all, } EXPORT_SYMBOL(netdev_increment_features); -static struct hlist_head *netdev_create_hash(void) +static struct hlist_head * __net_init netdev_create_hash(void) { int i; struct hlist_head *hash; @@ -6304,6 +6451,10 @@ static int __init net_dev_init(void) sd->backlog.weight = weight_p; sd->backlog.gro_list = NULL; sd->backlog.gro_count = 0; + +#ifdef CONFIG_NET_FLOW_LIMIT + sd->flow_limit = NULL; +#endif } dev_boot_phase = 0; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d23b6682f4e9..5e78d44333b9 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -295,9 +295,9 @@ static int net_dm_cmd_trace(struct sk_buff *skb, } static int dropmon_net_event(struct notifier_block *ev_block, - unsigned long event, void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dm_hw_stat_delta *new_stat = NULL; struct dm_hw_stat_delta *tmp; diff --git a/net/core/dst.c b/net/core/dst.c index df9cc810ec8e..ca4231ec7347 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -372,7 +372,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct dst_entry *dst, *last = NULL; switch (event) { diff --git a/net/core/ethtool.c b/net/core/ethtool.c index ce91766eeca9..78e9d9223e40 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -82,6 +82,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", + [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", @@ -278,11 +279,16 @@ static u32 __ethtool_get_flags(struct net_device *dev) { u32 flags = 0; - if (dev->features & NETIF_F_LRO) flags |= ETH_FLAG_LRO; - if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN; - if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN; - if (dev->features & NETIF_F_NTUPLE) flags |= ETH_FLAG_NTUPLE; - if (dev->features & NETIF_F_RXHASH) flags |= ETH_FLAG_RXHASH; + if (dev->features & NETIF_F_LRO) + flags |= ETH_FLAG_LRO; + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) + flags |= ETH_FLAG_RXVLAN; + if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) + flags |= ETH_FLAG_TXVLAN; + if (dev->features & NETIF_F_NTUPLE) + flags |= ETH_FLAG_NTUPLE; + if (dev->features & NETIF_F_RXHASH) + flags |= ETH_FLAG_RXHASH; return flags; } @@ -294,11 +300,16 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) if (data & ~ETH_ALL_FLAGS) return -EINVAL; - if (data & ETH_FLAG_LRO) features |= NETIF_F_LRO; - if (data & ETH_FLAG_RXVLAN) features |= NETIF_F_HW_VLAN_CTAG_RX; - if (data & ETH_FLAG_TXVLAN) features |= NETIF_F_HW_VLAN_CTAG_TX; - if (data & ETH_FLAG_NTUPLE) features |= NETIF_F_NTUPLE; - if (data & ETH_FLAG_RXHASH) features |= NETIF_F_RXHASH; + if (data & ETH_FLAG_LRO) + features |= NETIF_F_LRO; + if (data & ETH_FLAG_RXVLAN) + features |= NETIF_F_HW_VLAN_CTAG_RX; + if (data & ETH_FLAG_TXVLAN) + features |= NETIF_F_HW_VLAN_CTAG_TX; + if (data & ETH_FLAG_NTUPLE) + features |= NETIF_F_NTUPLE; + if (data & ETH_FLAG_RXHASH) + features |= NETIF_F_RXHASH; /* allow changing only bits set in hw_features */ changed = (features ^ dev->features) & ETH_ALL_FEATURES; @@ -1319,10 +1330,19 @@ static int ethtool_get_dump_data(struct net_device *dev, if (ret) return ret; - len = (tmp.len > dump.len) ? dump.len : tmp.len; + len = min(tmp.len, dump.len); if (!len) return -EFAULT; + /* Don't ever let the driver think there's more space available + * than it requested with .get_dump_flag(). + */ + dump.len = len; + + /* Always allocate enough space to hold the whole thing so that the + * driver does not need to check the length and bother with partial + * dumping. + */ data = vzalloc(tmp.len); if (!data) return -ENOMEM; @@ -1330,6 +1350,16 @@ static int ethtool_get_dump_data(struct net_device *dev, if (ret) goto out; + /* There are two sane possibilities: + * 1. The driver's .get_dump_data() does not touch dump.len. + * 2. Or it may set dump.len to how much it really writes, which + * should be tmp.len (or len if it can do a partial dump). + * In any case respond to userspace with the actual length of data + * it's receiving. + */ + WARN_ON(dump.len != len && dump.len != tmp.len); + dump.len = len; + if (copy_to_user(useraddr, &dump, sizeof(dump))) { ret = -EFAULT; goto out; @@ -1413,7 +1443,7 @@ static int ethtool_get_module_eeprom(struct net_device *dev, modinfo.eeprom_len); } -/* The main entry point in this file. Called from net/core/dev.c */ +/* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d5a9f8ead0d8..21735440c44a 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -705,9 +705,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) static int fib_rules_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct fib_rules_ops *ops; diff --git a/net/core/flow.c b/net/core/flow.c index 7102f166482d..dfa602ceb8cd 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -403,7 +403,7 @@ void flow_cache_flush_deferred(void) schedule_work(&flow_cache_flush_work); } -static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) +static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); @@ -421,7 +421,7 @@ static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) return 0; } -static int __cpuinit flow_cache_cpu(struct notifier_block *nfb, +static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) { diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index d9d198aa9fed..6b5b6e7013ca 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -82,7 +82,7 @@ struct gen_estimator { struct list_head list; struct gnet_stats_basic_packed *bstats; - struct gnet_stats_rate_est *rate_est; + struct gnet_stats_rate_est64 *rate_est; spinlock_t *stats_lock; int ewma_log; u64 last_bytes; @@ -167,7 +167,7 @@ static void gen_add_node(struct gen_estimator *est) static struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est *rate_est) + const struct gnet_stats_rate_est64 *rate_est) { struct rb_node *p = est_root.rb_node; @@ -203,7 +203,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats * */ int gen_new_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { @@ -258,7 +258,7 @@ EXPORT_SYMBOL(gen_new_estimator); * Note : Caller should respect an RCU grace period before freeing stats_lock */ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est) + struct gnet_stats_rate_est64 *rate_est) { struct gen_estimator *e; @@ -290,7 +290,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * Returns 0 on success or a negative error code. */ int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, - struct gnet_stats_rate_est *rate_est, + struct gnet_stats_rate_est64 *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { gen_kill_estimator(bstats, rate_est); @@ -306,7 +306,7 @@ EXPORT_SYMBOL(gen_replace_estimator); * Returns true if estimator is active, and false if not. */ bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, - const struct gnet_stats_rate_est *rate_est) + const struct gnet_stats_rate_est64 *rate_est) { bool res; diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index ddedf211e588..9d3d9e78397b 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -143,18 +143,30 @@ EXPORT_SYMBOL(gnet_stats_copy_basic); int gnet_stats_copy_rate_est(struct gnet_dump *d, const struct gnet_stats_basic_packed *b, - struct gnet_stats_rate_est *r) + struct gnet_stats_rate_est64 *r) { + struct gnet_stats_rate_est est; + int res; + if (b && !gen_estimator_active(b, r)) return 0; + est.bps = min_t(u64, UINT_MAX, r->bps); + /* we have some time before reaching 2^32 packets per second */ + est.pps = r->pps; + if (d->compat_tc_stats) { - d->tc_stats.bps = r->bps; - d->tc_stats.pps = r->pps; + d->tc_stats.bps = est.bps; + d->tc_stats.pps = est.pps; } - if (d->tail) - return gnet_stats_copy(d, TCA_STATS_RATE_EST, r, sizeof(*r)); + if (d->tail) { + res = gnet_stats_copy(d, TCA_STATS_RATE_EST, &est, sizeof(est)); + if (res < 0 || est.bps == r->bps) + return res; + /* emit 64bit stats only if needed */ + return gnet_stats_copy(d, TCA_STATS_RATE_EST64, r, sizeof(*r)); + } return 0; } diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 8f82a5cc3851..9c3a839322ba 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -92,6 +92,9 @@ static bool linkwatch_urgent_event(struct net_device *dev) if (dev->ifindex != dev->iflink) return true; + if (dev->priv_flags & IFF_TEAM_PORT) + return true; + return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5c56b217b999..9232c68941ab 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) we must kill timers etc. and move it to safe state. */ - skb_queue_purge(&n->arp_queue); + __skb_queue_purge(&n->arp_queue); n->arp_queue_len_bytes = 0; n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) @@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device if (!n) goto out_entries; - skb_queue_head_init(&n->arp_queue); + __skb_queue_head_init(&n->arp_queue); rwlock_init(&n->lock); seqlock_init(&n->ha_lock); n->updated = n->used = now; @@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh) if (neigh_del_timer(neigh)) pr_warn("Impossible event\n"); - skb_queue_purge(&neigh->arp_queue); + write_lock_bh(&neigh->lock); + __skb_queue_purge(&neigh->arp_queue); + write_unlock_bh(&neigh->lock); neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) @@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh) neigh->ops->error_report(neigh, skb); write_lock(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } @@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, write_lock_bh(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } out: @@ -1419,7 +1421,7 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, for (p = &tbl->parms; p; p = p->next) { if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) || - (!p->dev && !ifindex)) + (!p->dev && !ifindex && net_eq(net, &init_net))) return p; } @@ -1429,15 +1431,11 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl, struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl) { - struct neigh_parms *p, *ref; + struct neigh_parms *p; struct net *net = dev_net(dev); const struct net_device_ops *ops = dev->netdev_ops; - ref = lookup_neigh_parms(tbl, net, 0); - if (!ref) - return NULL; - - p = kmemdup(ref, sizeof(*p), GFP_KERNEL); + p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL); if (p) { p->tbl = tbl; atomic_set(&p->refcnt, 1); @@ -2053,6 +2051,12 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh) } } + err = -ENOENT; + if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] || + tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) && + !net_eq(net, &init_net)) + goto errout_tbl_lock; + if (tb[NDTA_THRESH1]) tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); @@ -2763,13 +2767,14 @@ EXPORT_SYMBOL(neigh_app_ns); #ifdef CONFIG_SYSCTL static int zero; +static int int_max = INT_MAX; static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); -static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int proc_unres_qlen(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { int size, ret; - ctl_table tmp = *ctl; + struct ctl_table tmp = *ctl; tmp.extra1 = &zero; tmp.extra2 = &unres_qlen_max; @@ -2815,19 +2820,25 @@ static struct neigh_sysctl_table { .procname = "mcast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_UCAST_PROBE] = { .procname = "ucast_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_APP_PROBE] = { .procname = "app_solicit", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_RETRANS_TIME] = { .procname = "retrans_time", @@ -2870,7 +2881,9 @@ static struct neigh_sysctl_table { .procname = "proxy_qlen", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_ANYCAST_DELAY] = { .procname = "anycast_delay", @@ -2912,19 +2925,25 @@ static struct neigh_sysctl_table { .procname = "gc_thresh1", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH2] = { .procname = "gc_thresh2", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_GC_THRESH3] = { .procname = "gc_thresh3", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .extra2 = &int_max, + .proc_handler = proc_dointvec_minmax, }, {}, }, diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 569d355fec3e..2bf83299600a 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -146,11 +146,23 @@ static void softnet_seq_stop(struct seq_file *seq, void *v) static int softnet_seq_show(struct seq_file *seq, void *v) { struct softnet_data *sd = v; + unsigned int flow_limit_count = 0; - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", +#ifdef CONFIG_NET_FLOW_LIMIT + struct sd_flow_limit *fl; + + rcu_read_lock(); + fl = rcu_dereference(sd->flow_limit); + if (fl) + flow_limit_count = fl->count; + rcu_read_unlock(); +#endif + + seq_printf(seq, + "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", sd->processed, sd->dropped, sd->time_squeeze, 0, 0, 0, 0, 0, /* was fastroute */ - sd->cpu_collision, sd->received_rps); + sd->cpu_collision, sd->received_rps, flow_limit_count); return 0; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 35a9f0804b6f..2c637e9a0b27 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -248,7 +248,7 @@ static void netpoll_poll_dev(struct net_device *dev) zap_completion_queue(); } -int netpoll_rx_disable(struct net_device *dev) +void netpoll_rx_disable(struct net_device *dev) { struct netpoll_info *ni; int idx; @@ -258,7 +258,6 @@ int netpoll_rx_disable(struct net_device *dev) if (ni) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); - return 0; } EXPORT_SYMBOL(netpoll_rx_disable); @@ -691,25 +690,20 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo send_skb->dev = skb->dev; skb_reset_network_header(send_skb); - skb_put(send_skb, sizeof(struct ipv6hdr)); - hdr = ipv6_hdr(send_skb); - + hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr)); *(__be32*)hdr = htonl(0x60000000); - hdr->payload_len = htons(size); hdr->nexthdr = IPPROTO_ICMPV6; hdr->hop_limit = 255; hdr->saddr = *saddr; hdr->daddr = *daddr; - send_skb->transport_header = send_skb->tail; - skb_put(send_skb, size); - - icmp6h = (struct icmp6hdr *)skb_transport_header(skb); + icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr)); icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; icmp6h->icmp6_router = 0; icmp6h->icmp6_solicited = 1; - target = (struct in6_addr *)(skb_transport_header(send_skb) + sizeof(struct icmp6hdr)); + + target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr)); *target = msg->target; icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, IPPROTO_ICMPV6, diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 0777d0aa18c3..e533259dce3c 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -261,7 +261,7 @@ struct cgroup_subsys net_prio_subsys = { static int netprio_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netprio_map *old; /* diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 11f2704c3810..9640972ec50e 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1921,7 +1921,7 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct pktgen_net *pn = net_generic(dev_net(dev), pg_net_id); if (pn->pktgen_exiting) @@ -2627,6 +2627,29 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, pgh->tv_usec = htonl(timestamp.tv_usec); } +static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, + struct pktgen_dev *pkt_dev, + unsigned int extralen) +{ + struct sk_buff *skb = NULL; + unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen + + pkt_dev->pkt_overhead; + + if (pkt_dev->flags & F_NODE) { + int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id(); + + skb = __alloc_skb(NET_SKB_PAD + size, GFP_NOWAIT, 0, node); + if (likely(skb)) { + skb_reserve(skb, NET_SKB_PAD); + skb->dev = dev; + } + } else { + skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); + } + + return skb; +} + static struct sk_buff *fill_packet_ipv4(struct net_device *odev, struct pktgen_dev *pkt_dev) { @@ -2657,32 +2680,13 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, datalen = (odev->hard_header_len + 16) & ~0xf; - if (pkt_dev->flags & F_NODE) { - int node; - - if (pkt_dev->node >= 0) - node = pkt_dev->node; - else - node = numa_node_id(); - - skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64 - + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node); - if (likely(skb)) { - skb_reserve(skb, NET_SKB_PAD); - skb->dev = odev; - } - } - else - skb = __netdev_alloc_skb(odev, - pkt_dev->cur_pkt_size + 64 - + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT); - + skb = pktgen_alloc_skb(odev, pkt_dev, datalen); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } - prefetchw(skb->data); + prefetchw(skb->data); skb_reserve(skb, datalen); /* Reserve for ethernet and IP header */ @@ -2708,15 +2712,15 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IP); } - skb->network_header = skb->tail; - skb->transport_header = skb->network_header + sizeof(struct iphdr); - skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, skb->len); + iph = (struct iphdr *) skb_put(skb, sizeof(struct iphdr)); + + skb_set_transport_header(skb, skb->len); + udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; - iph = ip_hdr(skb); - udph = udp_hdr(skb); - memcpy(eth, pkt_dev->hh, 12); *(__be16 *) & eth[12] = protocol; @@ -2746,8 +2750,6 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, iph->check = 0; iph->check = ip_fast_csum((void *)iph, iph->ihl); skb->protocol = protocol; - skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->pkt_overhead); skb->dev = odev; skb->pkt_type = PACKET_HOST; pktgen_finalize_skb(pkt_dev, skb, datalen); @@ -2788,15 +2790,13 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - skb = __netdev_alloc_skb(odev, - pkt_dev->cur_pkt_size + 64 - + 16 + pkt_dev->pkt_overhead, GFP_NOWAIT); + skb = pktgen_alloc_skb(odev, pkt_dev, 16); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } - prefetchw(skb->data); + prefetchw(skb->data); skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ @@ -2822,13 +2822,14 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, *vlan_encapsulated_proto = htons(ETH_P_IPV6); } - skb->network_header = skb->tail; - skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); - skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, skb->len); + iph = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr)); + + skb_set_transport_header(skb, skb->len); + udph = (struct udphdr *) skb_put(skb, sizeof(struct udphdr)); skb_set_queue_mapping(skb, queue_map); skb->priority = pkt_dev->skb_priority; - iph = ipv6_hdr(skb); - udph = udp_hdr(skb); memcpy(eth, pkt_dev->hh, 12); *(__be16 *) ð[12] = protocol; @@ -2863,8 +2864,6 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, iph->daddr = pkt_dev->cur_in6_daddr; iph->saddr = pkt_dev->cur_in6_saddr; - skb->mac_header = (skb->network_header - ETH_HLEN - - pkt_dev->pkt_overhead); skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a08bd2b7fe3f..3de740834d1f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -947,6 +947,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_vlan vf_vlan; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; + struct ifla_vf_link_state vf_linkstate; /* * Not all SR-IOV capable drivers support the @@ -956,18 +957,24 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, */ ivi.spoofchk = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); + /* The default value for VF link state is "auto" + * IFLA_VF_LINK_STATE_AUTO which equals zero + */ + ivi.linkstate = 0; if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = - vf_spoofchk.vf = ivi.vf; + vf_spoofchk.vf = + vf_linkstate.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; vf_tx_rate.rate = ivi.tx_rate; vf_spoofchk.setting = ivi.spoofchk; + vf_linkstate.link_state = ivi.linkstate; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -978,7 +985,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate) || nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), - &vf_spoofchk)) + &vf_spoofchk) || + nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), + &vf_linkstate)) goto nla_put_failure; nla_nest_end(skb, vf); } @@ -1238,6 +1247,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) ivs->setting); break; } + case IFLA_VF_LINK_STATE: { + struct ifla_vf_link_state *ivl; + ivl = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_link_state) + err = ops->ndo_set_vf_link_state(dev, ivl->vf, + ivl->link_state); + break; + } default: err = -EINVAL; break; @@ -2091,10 +2109,6 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) } addr = nla_data(tb[NDA_LLADDR]); - if (is_zero_ether_addr(addr)) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n"); - return -EINVAL; - } err = -EOPNOTSUPP; @@ -2192,10 +2206,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) } addr = nla_data(tb[NDA_LLADDR]); - if (is_zero_ether_addr(addr)) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n"); - return -EINVAL; - } err = -EOPNOTSUPP; @@ -2667,7 +2677,7 @@ static void rtnetlink_rcv(struct sk_buff *skb) static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_UP: diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1c1738cc4538..2c3d0f53d198 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -199,9 +199,7 @@ struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) skb->truesize = sizeof(struct sk_buff); atomic_set(&skb->users, 1); -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; out: return skb; } @@ -275,10 +273,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; - skb->transport_header = ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); @@ -313,7 +309,8 @@ EXPORT_SYMBOL(__alloc_skb); * @frag_size: size of fragment, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and - * skb_shared_info. @data must have been allocated by kmalloc() + * skb_shared_info. @data must have been allocated by kmalloc() only if + * @frag_size is 0, otherwise data should come from the page allocator. * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : @@ -344,10 +341,8 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) skb->data = data; skb_reset_tail_pointer(skb); skb->end = skb->tail + size; -#ifdef NET_SKBUFF_DATA_USES_OFFSET - skb->mac_header = ~0U; - skb->transport_header = ~0U; -#endif + skb->mac_header = (typeof(skb->mac_header))~0U; + skb->transport_header = (typeof(skb->transport_header))~0U; /* make sure we initialize shinfo sequentially */ shinfo = skb_shinfo(skb); @@ -703,6 +698,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->transport_header = old->transport_header; new->network_header = old->network_header; new->mac_header = old->mac_header; + new->inner_protocol = old->inner_protocol; new->inner_transport_header = old->inner_transport_header; new->inner_network_header = old->inner_network_header; new->inner_mac_header = old->inner_mac_header; @@ -743,6 +739,10 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->vlan_tci = old->vlan_tci; skb_copy_secmark(new, old); + +#ifdef CONFIG_NET_RX_BUSY_POLL + new->napi_id = old->napi_id; +#endif } /* @@ -825,7 +825,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) page = alloc_page(gfp_mask); if (!page) { while (head) { - struct page *next = (struct page *)head->private; + struct page *next = (struct page *)page_private(head); put_page(head); head = next; } @@ -835,7 +835,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) memcpy(page_address(page), vaddr + f->page_offset, skb_frag_size(f)); kunmap_atomic(vaddr); - page->private = (unsigned long)head; + set_page_private(page, (unsigned long)head); head = page; } @@ -849,7 +849,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) for (i = num_frags - 1; i >= 0; i--) { __skb_fill_page_desc(skb, i, head, 0, skb_shinfo(skb)->frags[i].size); - head = (struct page *)head->private; + head = (struct page *)page_private(head); } skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; @@ -915,18 +915,8 @@ static void skb_headers_offset_update(struct sk_buff *skb, int off) static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { -#ifndef NET_SKBUFF_DATA_USES_OFFSET - /* - * Shift between the two data areas in bytes - */ - unsigned long offset = new->data - old->data; -#endif - __copy_skb_header(new, old); -#ifndef NET_SKBUFF_DATA_USES_OFFSET - skb_headers_offset_update(new, offset); -#endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; @@ -1118,7 +1108,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->end = skb->head + size; #endif skb->tail += off; - skb_headers_offset_update(skb, off); + skb_headers_offset_update(skb, nhead); /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += nhead; @@ -1213,9 +1203,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, off = newheadroom - oldheadroom; if (n->ip_summed == CHECKSUM_PARTIAL) n->csum_start += off; -#ifdef NET_SKBUFF_DATA_USES_OFFSET + skb_headers_offset_update(n, off); -#endif return n; } @@ -2558,8 +2547,13 @@ unsigned int skb_seq_read(unsigned int consumed, const u8 **data, unsigned int block_limit, abs_offset = consumed + st->lower_offset; skb_frag_t *frag; - if (unlikely(abs_offset >= st->upper_offset)) + if (unlikely(abs_offset >= st->upper_offset)) { + if (st->frag_data) { + kunmap_atomic(st->frag_data); + st->frag_data = NULL; + } return 0; + } next_skb: block_limit = skb_headlen(st->cur_skb) + st->stepped_offset; @@ -2857,7 +2851,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) - continue; + goto perform_csum_check; if (!sg) { nskb->ip_summed = CHECKSUM_NONE; @@ -2921,6 +2915,7 @@ skip_fraglist: nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; +perform_csum_check: if (!csum) { nskb->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); @@ -3503,3 +3498,26 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, return true; } EXPORT_SYMBOL(skb_try_coalesce); + +/** + * skb_scrub_packet - scrub an skb before sending it to another netns + * + * @skb: buffer to clean + * + * skb_scrub_packet can be used to clean an skb before injecting it in + * another namespace. We have to clear all information in the skb that + * could impact namespace isolation. + */ +void skb_scrub_packet(struct sk_buff *skb) +{ + skb_orphan(skb); + skb->tstamp.tv64 = 0; + skb->pkt_type = PACKET_HOST; + skb->skb_iif = 0; + skb_dst_drop(skb); + skb->mark = 0; + secpath_reset(skb); + nf_reset(skb); + nf_reset_trace(skb); +} +EXPORT_SYMBOL_GPL(skb_scrub_packet); diff --git a/net/core/sock.c b/net/core/sock.c index d6d024cfaaaf..2c097c5a35dd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -139,6 +139,8 @@ #include <net/tcp.h> #endif +#include <net/busy_poll.h> + static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); @@ -898,6 +900,19 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); break; +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: + /* allow unprivileged users to decrease the value */ + if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN)) + ret = -EPERM; + else { + if (val < 0) + ret = -EINVAL; + else + sk->sk_ll_usec = val; + } + break; +#endif default: ret = -ENOPROTOOPT; break; @@ -1155,6 +1170,12 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; +#ifdef CONFIG_NET_RX_BUSY_POLL + case SO_BUSY_POLL: + v.val = sk->sk_ll_usec; + break; +#endif + default: return -ENOPROTOOPT; } @@ -2271,6 +2292,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); +#ifdef CONFIG_NET_RX_BUSY_POLL + sk->sk_napi_id = 0; + sk->sk_ll_usec = sysctl_net_busy_read; +#endif + /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cfdb46ab3a7f..31107abd2783 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -19,16 +19,19 @@ #include <net/ip.h> #include <net/sock.h> #include <net/net_ratelimit.h> +#include <net/busy_poll.h> +static int zero = 0; static int one = 1; +static int ushort_max = USHRT_MAX; #ifdef CONFIG_RPS -static int rps_sock_flow_sysctl(ctl_table *table, int write, +static int rps_sock_flow_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { unsigned int orig_size, size; int ret, i; - ctl_table tmp = { + struct ctl_table tmp = { .data = &size, .maxlen = sizeof(size), .mode = table->mode @@ -87,6 +90,109 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write, } #endif /* CONFIG_RPS */ +#ifdef CONFIG_NET_FLOW_LIMIT +static DEFINE_MUTEX(flow_limit_update_mutex); + +static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct sd_flow_limit *cur; + struct softnet_data *sd; + cpumask_var_t mask; + int i, len, ret = 0; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + if (write) { + ret = cpumask_parse_user(buffer, *lenp, mask); + if (ret) + goto done; + + mutex_lock(&flow_limit_update_mutex); + len = sizeof(*cur) + netdev_flow_limit_table_len; + for_each_possible_cpu(i) { + sd = &per_cpu(softnet_data, i); + cur = rcu_dereference_protected(sd->flow_limit, + lockdep_is_held(&flow_limit_update_mutex)); + if (cur && !cpumask_test_cpu(i, mask)) { + RCU_INIT_POINTER(sd->flow_limit, NULL); + synchronize_rcu(); + kfree(cur); + } else if (!cur && cpumask_test_cpu(i, mask)) { + cur = kzalloc(len, GFP_KERNEL); + if (!cur) { + /* not unwinding previous changes */ + ret = -ENOMEM; + goto write_unlock; + } + cur->num_buckets = netdev_flow_limit_table_len; + rcu_assign_pointer(sd->flow_limit, cur); + } + } +write_unlock: + mutex_unlock(&flow_limit_update_mutex); + } else { + char kbuf[128]; + + if (*ppos || !*lenp) { + *lenp = 0; + goto done; + } + + cpumask_clear(mask); + rcu_read_lock(); + for_each_possible_cpu(i) { + sd = &per_cpu(softnet_data, i); + if (rcu_dereference(sd->flow_limit)) + cpumask_set_cpu(i, mask); + } + rcu_read_unlock(); + + len = min(sizeof(kbuf) - 1, *lenp); + len = cpumask_scnprintf(kbuf, len, mask); + if (!len) { + *lenp = 0; + goto done; + } + if (len < *lenp) + kbuf[len++] = '\n'; + if (copy_to_user(buffer, kbuf, len)) { + ret = -EFAULT; + goto done; + } + *lenp = len; + *ppos += len; + } + +done: + free_cpumask_var(mask); + return ret; +} + +static int flow_limit_table_len_sysctl(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + unsigned int old, *ptr; + int ret; + + mutex_lock(&flow_limit_update_mutex); + + ptr = table->data; + old = *ptr; + ret = proc_dointvec(table, write, buffer, lenp, ppos); + if (!ret && write && !is_power_of_2(*ptr)) { + *ptr = old; + ret = -EINVAL; + } + + mutex_unlock(&flow_limit_update_mutex); + return ret; +} +#endif /* CONFIG_NET_FLOW_LIMIT */ + static struct ctl_table net_core_table[] = { #ifdef CONFIG_NET { @@ -180,6 +286,37 @@ static struct ctl_table net_core_table[] = { .proc_handler = rps_sock_flow_sysctl }, #endif +#ifdef CONFIG_NET_FLOW_LIMIT + { + .procname = "flow_limit_cpu_bitmap", + .mode = 0644, + .proc_handler = flow_limit_cpu_sysctl + }, + { + .procname = "flow_limit_table_len", + .data = &netdev_flow_limit_table_len, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = flow_limit_table_len_sysctl + }, +#endif /* CONFIG_NET_FLOW_LIMIT */ +#ifdef CONFIG_NET_RX_BUSY_POLL + { + .procname = "busy_poll", + .data = &sysctl_net_busy_poll, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "busy_read", + .data = &sysctl_net_busy_read, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +# +#endif #endif /* CONFIG_NET */ { .procname = "netdev_budget", @@ -204,7 +341,9 @@ static struct ctl_table netns_core_table[] = { .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .extra1 = &zero, + .extra2 = &ushort_max, + .proc_handler = proc_dointvec_minmax }, { } }; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index c21f200eed93..dd4d506ef923 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2078,9 +2078,9 @@ out_err: } static int dn_device_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 7d9197063ebb..dd0dfb25f4b1 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -158,11 +158,11 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU static int min_priority[1]; static int max_priority[] = { 127 }; /* From DECnet spec */ -static int dn_forwarding_proc(ctl_table *, int, +static int dn_forwarding_proc(struct ctl_table *, int, void __user *, size_t *, loff_t *); static struct dn_dev_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table dn_dev_vars[5]; + struct ctl_table dn_dev_vars[5]; } dn_dev_sysctl = { NULL, { @@ -242,7 +242,7 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) } } -static int dn_forwarding_proc(ctl_table *table, int write, +static int dn_forwarding_proc(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index a55eeccaa72f..5325b541c526 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str) return 0; } -static int dn_node_address_handler(ctl_table *table, int write, +static int dn_node_address_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -183,7 +183,7 @@ static int dn_node_address_handler(ctl_table *table, int write, return 0; } -static int dn_def_dev_handler(ctl_table *table, int write, +static int dn_def_dev_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -246,7 +246,7 @@ static int dn_def_dev_handler(ctl_table *table, int write, return 0; } -static ctl_table dn_table[] = { +static struct ctl_table dn_table[] = { { .procname = "node_address", .maxlen = 7, diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 0a69d0757795..f347a2ca7d7e 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -118,7 +118,7 @@ dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep) if (opt_vlen <= 0) goto bad_option_value; - ret = strict_strtoul(eq, 10, &derrno); + ret = kstrtoul(eq, 10, &derrno); if (ret < 0) goto bad_option_value; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5359560926bc..be1f64d35358 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -401,27 +401,8 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, } EXPORT_SYMBOL(alloc_etherdev_mqs); -static size_t _format_mac_addr(char *buf, int buflen, - const unsigned char *addr, int len) -{ - int i; - char *cp = buf; - - for (i = 0; i < len; i++) { - cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]); - if (i == len - 1) - break; - cp += scnprintf(cp, buflen - (cp - buf), ":"); - } - return cp - buf; -} - ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) { - size_t l; - - l = _format_mac_addr(buf, PAGE_SIZE, addr, len); - l += scnprintf(buf + l, PAGE_SIZE - l, "\n"); - return (ssize_t)l; + return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr); } EXPORT_SYMBOL(sysfs_format_mac); diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 55e1fd5b3e56..3b9d5f20bd1c 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -1352,10 +1352,9 @@ static inline void lowpan_netlink_fini(void) } static int lowpan_device_event(struct notifier_block *unused, - unsigned long event, - void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); LIST_HEAD(del_list); struct lowpan_dev_record *entry, *tmp; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8603ca827104..37cf1a6ea3ad 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -9,10 +9,7 @@ config IP_MULTICAST intend to participate in the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. More information about the MBONE is on the WWW at - <http://www.savetz.com/mbone/>. Information about the multicast - capabilities of the various network cards is contained in - <file:Documentation/networking/multicast.txt>. For most people, it's - safe to say N. + <http://www.savetz.com/mbone/>. For most people, it's safe to say N. config IP_ADVANCED_ROUTER bool "IP: advanced router" @@ -223,10 +220,8 @@ config IP_MROUTE packets that have several destination addresses. It is needed on the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. In order to do that, you would most - likely run the program mrouted. Information about the multicast - capabilities of the various network cards is contained in - <file:Documentation/networking/multicast.txt>. If you haven't heard - about it, you don't need it. + likely run the program mrouted. If you haven't heard about it, you + don't need it. config IP_MROUTE_MULTIPLE_TABLES bool "IP: multicast policy routing" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 089cb9f36387..4b81e91c80fe 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,10 +8,10 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - datagram.o raw.o udp.o udplite.o \ - arp.o icmp.o devinet.o af_inet.o igmp.o \ + tcp_offload.o datagram.o raw.o udp.o udplite.o \ + udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ - inet_fragment.o ping.o + inet_fragment.o ping.o ip_tunnel_core.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o @@ -19,6 +19,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o +gre-y := gre_demux.o gre_offload.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d01be2a3ae53..b4d0be2b7ce9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1295,6 +1295,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_GRE | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | + SKB_GSO_MPLS | 0))) goto out; @@ -1384,7 +1385,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, goto out_unlock; id = ntohl(*(__be32 *)&iph->id); - flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); + flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); id >>= 16; for (p = *head; p; p = p->next) { @@ -1406,6 +1407,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | + (__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; @@ -1557,15 +1559,6 @@ static const struct net_protocol tcp_protocol = { .netns_ok = 1, }; -static const struct net_offload tcp_offload = { - .callbacks = { - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, - }, -}; - static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, @@ -1573,13 +1566,6 @@ static const struct net_protocol udp_protocol = { .netns_ok = 1, }; -static const struct net_offload udp_offload = { - .callbacks = { - .gso_send_check = udp4_ufo_send_check, - .gso_segment = udp4_ufo_fragment, - }, -}; - static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = icmp_err, @@ -1679,10 +1665,10 @@ static int __init ipv4_offload_init(void) /* * Add offloads */ - if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) + if (udpv4_offload_init() < 0) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); - if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) - pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__); + if (tcpv4_offload_init() < 0) + pr_crit("%s: Cannot add TCP protocol offload\n", __func__); dev_add_offload(&ip_packet_offload); return 0; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 2e7f1948216f..717902669d2f 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -419,12 +419,9 @@ static void ah4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 247ec1951c35..4429b013f269 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1234,13 +1234,19 @@ out: static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); rt_cache_flush(dev_net(dev)); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&arp_tbl, dev); + break; default: break; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dfc39d4d48b7..34ca6d5a3a4b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -215,6 +215,7 @@ void in_dev_finish_destroy(struct in_device *idev) WARN_ON(idev->ifa_list); WARN_ON(idev->mc_list); + kfree(rcu_dereference_protected(idev->mc_hash, 1)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); #endif @@ -771,7 +772,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ci = nla_data(tb[IFA_CACHEINFO]); if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { err = -EINVAL; - goto errout; + goto errout_free; } *pvalid_lft = ci->ifa_valid; *pprefered_lft = ci->ifa_prefered; @@ -779,6 +780,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, return ifa; +errout_free: + inet_free_ifa(ifa); errout: return ERR_PTR(err); } @@ -1333,7 +1336,7 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -1941,7 +1944,7 @@ static void inet_forward_change(struct net *net) } } -static int devinet_conf_proc(ctl_table *ctl, int write, +static int devinet_conf_proc(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -1984,7 +1987,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, return ret; } -static int devinet_sysctl_forward(ctl_table *ctl, int write, +static int devinet_sysctl_forward(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -2027,7 +2030,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, return ret; } -static int ipv4_doint_and_flush(ctl_table *ctl, int write, +static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4cfe34d4cc96..ab3d814bc80a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -502,12 +502,9 @@ static void esp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c7629a209f9d..b3f627ac4ed8 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -961,7 +961,7 @@ static void nl_fib_input(struct sk_buff *skb) nlmsg_len(nlh) < sizeof(*frn)) return; - skb = skb_clone(skb, GFP_KERNEL); + skb = netlink_skb_clone(skb, GFP_KERNEL); if (skb == NULL) return; nlh = nlmsg_hdr(skb); @@ -1038,7 +1038,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev; struct net *net = dev_net(dev); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8f6cb7a87cd6..d5dbca5ecf62 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -169,7 +169,8 @@ static void free_nh_exceptions(struct fib_nh *nh) next = rcu_dereference_protected(fnhe->fnhe_next, 1); - rt_fibinfo_free(&fnhe->fnhe_rth); + rt_fibinfo_free(&fnhe->fnhe_rth_input); + rt_fibinfo_free(&fnhe->fnhe_rth_output); kfree(fnhe); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 49616fed9340..108a1e9c9eac 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2133,7 +2133,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) max--; pointers = 0; - for (i = 1; i <= max; i++) + for (i = 1; i < max; i++) if (stat->nodesizes[i] != 0) { seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); pointers += (1<<i) * stat->nodesizes[i]; diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c deleted file mode 100644 index 7856d1651d05..000000000000 --- a/net/ipv4/gre.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * GRE over IPv4 demultiplexer driver - * - * Authors: Dmitry Kozlov (xeb@mail.ru) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/kmod.h> -#include <linux/skbuff.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/netdevice.h> -#include <linux/if_tunnel.h> -#include <linux/spinlock.h> -#include <net/protocol.h> -#include <net/gre.h> - - -static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; -static DEFINE_SPINLOCK(gre_proto_lock); - -int gre_add_protocol(const struct gre_protocol *proto, u8 version) -{ - if (version >= GREPROTO_MAX) - goto err_out; - - spin_lock(&gre_proto_lock); - if (gre_proto[version]) - goto err_out_unlock; - - RCU_INIT_POINTER(gre_proto[version], proto); - spin_unlock(&gre_proto_lock); - return 0; - -err_out_unlock: - spin_unlock(&gre_proto_lock); -err_out: - return -1; -} -EXPORT_SYMBOL_GPL(gre_add_protocol); - -int gre_del_protocol(const struct gre_protocol *proto, u8 version) -{ - if (version >= GREPROTO_MAX) - goto err_out; - - spin_lock(&gre_proto_lock); - if (rcu_dereference_protected(gre_proto[version], - lockdep_is_held(&gre_proto_lock)) != proto) - goto err_out_unlock; - RCU_INIT_POINTER(gre_proto[version], NULL); - spin_unlock(&gre_proto_lock); - synchronize_rcu(); - return 0; - -err_out_unlock: - spin_unlock(&gre_proto_lock); -err_out: - return -1; -} -EXPORT_SYMBOL_GPL(gre_del_protocol); - -static int gre_rcv(struct sk_buff *skb) -{ - const struct gre_protocol *proto; - u8 ver; - int ret; - - if (!pskb_may_pull(skb, 12)) - goto drop; - - ver = skb->data[1]&0x7f; - if (ver >= GREPROTO_MAX) - goto drop; - - rcu_read_lock(); - proto = rcu_dereference(gre_proto[ver]); - if (!proto || !proto->handler) - goto drop_unlock; - ret = proto->handler(skb); - rcu_read_unlock(); - return ret; - -drop_unlock: - rcu_read_unlock(); -drop: - kfree_skb(skb); - return NET_RX_DROP; -} - -static void gre_err(struct sk_buff *skb, u32 info) -{ - const struct gre_protocol *proto; - const struct iphdr *iph = (const struct iphdr *)skb->data; - u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f; - - if (ver >= GREPROTO_MAX) - return; - - rcu_read_lock(); - proto = rcu_dereference(gre_proto[ver]); - if (proto && proto->err_handler) - proto->err_handler(skb, info); - rcu_read_unlock(); -} - -static struct sk_buff *gre_gso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - netdev_features_t enc_features; - int ghl = GRE_HEADER_SECTION; - struct gre_base_hdr *greh; - int mac_len = skb->mac_len; - __be16 protocol = skb->protocol; - int tnl_hlen; - bool csum; - - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_TCPV6 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_GRE))) - goto out; - - if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) - goto out; - - greh = (struct gre_base_hdr *)skb_transport_header(skb); - - if (greh->flags & GRE_KEY) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_SEQ) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_CSUM) { - ghl += GRE_HEADER_SECTION; - csum = true; - } else - csum = false; - - /* setup inner skb. */ - skb->protocol = greh->protocol; - skb->encapsulation = 0; - - if (unlikely(!pskb_may_pull(skb, ghl))) - goto out; - __skb_pull(skb, ghl); - skb_reset_mac_header(skb); - skb_set_network_header(skb, skb_inner_network_offset(skb)); - skb->mac_len = skb_inner_network_offset(skb); - - /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); - segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) - goto out; - - skb = segs; - tnl_hlen = skb_tnl_header_len(skb); - do { - __skb_push(skb, ghl); - if (csum) { - __be32 *pcsum; - - if (skb_has_shared_frag(skb)) { - int err; - - err = __skb_linearize(skb); - if (err) { - kfree_skb_list(segs); - segs = ERR_PTR(err); - goto out; - } - } - - greh = (struct gre_base_hdr *)(skb->data); - pcsum = (__be32 *)(greh + 1); - *pcsum = 0; - *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); - } - __skb_push(skb, tnl_hlen - ghl); - - skb_reset_mac_header(skb); - skb_set_network_header(skb, mac_len); - skb->mac_len = mac_len; - skb->protocol = protocol; - } while ((skb = skb->next)); -out: - return segs; -} - -static int gre_gso_send_check(struct sk_buff *skb) -{ - if (!skb->encapsulation) - return -EINVAL; - return 0; -} - -static const struct net_protocol net_gre_protocol = { - .handler = gre_rcv, - .err_handler = gre_err, - .netns_ok = 1, -}; - -static const struct net_offload gre_offload = { - .callbacks = { - .gso_send_check = gre_gso_send_check, - .gso_segment = gre_gso_segment, - }, -}; - -static int __init gre_init(void) -{ - pr_info("GRE over IPv4 demultiplexor driver\n"); - - if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { - pr_err("can't add protocol\n"); - return -EAGAIN; - } - - if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { - pr_err("can't add protocol offload\n"); - inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); - return -EAGAIN; - } - - return 0; -} - -static void __exit gre_exit(void) -{ - inet_del_offload(&gre_offload, IPPROTO_GRE); - inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); -} - -module_init(gre_init); -module_exit(gre_exit); - -MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); -MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); -MODULE_LICENSE("GPL"); - diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c new file mode 100644 index 000000000000..736c9fc3ef93 --- /dev/null +++ b/net/ipv4/gre_demux.c @@ -0,0 +1,414 @@ +/* + * GRE over IPv4 demultiplexer driver + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/if.h> +#include <linux/icmp.h> +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/netdevice.h> +#include <linux/if_tunnel.h> +#include <linux/spinlock.h> +#include <net/protocol.h> +#include <net/gre.h> + +#include <net/icmp.h> +#include <net/route.h> +#include <net/xfrm.h> + +static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; +static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX]; + +int gre_add_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + return -EINVAL; + + return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ? + 0 : -EBUSY; +} +EXPORT_SYMBOL_GPL(gre_add_protocol); + +int gre_del_protocol(const struct gre_protocol *proto, u8 version) +{ + int ret; + + if (version >= GREPROTO_MAX) + return -EINVAL; + + ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ? + 0 : -EBUSY; + + if (ret) + return ret; + + synchronize_rcu(); + return 0; +} +EXPORT_SYMBOL_GPL(gre_del_protocol); + +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, + int hdr_len) +{ + struct gre_base_hdr *greh; + + skb_push(skb, hdr_len); + + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(tpi->flags); + greh->protocol = tpi->proto; + + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + if (tpi->flags&TUNNEL_SEQ) { + *ptr = tpi->seq; + ptr--; + } + if (tpi->flags&TUNNEL_KEY) { + *ptr = tpi->key; + ptr--; + } + if (tpi->flags&TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + *ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); + } + } +} +EXPORT_SYMBOL_GPL(gre_build_header); + +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) +{ + int err; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return skb; +error: + kfree_skb(skb); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(gre_handle_offloads); + +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); + + if (!csum) + break; + /* Fall through. */ + + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; + } + + return csum; +} + +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err) +{ + unsigned int ip_hlen = ip_hdrlen(skb); + const struct gre_base_hdr *greh; + __be32 *options; + int hdr_len; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; + + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + hdr_len = ip_gre_calc_hlen(tpi->flags); + + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + tpi->proto = greh->protocol; + + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; + } + + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else + tpi->key = 0; + + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else + tpi->seq = 0; + + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + hdr_len += 4; + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + } + } + + return iptunnel_pull_header(skb, hdr_len, tpi->proto); +} + +static int gre_cisco_rcv(struct sk_buff *skb) +{ + struct tnl_ptk_info tpi; + int i; + bool csum_err = false; + + if (parse_gre_header(skb, &tpi, &csum_err) < 0) + goto drop; + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + int ret; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + ret = proto->handler(skb, &tpi); + if (ret == PACKET_RCVD) { + rcu_read_unlock(); + return 0; + } + } + rcu_read_unlock(); + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); +drop: + kfree_skb(skb); + return 0; +} + +static void gre_cisco_err(struct sk_buff *skb, u32 info) +{ + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + * + * Moreover, Cisco "wise men" put GRE key to the third word + * in GRE header. It makes impossible maintaining even soft + * state for keyed + * GRE tunnels with enabled checksum. Tell them "thank you". + * + * Well, I wonder, rfc1812 was written by Cisco employee, + * what the hell these idiots break standards established + * by themselves??? + */ + + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; + struct tnl_ptk_info tpi; + bool csum_err = false; + int i; + + if (parse_gre_header(skb, &tpi, &csum_err)) { + if (!csum_err) /* ignore csum errors. */ + return; + } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + skb->dev->ifindex, 0, IPPROTO_GRE, 0); + return; + } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, + IPPROTO_GRE, 0); + return; + } + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + + if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD) + goto out; + + } +out: + rcu_read_unlock(); +} + +static int gre_rcv(struct sk_buff *skb) +{ + const struct gre_protocol *proto; + u8 ver; + int ret; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + ver = skb->data[1]&0x7f; + if (ver >= GREPROTO_MAX) + goto drop; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (!proto || !proto->handler) + goto drop_unlock; + ret = proto->handler(skb); + rcu_read_unlock(); + return ret; + +drop_unlock: + rcu_read_unlock(); +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static void gre_err(struct sk_buff *skb, u32 info) +{ + const struct gre_protocol *proto; + const struct iphdr *iph = (const struct iphdr *)skb->data; + u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f; + + if (ver >= GREPROTO_MAX) + return; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (proto && proto->err_handler) + proto->err_handler(skb, info); + rcu_read_unlock(); +} + +static const struct net_protocol net_gre_protocol = { + .handler = gre_rcv, + .err_handler = gre_err, + .netns_ok = 1, +}; + +static const struct gre_protocol ipgre_protocol = { + .handler = gre_cisco_rcv, + .err_handler = gre_cisco_err, +}; + +int gre_cisco_register(struct gre_cisco_protocol *newp) +{ + struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) + &gre_cisco_proto_list[newp->priority]; + + return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY; +} +EXPORT_SYMBOL_GPL(gre_cisco_register); + +int gre_cisco_unregister(struct gre_cisco_protocol *del_proto) +{ + struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **) + &gre_cisco_proto_list[del_proto->priority]; + int ret; + + ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL; + + if (ret) + return ret; + + synchronize_net(); + return 0; +} +EXPORT_SYMBOL_GPL(gre_cisco_unregister); + +static int __init gre_init(void) +{ + pr_info("GRE over IPv4 demultiplexor driver\n"); + + if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { + pr_err("can't add protocol\n"); + goto err; + } + + if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) { + pr_info("%s: can't add ipgre handler\n", __func__); + goto err_gre; + } + + if (gre_offload_init()) { + pr_err("can't add protocol offload\n"); + goto err_gso; + } + + return 0; +err_gso: + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); +err_gre: + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +err: + return -EAGAIN; +} + +static void __exit gre_exit(void) +{ + gre_offload_exit(); + + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +} + +module_init(gre_init); +module_exit(gre_exit); + +MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c new file mode 100644 index 000000000000..55e6bfb3a289 --- /dev/null +++ b/net/ipv4/gre_offload.c @@ -0,0 +1,130 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * GRE GSO support + */ + +#include <linux/skbuff.h> +#include <net/protocol.h> +#include <net/gre.h> + +static int gre_gso_send_check(struct sk_buff *skb) +{ + if (!skb->encapsulation) + return -EINVAL; + return 0; +} + +static struct sk_buff *gre_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t enc_features; + int ghl = GRE_HEADER_SECTION; + struct gre_base_hdr *greh; + int mac_len = skb->mac_len; + __be16 protocol = skb->protocol; + int tnl_hlen; + bool csum; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE))) + goto out; + + if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) + goto out; + + greh = (struct gre_base_hdr *)skb_transport_header(skb); + + if (greh->flags & GRE_KEY) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_SEQ) + ghl += GRE_HEADER_SECTION; + if (greh->flags & GRE_CSUM) { + ghl += GRE_HEADER_SECTION; + csum = true; + } else + csum = false; + + /* setup inner skb. */ + skb->protocol = greh->protocol; + skb->encapsulation = 0; + + if (unlikely(!pskb_may_pull(skb, ghl))) + goto out; + + __skb_pull(skb, ghl); + skb_reset_mac_header(skb); + skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb->mac_len = skb_inner_network_offset(skb); + + /* segment inner packet. */ + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, enc_features); + if (!segs || IS_ERR(segs)) + goto out; + + skb = segs; + tnl_hlen = skb_tnl_header_len(skb); + do { + __skb_push(skb, ghl); + if (csum) { + __be32 *pcsum; + + if (skb_has_shared_frag(skb)) { + int err; + + err = __skb_linearize(skb); + if (err) { + kfree_skb_list(segs); + segs = ERR_PTR(err); + goto out; + } + } + + greh = (struct gre_base_hdr *)(skb->data); + pcsum = (__be32 *)(greh + 1); + *pcsum = 0; + *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); + } + __skb_push(skb, tnl_hlen - ghl); + + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + skb->mac_len = mac_len; + skb->protocol = protocol; + } while ((skb = skb->next)); +out: + return segs; +} + +static const struct net_offload gre_offload = { + .callbacks = { + .gso_send_check = gre_gso_send_check, + .gso_segment = gre_gso_segment, + }, +}; + +int __init gre_offload_init(void) +{ + return inet_add_offload(&gre_offload, IPPROTO_GRE); +} + +void __exit gre_offload_exit(void) +{ + inet_del_offload(&gre_offload, IPPROTO_GRE); +} diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 76e10b47e053..5f7d11a45871 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -482,7 +482,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct iphdr *iph; int room; - struct icmp_bxm icmp_param; + struct icmp_bxm *icmp_param; struct rtable *rt = skb_rtable(skb_in); struct ipcm_cookie ipc; struct flowi4 fl4; @@ -503,7 +503,8 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph = ip_hdr(skb_in); if ((u8 *)iph < skb_in->head || - (skb_in->network_header + sizeof(*iph)) > skb_in->tail) + (skb_network_header(skb_in) + sizeof(*iph)) > + skb_tail_pointer(skb_in)) goto out; /* @@ -557,9 +558,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } } + icmp_param = kmalloc(sizeof(*icmp_param), GFP_ATOMIC); + if (!icmp_param) + return; + sk = icmp_xmit_lock(net); if (sk == NULL) - return; + goto out_free; /* * Construct source address and options. @@ -585,7 +590,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) IPTOS_PREC_INTERNETCONTROL) : iph->tos; - if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) + if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb_in)) goto out_unlock; @@ -593,19 +598,19 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) * Prepare data for ICMP header. */ - icmp_param.data.icmph.type = type; - icmp_param.data.icmph.code = code; - icmp_param.data.icmph.un.gateway = info; - icmp_param.data.icmph.checksum = 0; - icmp_param.skb = skb_in; - icmp_param.offset = skb_network_offset(skb_in); + icmp_param->data.icmph.type = type; + icmp_param->data.icmph.code = code; + icmp_param->data.icmph.un.gateway = info; + icmp_param->data.icmph.checksum = 0; + icmp_param->skb = skb_in; + icmp_param->offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; - ipc.opt = &icmp_param.replyopts.opt; + ipc.opt = &icmp_param->replyopts.opt; ipc.tx_flags = 0; rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, - type, code, &icmp_param); + type, code, icmp_param); if (IS_ERR(rt)) goto out_unlock; @@ -617,19 +622,21 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) room = dst_mtu(&rt->dst); if (room > 576) room = 576; - room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen; + room -= sizeof(struct iphdr) + icmp_param->replyopts.opt.opt.optlen; room -= sizeof(struct icmphdr); - icmp_param.data_len = skb_in->len - icmp_param.offset; - if (icmp_param.data_len > room) - icmp_param.data_len = room; - icmp_param.head_len = sizeof(struct icmphdr); + icmp_param->data_len = skb_in->len - icmp_param->offset; + if (icmp_param->data_len > room) + icmp_param->data_len = room; + icmp_param->head_len = sizeof(struct icmphdr); - icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); + icmp_push_reply(icmp_param, &fl4, &ipc, &rt); ende: ip_rt_put(rt); out_unlock: icmp_xmit_unlock(sk); +out_free: + kfree(icmp_param); out:; } EXPORT_SYMBOL(icmp_send); @@ -657,7 +664,8 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) } /* - * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and + * ICMP_PARAMETERPROB. */ static void icmp_unreach(struct sk_buff *skb) @@ -939,7 +947,8 @@ error: void icmp_err(struct sk_buff *skb, u32 info) { struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int offset = iph->ihl<<2; + struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset); int type = icmp_hdr(skb)->type; int code = icmp_hdr(skb)->code; struct net *net = dev_net(skb->dev); @@ -949,7 +958,7 @@ void icmp_err(struct sk_buff *skb, u32 info) * triggered by ICMP_ECHOREPLY which sent from kernel. */ if (icmph->type != ICMP_ECHOREPLY) { - ping_err(skb, info); + ping_err(skb, offset, info); return; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d8c232794bcb..cd71190d2962 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -363,7 +363,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) static int igmpv3_sendpack(struct sk_buff *skb) { struct igmphdr *pig = igmp_hdr(skb); - const int igmplen = skb->tail - skb->transport_header; + const int igmplen = skb_tail_pointer(skb) - skb_transport_header(skb); pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen); @@ -1217,6 +1217,57 @@ static void igmp_group_added(struct ip_mc_list *im) * Multicast list managers */ +static u32 ip_mc_hash(const struct ip_mc_list *im) +{ + return hash_32((__force u32)im->multiaddr, MC_HASH_SZ_LOG); +} + +static void ip_mc_hash_add(struct in_device *in_dev, + struct ip_mc_list *im) +{ + struct ip_mc_list __rcu **mc_hash; + u32 hash; + + mc_hash = rtnl_dereference(in_dev->mc_hash); + if (mc_hash) { + hash = ip_mc_hash(im); + im->next_hash = mc_hash[hash]; + rcu_assign_pointer(mc_hash[hash], im); + return; + } + + /* do not use a hash table for small number of items */ + if (in_dev->mc_count < 4) + return; + + mc_hash = kzalloc(sizeof(struct ip_mc_list *) << MC_HASH_SZ_LOG, + GFP_KERNEL); + if (!mc_hash) + return; + + for_each_pmc_rtnl(in_dev, im) { + hash = ip_mc_hash(im); + im->next_hash = mc_hash[hash]; + RCU_INIT_POINTER(mc_hash[hash], im); + } + + rcu_assign_pointer(in_dev->mc_hash, mc_hash); +} + +static void ip_mc_hash_remove(struct in_device *in_dev, + struct ip_mc_list *im) +{ + struct ip_mc_list __rcu **mc_hash = rtnl_dereference(in_dev->mc_hash); + struct ip_mc_list *aux; + + if (!mc_hash) + return; + mc_hash += ip_mc_hash(im); + while ((aux = rtnl_dereference(*mc_hash)) != im) + mc_hash = &aux->next_hash; + *mc_hash = im->next_hash; +} + /* * A socket has joined a multicast group on device dev. @@ -1258,6 +1309,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) in_dev->mc_count++; rcu_assign_pointer(in_dev->mc_list, im); + ip_mc_hash_add(in_dev, im); + #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); #endif @@ -1314,6 +1367,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) ip = &i->next_rcu) { if (i->multiaddr == addr) { if (--i->users == 0) { + ip_mc_hash_remove(in_dev, i); *ip = i->next_rcu; in_dev->mc_count--; igmp_group_dropped(i); @@ -1381,13 +1435,9 @@ void ip_mc_init_dev(struct in_device *in_dev) { ASSERT_RTNL(); - in_dev->mc_tomb = NULL; #ifdef CONFIG_IP_MULTICAST - in_dev->mr_gq_running = 0; setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, (unsigned long)in_dev); - in_dev->mr_ifc_count = 0; - in_dev->mc_count = 0; setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; @@ -2321,12 +2371,25 @@ void ip_mc_drop_socket(struct sock *sk) int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto) { struct ip_mc_list *im; + struct ip_mc_list __rcu **mc_hash; struct ip_sf_list *psf; int rv = 0; - for_each_pmc_rcu(in_dev, im) { - if (im->multiaddr == mc_addr) - break; + mc_hash = rcu_dereference(in_dev->mc_hash); + if (mc_hash) { + u32 hash = hash_32((__force u32)mc_addr, MC_HASH_SZ_LOG); + + for (im = rcu_dereference(mc_hash[hash]); + im != NULL; + im = rcu_dereference(im->next_hash)) { + if (im->multiaddr == mc_addr) + break; + } + } else { + for_each_pmc_rcu(in_dev, im) { + if (im->multiaddr == mc_addr) + break; + } } if (im && proto == IPPROTO_IGMP) { rv = 1; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index cec539458307..c5313a9c019b 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -247,8 +247,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, { struct inet_frag_bucket *hb; struct inet_frag_queue *qp; -#ifdef CONFIG_SMP -#endif unsigned int hash; read_lock(&f->lock); /* Protects against hash rebuild */ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 6af375afeeef..7bd8983dbfcf 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -467,7 +467,7 @@ void inet_unhash(struct sock *sk) lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock_bh(lock); - done =__sk_nulls_del_node_init_rcu(sk); + done = __sk_nulls_del_node_init_rcu(sk); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2a83591492dd..1f6eab66f7ce 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -121,103 +121,8 @@ static int ipgre_tunnel_init(struct net_device *dev); static int ipgre_net_id __read_mostly; static int gre_tap_net_id __read_mostly; -static __sum16 check_checksum(struct sk_buff *skb) -{ - __sum16 csum = 0; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - - if (!csum) - break; - /* Fall through. */ - - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - break; - } - - return csum; -} - -static int ip_gre_calc_hlen(__be16 o_flags) -{ - int addend = 4; - - if (o_flags&TUNNEL_CSUM) - addend += 4; - if (o_flags&TUNNEL_KEY) - addend += 4; - if (o_flags&TUNNEL_SEQ) - addend += 4; - return addend; -} - -static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, int *hdr_len) -{ - unsigned int ip_hlen = ip_hdrlen(skb); - const struct gre_base_hdr *greh; - __be32 *options; - - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) - return -EINVAL; - - greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) - return -EINVAL; - - tpi->flags = gre_flags_to_tnl_flags(greh->flags); - *hdr_len = ip_gre_calc_hlen(tpi->flags); - - if (!pskb_may_pull(skb, *hdr_len)) - return -EINVAL; - - greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); - - tpi->proto = greh->protocol; - - options = (__be32 *)(greh + 1); - if (greh->flags & GRE_CSUM) { - if (check_checksum(skb)) { - *csum_err = true; - return -EINVAL; - } - options++; - } - - if (greh->flags & GRE_KEY) { - tpi->key = *options; - options++; - } else - tpi->key = 0; - - if (unlikely(greh->flags & GRE_SEQ)) { - tpi->seq = *options; - options++; - } else - tpi->seq = 0; - - /* WCCP version 1 and 2 protocol decoding. - * - Change protocol to IP - * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header - */ - if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { - tpi->proto = htons(ETH_P_IP); - if ((*(u8 *)options & 0xF0) != 0x40) { - *hdr_len += 4; - if (!pskb_may_pull(skb, *hdr_len)) - return -EINVAL; - } - } - - return 0; -} - -static void ipgre_err(struct sk_buff *skb, u32 info) +static int ipgre_err(struct sk_buff *skb, u32 info, + const struct tnl_ptk_info *tpi) { /* All the routers (except for Linux) return only @@ -239,26 +144,18 @@ static void ipgre_err(struct sk_buff *skb, u32 info) const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct ip_tunnel *t; - struct tnl_ptk_info tpi; - int hdr_len; - bool csum_err = false; - - if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) { - if (!csum_err) /* ignore csum errors. */ - return; - } switch (type) { default: case ICMP_PARAMETERPROB: - return; + return PACKET_RCVD; case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: case ICMP_PORT_UNREACH: /* Impossible event. */ - return; + return PACKET_RCVD; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -269,138 +166,61 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) - return; + return PACKET_RCVD; break; case ICMP_REDIRECT: break; } - if (tpi.proto == htons(ETH_P_TEB)) + if (tpi->proto == htons(ETH_P_TEB)) itn = net_generic(net, gre_tap_net_id); else itn = net_generic(net, ipgre_net_id); iph = (const struct iphdr *)skb->data; - t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, - iph->daddr, iph->saddr, tpi.key); + t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, + iph->daddr, iph->saddr, tpi->key); if (t == NULL) - return; + return PACKET_REJECT; - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_GRE, 0); - return; - } - if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - IPPROTO_GRE, 0); - return; - } if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) - return; + return PACKET_RCVD; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - return; + return PACKET_RCVD; if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; + return PACKET_RCVD; } -static int ipgre_rcv(struct sk_buff *skb) +static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) { struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn; const struct iphdr *iph; struct ip_tunnel *tunnel; - struct tnl_ptk_info tpi; - int hdr_len; - bool csum_err = false; - - if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0) - goto drop; - if (tpi.proto == htons(ETH_P_TEB)) + if (tpi->proto == htons(ETH_P_TEB)) itn = net_generic(net, gre_tap_net_id); else itn = net_generic(net, ipgre_net_id); iph = ip_hdr(skb); - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, - iph->saddr, iph->daddr, tpi.key); + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags, + iph->saddr, iph->daddr, tpi->key); if (tunnel) { - ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); - return 0; - } - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); -drop: - kfree_skb(skb); - return 0; -} - -static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff *skb) -{ - int err; - - if (skb_is_gso(skb)) { - err = skb_unclone(skb, GFP_ATOMIC); - if (unlikely(err)) - goto error; - skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; - return skb; - } else if (skb->ip_summed == CHECKSUM_PARTIAL && - tunnel->parms.o_flags&TUNNEL_CSUM) { - err = skb_checksum_help(skb); - if (unlikely(err)) - goto error; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) - skb->ip_summed = CHECKSUM_NONE; - - return skb; - -error: - kfree_skb(skb); - return ERR_PTR(err); -} - -static struct sk_buff *gre_build_header(struct sk_buff *skb, - const struct tnl_ptk_info *tpi, - int hdr_len) -{ - struct gre_base_hdr *greh; - - skb_push(skb, hdr_len); - - greh = (struct gre_base_hdr *)skb->data; - greh->flags = tnl_flags_to_gre_flags(tpi->flags); - greh->protocol = tpi->proto; - - if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - if (tpi->flags&TUNNEL_SEQ) { - *ptr = tpi->seq; - ptr--; - } - if (tpi->flags&TUNNEL_KEY) { - *ptr = tpi->key; - ptr--; - } - if (tpi->flags&TUNNEL_CSUM && - !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { - *(__sum16 *)ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, - skb->len, 0)); - } + ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error); + return PACKET_RCVD; } - - return skb; + return PACKET_REJECT; } static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, @@ -410,11 +230,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel *tunnel = netdev_priv(dev); struct tnl_ptk_info tpi; - if (likely(!skb->encapsulation)) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - tpi.flags = tunnel->parms.o_flags; tpi.proto = proto; tpi.key = tunnel->parms.o_key; @@ -423,13 +238,9 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, tpi.seq = htonl(tunnel->o_seqno); /* Push GRE header. */ - skb = gre_build_header(skb, &tpi, tunnel->hlen); - if (unlikely(!skb)) { - dev->stats.tx_dropped++; - return; - } + gre_build_header(skb, &tpi, tunnel->hlen); - ip_tunnel_xmit(skb, dev, tnl_params); + ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } static netdev_tx_t ipgre_xmit(struct sk_buff *skb, @@ -438,7 +249,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *tnl_params; - skb = handle_offloads(tunnel, skb); + skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); if (IS_ERR(skb)) goto out; @@ -477,7 +288,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, { struct ip_tunnel *tunnel = netdev_priv(dev); - skb = handle_offloads(tunnel, skb); + skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM)); if (IS_ERR(skb)) goto out; @@ -503,10 +314,11 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + return -EINVAL; } p.i_flags = gre_flags_to_tnl_flags(p.i_flags); p.o_flags = gre_flags_to_tnl_flags(p.o_flags); @@ -708,9 +520,10 @@ static int ipgre_tunnel_init(struct net_device *dev) return ip_tunnel_init(dev); } -static const struct gre_protocol ipgre_protocol = { - .handler = ipgre_rcv, - .err_handler = ipgre_err, +static struct gre_cisco_protocol ipgre_protocol = { + .handler = ipgre_rcv, + .err_handler = ipgre_err, + .priority = 0, }; static int __net_init ipgre_init_net(struct net *net) @@ -978,7 +791,7 @@ static int __init ipgre_init(void) if (err < 0) goto pnet_tap_faied; - err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); + err = gre_cisco_register(&ipgre_protocol); if (err < 0) { pr_info("%s: can't add protocol\n", __func__); goto add_proto_failed; @@ -997,7 +810,7 @@ static int __init ipgre_init(void) tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); + gre_cisco_unregister(&ipgre_protocol); add_proto_failed: unregister_pernet_device(&ipgre_tap_net_ops); pnet_tap_faied: @@ -1009,8 +822,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) - pr_info("%s: can't remove protocol\n", __func__); + gre_cisco_unregister(&ipgre_protocol); unregister_pernet_device(&ipgre_tap_net_ops); unregister_pernet_device(&ipgre_net_ops); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 3da817b89e9b..15e3e683adec 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -190,10 +190,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); - __skb_pull(skb, ip_hdrlen(skb)); - - /* Point into the IP datagram, just past the header. */ - skb_reset_transport_header(skb); + __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); { @@ -437,6 +434,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; } + skb->transport_header = skb->network_header + iph->ihl*4; + /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 7fa8f08fa7ae..ca1cb2d5f6e2 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -304,6 +304,7 @@ static struct net_device *__ip_tunnel_create(struct net *net, tunnel = netdev_priv(dev); tunnel->parms = *parms; + tunnel->net = net; err = register_netdevice(dev); if (err) @@ -408,13 +409,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, const struct iphdr *iph = ip_hdr(skb); int err; - secpath_reset(skb); - - skb->protocol = tpi->proto; - - skb->mac_header = skb->network_header; - __pskb_pull(skb, tunnel->hlen); - skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { /* Looped back packet, drop it! */ @@ -442,23 +436,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } - /* Warning: All skb pointers will be invalidated! */ - if (tunnel->dev->type == ARPHRD_ETHER) { - if (!pskb_may_pull(skb, ETH_HLEN)) { - tunnel->dev->stats.rx_length_errors++; - tunnel->dev->stats.rx_errors++; - goto drop; - } - - iph = ip_hdr(skb); - skb->protocol = eth_type_trans(skb, tunnel->dev); - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); - } - - skb->pkt_type = PACKET_HOST; - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { if (log_ecn_error) @@ -477,6 +454,15 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); + if (tunnel->net != dev_net(tunnel->dev)) + skb_scrub_packet(skb); + + if (tunnel->dev->type == ARPHRD_ETHER) { + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } else { + skb->dev = tunnel->dev; + } gro_cells_receive(&tunnel->gro_cells, skb); return 0; @@ -486,24 +472,69 @@ drop: } EXPORT_SYMBOL_GPL(ip_tunnel_rcv); +static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, + struct rtable *rt, __be16 df) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; + int mtu; + + if (df) + mtu = dst_mtu(&rt->dst) - dev->hard_header_len + - sizeof(struct iphdr) - tunnel->hlen; + else + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + if (!skb_is_gso(skb) && + (df & htons(IP_DF)) && mtu < pkt_size) { + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + return -E2BIG; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && + mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && + !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); + } + } + + if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && + mtu < pkt_size) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -E2BIG; + } + } +#endif + return 0; +} + void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, - const struct iphdr *tnl_params) + const struct iphdr *tnl_params, const u8 protocol) { struct ip_tunnel *tunnel = netdev_priv(dev); const struct iphdr *inner_iph; - struct iphdr *iph; struct flowi4 fl4; u8 tos, ttl; __be16 df; struct rtable *rt; /* Route to the other host */ - struct net_device *tdev; /* Device to other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; - int mtu; + int err; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ @@ -561,8 +592,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); } - rt = ip_route_output_tunnel(dev_net(dev), &fl4, - tunnel->parms.iph.protocol, + rt = ip_route_output_tunnel(tunnel->net, &fl4, + protocol, dst, tnl_params->saddr, tunnel->parms.o_key, RT_TOS(tos), @@ -571,58 +602,19 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dev->stats.tx_carrier_errors++; goto tx_error; } - tdev = rt->dst.dev; - - if (tdev == dev) { + if (rt->dst.dev == dev) { ip_rt_put(rt); dev->stats.collisions++; goto tx_error; } - df = tnl_params->frag_off; - - if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - - sizeof(struct iphdr); - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if (skb->protocol == htons(ETH_P_IP)) { - df |= (inner_iph->frag_off&htons(IP_DF)); - - if (!skb_is_gso(skb) && - (inner_iph->frag_off&htons(IP_DF)) && - mtu < ntohs(inner_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + ip_rt_put(rt); + goto tx_error; } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - - if (rt6 && mtu < dst_mtu(skb_dst(skb)) && - mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || - rt6->rt6i_dst.plen == 128) { - rt6->rt6i_flags |= RTF_MODIFIED; - dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); - } - } - if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && - mtu < skb->len) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ip_rt_put(rt); - goto tx_error; - } - } -#endif + if (tunnel->net != dev_net(dev)) + skb_scrub_packet(skb); if (tunnel->err_count > 0) { if (time_before(jiffies, @@ -646,8 +638,12 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } - max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) - + rt->dst.header_len; + df = tnl_params->frag_off; + if (skb->protocol == htons(ETH_P_IP)) + df |= (inner_iph->frag_off&htons(IP_DF)); + + max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr) + + rt->dst.header_len; if (max_headroom > dev->needed_headroom) { dev->needed_headroom = max_headroom; if (skb_cow_head(skb, dev->needed_headroom)) { @@ -657,27 +653,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, } } - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* Push down and install the IP header. */ - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - - iph = ip_hdr(skb); - inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + err = iptunnel_xmit(dev_net(dev), rt, skb, + fl4.saddr, fl4.daddr, protocol, + ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); - iph->version = 4; - iph->ihl = sizeof(struct iphdr) >> 2; - iph->frag_off = df; - iph->protocol = tnl_params->protocol; - iph->tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - iph->ttl = ttl; - tunnel_ip_select_ident(skb, inner_iph, &rt->dst); - - iptunnel_xmit(skb, dev); return; #if IS_ENABLED(CONFIG_IPV6) @@ -926,6 +906,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], if (ip_tunnel_find(itn, p, dev->type)) return -EEXIST; + nt->net = net; nt->parms = *p; err = register_netdevice(dev); if (err) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c new file mode 100644 index 000000000000..7167b08977df --- /dev/null +++ b/net/ipv4/ip_tunnel_core.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/if_arp.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <linux/in6.h> +#include <linux/inetdevice.h> +#include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> + +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/ip_tunnels.h> +#include <net/arp.h> +#include <net/checksum.h> +#include <net/dsfield.h> +#include <net/inet_ecn.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/rtnetlink.h> + +int iptunnel_xmit(struct net *net, struct rtable *rt, + struct sk_buff *skb, + __be32 src, __be32 dst, __u8 proto, + __u8 tos, __u8 ttl, __be16 df) +{ + int pkt_len = skb->len; + struct iphdr *iph; + int err; + + nf_reset(skb); + secpath_reset(skb); + skb->rxhash = 0; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + + /* Push down and install the IP header. */ + __skb_push(skb, sizeof(struct iphdr)); + skb_reset_network_header(skb); + + iph = ip_hdr(skb); + + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + iph->frag_off = df; + iph->protocol = proto; + iph->tos = tos; + iph->daddr = dst; + iph->saddr = src; + iph->ttl = ttl; + tunnel_ip_select_ident(skb, + (const struct iphdr *)skb_inner_network_header(skb), + &rt->dst); + + err = ip_local_out(skb); + if (unlikely(net_xmit_eval(err))) + pkt_len = 0; + return pkt_len; +} +EXPORT_SYMBOL_GPL(iptunnel_xmit); + +int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto) +{ + if (unlikely(!pskb_may_pull(skb, hdr_len))) + return -ENOMEM; + + skb_pull_rcsum(skb, hdr_len); + + if (inner_proto == htons(ETH_P_TEB)) { + struct ethhdr *eh = (struct ethhdr *)skb->data; + + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + return -ENOMEM; + + if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN)) + skb->protocol = eh->h_proto; + else + skb->protocol = htons(ETH_P_802_2); + + } else { + skb->protocol = inner_proto; + } + + nf_reset(skb); + secpath_reset(skb); + if (!skb->l4_rxhash) + skb->rxhash = 0; + skb_dst_drop(skb); + skb->vlan_tci = 0; + skb_set_queue_mapping(skb, 0); + skb->pkt_type = PACKET_HOST; + return 0; +} +EXPORT_SYMBOL_GPL(iptunnel_pull_header); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c118f6b576bb..17cc0ffa8c0d 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -606,17 +606,10 @@ static int __net_init vti_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id); - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - iph->version = 4; iph->protocol = IPPROTO_IPIP; iph->ihl = 5; - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - dev_hold(dev); rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); return 0; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 59cb8c769056..826be4cb482a 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -47,12 +47,9 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 77bfcce64fe5..51fc2a1dcdd3 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -188,8 +188,12 @@ static int ipip_rcv(struct sk_buff *skb) struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); struct ip_tunnel *tunnel; - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; + + iph = ip_hdr(skb); tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->saddr, iph->daddr, 0); if (tunnel) { @@ -222,7 +226,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) skb->encapsulation = 1; } - ip_tunnel_xmit(skb, dev, tiph); + ip_tunnel_xmit(skb, dev, tiph, tiph->protocol); return NETDEV_TX_OK; tx_error: @@ -240,11 +244,13 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - return -EINVAL; - if (p.i_key || p.o_key || p.i_flags || p.o_flags) - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + return -EINVAL; + } + + p.i_key = p.o_key = p.i_flags = p.o_flags = 0; if (p.iph.ttl) p.iph.frag_off |= htons(IP_DF); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d9610ae7855..132a09664704 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -980,7 +980,7 @@ static int ipmr_cache_report(struct mr_table *mrt, /* Copy the IP header */ - skb->network_header = skb->tail; + skb_set_network_header(skb, skb->len); skb_put(skb, ihl); skb_copy_to_linear_data(skb, pkt->data, ihl); ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ @@ -1609,7 +1609,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr_table *mrt; struct vif_device *v; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e7916c193932..4e9028017428 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -111,7 +111,7 @@ config IP_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_ULOG - tristate "ULOG target support" + tristate "ULOG target support (obsolete)" default m if NETFILTER_ADVANCED=n ---help--- diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 5d5d4d1be9c2..30e4de940567 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -108,7 +108,7 @@ static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ptr; + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { @@ -129,7 +129,10 @@ static int masq_inet_event(struct notifier_block *this, void *ptr) { struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; - return masq_device_event(this, event, dev); + struct netdev_notifier_info info; + + netdev_notifier_info_init(&info, dev); + return masq_device_event(this, event, &info); } static struct notifier_block masq_dev_notifier = { diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 32b0e978c8e0..cbc22158af49 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -331,6 +331,12 @@ static int ulog_tg_check(const struct xt_tgchk_param *par) { const struct ipt_ulog_info *loginfo = par->targinfo; + if (!par->net->xt.ulog_warn_deprecated) { + pr_info("ULOG is deprecated and it will be removed soon, " + "use NFLOG instead\n"); + par->net->xt.ulog_warn_deprecated = true; + } + if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { pr_debug("prefix not null-terminated\n"); return -EINVAL; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 567d84168bd2..0a2e0e3e95ba 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -223,7 +223,7 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; -static ctl_table ip_ct_sysctl_table[] = { +static struct ctl_table ip_ct_sysctl_table[] = { { .procname = "ip_conntrack_max", .maxlen = sizeof(int), diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 7d93d62cd5fd..746427c9e719 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -33,7 +33,6 @@ #include <linux/netdevice.h> #include <net/snmp.h> #include <net/ip.h> -#include <net/ipv6.h> #include <net/icmp.h> #include <net/protocol.h> #include <linux/skbuff.h> @@ -46,8 +45,18 @@ #include <net/inet_common.h> #include <net/checksum.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <linux/in6.h> +#include <linux/icmpv6.h> +#include <net/addrconf.h> +#include <net/ipv6.h> +#include <net/transp_v6.h> +#endif -static struct ping_table ping_table; + +struct ping_table ping_table; +struct pingv6_ops pingv6_ops; +EXPORT_SYMBOL_GPL(pingv6_ops); static u16 ping_port_rover; @@ -58,6 +67,7 @@ static inline int ping_hashfn(struct net *net, unsigned int num, unsigned int ma pr_debug("hash(%d) = %d\n", num, res); return res; } +EXPORT_SYMBOL_GPL(ping_hash); static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, struct net *net, unsigned int num) @@ -65,7 +75,7 @@ static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table, return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; } -static int ping_v4_get_port(struct sock *sk, unsigned short ident) +int ping_get_port(struct sock *sk, unsigned short ident) { struct hlist_nulls_node *node; struct hlist_nulls_head *hlist; @@ -103,6 +113,10 @@ next_port: ping_portaddr_for_each_entry(sk2, node, hlist) { isk2 = inet_sk(sk2); + /* BUG? Why is this reuse and not reuseaddr? ping.c + * doesn't turn off SO_REUSEADDR, and it doesn't expect + * that other ping processes can steal its packets. + */ if ((isk2->inet_num == ident) && (sk2 != sk) && (!sk2->sk_reuse || !sk->sk_reuse)) @@ -125,17 +139,18 @@ fail: write_unlock_bh(&ping_table.lock); return 1; } +EXPORT_SYMBOL_GPL(ping_get_port); -static void ping_v4_hash(struct sock *sk) +void ping_hash(struct sock *sk) { - pr_debug("ping_v4_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); + pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); BUG(); /* "Please do not press this button again." */ } -static void ping_v4_unhash(struct sock *sk) +void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); - pr_debug("ping_v4_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); + pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); if (sk_hashed(sk)) { write_lock_bh(&ping_table.lock); hlist_nulls_del(&sk->sk_nulls_node); @@ -146,31 +161,61 @@ static void ping_v4_unhash(struct sock *sk) write_unlock_bh(&ping_table.lock); } } +EXPORT_SYMBOL_GPL(ping_unhash); -static struct sock *ping_v4_lookup(struct net *net, __be32 saddr, __be32 daddr, - u16 ident, int dif) +static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) { struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; + int dif = skb->dev->ifindex; + + if (skb->protocol == htons(ETH_P_IP)) { + pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", + (int)ident, &ip_hdr(skb)->daddr, dif); +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", + (int)ident, &ipv6_hdr(skb)->daddr, dif); +#endif + } - pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", - (int)ident, &daddr, dif); read_lock_bh(&ping_table.lock); ping_portaddr_for_each_entry(sk, hnode, hslot) { isk = inet_sk(sk); - pr_debug("found: %p: num = %d, daddr = %pI4, dif = %d\n", sk, - (int)isk->inet_num, &isk->inet_rcv_saddr, - sk->sk_bound_dev_if); - pr_debug("iterate\n"); if (isk->inet_num != ident) continue; - if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != daddr) - continue; + + if (skb->protocol == htons(ETH_P_IP) && + sk->sk_family == AF_INET) { + pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk, + (int) isk->inet_num, &isk->inet_rcv_saddr, + sk->sk_bound_dev_if); + + if (isk->inet_rcv_saddr && + isk->inet_rcv_saddr != ip_hdr(skb)->daddr) + continue; +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6) && + sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, + (int) isk->inet_num, + &inet6_sk(sk)->rcv_saddr, + sk->sk_bound_dev_if); + + if (!ipv6_addr_any(&np->rcv_saddr) && + !ipv6_addr_equal(&np->rcv_saddr, + &ipv6_hdr(skb)->daddr)) + continue; +#endif + } + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; @@ -200,7 +245,7 @@ static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, } -static int ping_init_sock(struct sock *sk) +int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); kgid_t group = current_egid(); @@ -225,8 +270,9 @@ static int ping_init_sock(struct sock *sk) return -EACCES; } +EXPORT_SYMBOL_GPL(ping_init_sock); -static void ping_close(struct sock *sk, long timeout) +void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", inet_sk(sk), inet_sk(sk)->inet_num); @@ -234,36 +280,122 @@ static void ping_close(struct sock *sk, long timeout) sk_common_release(sk); } +EXPORT_SYMBOL_GPL(ping_close); + +/* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ +static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, + struct sockaddr *uaddr, int addr_len) { + struct net *net = sock_net(sk); + if (sk->sk_family == AF_INET) { + struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; + int chk_addr_ret; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", + sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); + + chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr); + + if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) + chk_addr_ret = RTN_LOCAL; + + if ((sysctl_ip_nonlocal_bind == 0 && + isk->freebind == 0 && isk->transparent == 0 && + chk_addr_ret != RTN_LOCAL) || + chk_addr_ret == RTN_MULTICAST || + chk_addr_ret == RTN_BROADCAST) + return -EADDRNOTAVAIL; + +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; + int addr_type, scoped, has_addr; + struct net_device *dev = NULL; + + if (addr_len < sizeof(*addr)) + return -EINVAL; + + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", + sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); + + addr_type = ipv6_addr_type(&addr->sin6_addr); + scoped = __ipv6_addr_needs_scope_id(addr_type); + if ((addr_type != IPV6_ADDR_ANY && + !(addr_type & IPV6_ADDR_UNICAST)) || + (scoped && !addr->sin6_scope_id)) + return -EINVAL; + + rcu_read_lock(); + if (addr->sin6_scope_id) { + dev = dev_get_by_index_rcu(net, addr->sin6_scope_id); + if (!dev) { + rcu_read_unlock(); + return -ENODEV; + } + } + has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, + scoped); + rcu_read_unlock(); + + if (!(isk->freebind || isk->transparent || has_addr || + addr_type == IPV6_ADDR_ANY)) + return -EADDRNOTAVAIL; + + if (scoped) + sk->sk_bound_dev_if = addr->sin6_scope_id; +#endif + } else { + return -EAFNOSUPPORT; + } + return 0; +} + +static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) +{ + if (saddr->sa_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + struct sockaddr_in *addr = (struct sockaddr_in *) saddr; + isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (saddr->sa_family == AF_INET6) { + struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; + struct ipv6_pinfo *np = inet6_sk(sk); + np->rcv_saddr = np->saddr = addr->sin6_addr; +#endif + } +} +static void ping_clear_saddr(struct sock *sk, int dif) +{ + sk->sk_bound_dev_if = dif; + if (sk->sk_family == AF_INET) { + struct inet_sock *isk = inet_sk(sk); + isk->inet_rcv_saddr = isk->inet_saddr = 0; +#if IS_ENABLED(CONFIG_IPV6) + } else if (sk->sk_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + memset(&np->rcv_saddr, 0, sizeof(np->rcv_saddr)); + memset(&np->saddr, 0, sizeof(np->saddr)); +#endif + } +} /* * We need our own bind because there are no privileged id's == local ports. * Moreover, we don't allow binding to multi- and broadcast addresses. */ -static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) +int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct inet_sock *isk = inet_sk(sk); unsigned short snum; - int chk_addr_ret; int err; + int dif = sk->sk_bound_dev_if; - if (addr_len < sizeof(struct sockaddr_in)) - return -EINVAL; - - pr_debug("ping_v4_bind(sk=%p,sa_addr=%08x,sa_port=%d)\n", - sk, addr->sin_addr.s_addr, ntohs(addr->sin_port)); - - chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr); - if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) - chk_addr_ret = RTN_LOCAL; - - if ((sysctl_ip_nonlocal_bind == 0 && - isk->freebind == 0 && isk->transparent == 0 && - chk_addr_ret != RTN_LOCAL) || - chk_addr_ret == RTN_MULTICAST || - chk_addr_ret == RTN_BROADCAST) - return -EADDRNOTAVAIL; + err = ping_check_bind_addr(sk, isk, uaddr, addr_len); + if (err) + return err; lock_sock(sk); @@ -272,42 +404,50 @@ static int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) goto out; err = -EADDRINUSE; - isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; - snum = ntohs(addr->sin_port); - if (ping_v4_get_port(sk, snum) != 0) { - isk->inet_saddr = isk->inet_rcv_saddr = 0; + ping_set_saddr(sk, uaddr); + snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port); + if (ping_get_port(sk, snum) != 0) { + ping_clear_saddr(sk, dif); goto out; } - pr_debug("after bind(): num = %d, daddr = %pI4, dif = %d\n", + pr_debug("after bind(): num = %d, dif = %d\n", (int)isk->inet_num, - &isk->inet_rcv_saddr, (int)sk->sk_bound_dev_if); err = 0; - if (isk->inet_rcv_saddr) + if ((sk->sk_family == AF_INET && isk->inet_rcv_saddr) || + (sk->sk_family == AF_INET6 && + !ipv6_addr_any(&inet6_sk(sk)->rcv_saddr))) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; + if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; isk->inet_sport = htons(isk->inet_num); isk->inet_daddr = 0; isk->inet_dport = 0; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + memset(&inet6_sk(sk)->daddr, 0, sizeof(inet6_sk(sk)->daddr)); +#endif + sk_dst_reset(sk); out: release_sock(sk); pr_debug("ping_v4_bind -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_bind); /* * Is this a supported type of ICMP message? */ -static inline int ping_supported(int type, int code) +static inline int ping_supported(int family, int type, int code) { - if (type == ICMP_ECHO && code == 0) - return 1; - return 0; + return (family == AF_INET && type == ICMP_ECHO && code == 0) || + (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0); } /* @@ -315,30 +455,42 @@ static inline int ping_supported(int type, int code) * sort of error condition. */ -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); - -void ping_err(struct sk_buff *skb, u32 info) +void ping_err(struct sk_buff *skb, int offset, u32 info) { - struct iphdr *iph = (struct iphdr *)skb->data; - struct icmphdr *icmph = (struct icmphdr *)(skb->data+(iph->ihl<<2)); + int family; + struct icmphdr *icmph; struct inet_sock *inet_sock; - int type = icmp_hdr(skb)->type; - int code = icmp_hdr(skb)->code; + int type; + int code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; int err; + if (skb->protocol == htons(ETH_P_IP)) { + family = AF_INET; + type = icmp_hdr(skb)->type; + code = icmp_hdr(skb)->code; + icmph = (struct icmphdr *)(skb->data + offset); + } else if (skb->protocol == htons(ETH_P_IPV6)) { + family = AF_INET6; + type = icmp6_hdr(skb)->icmp6_type; + code = icmp6_hdr(skb)->icmp6_code; + icmph = (struct icmphdr *) (skb->data + offset); + } else { + BUG(); + } + /* We assume the packet has already been checked by icmp_unreach */ - if (!ping_supported(icmph->type, icmph->code)) + if (!ping_supported(family, icmph->type, icmph->code)) return; - pr_debug("ping_err(type=%04x,code=%04x,id=%04x,seq=%04x)\n", type, - code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); + pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n", + skb->protocol, type, code, ntohs(icmph->un.echo.id), + ntohs(icmph->un.echo.sequence)); - sk = ping_v4_lookup(net, iph->daddr, iph->saddr, - ntohs(icmph->un.echo.id), skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk == NULL) { pr_debug("no socket, dropping\n"); return; /* No socket for error */ @@ -349,72 +501,83 @@ void ping_err(struct sk_buff *skb, u32 info) harderr = 0; inet_sock = inet_sk(sk); - switch (type) { - default: - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - case ICMP_SOURCE_QUENCH: - /* This is not a real error but ping wants to see it. - * Report it with some fake errno. */ - err = EREMOTEIO; - break; - case ICMP_PARAMETERPROB: - err = EPROTO; - harderr = 1; - break; - case ICMP_DEST_UNREACH: - if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - ipv4_sk_update_pmtu(skb, sk, info); - if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { - err = EMSGSIZE; - harderr = 1; - break; + if (skb->protocol == htons(ETH_P_IP)) { + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + /* This is not a real error but ping wants to see it. + * Report it with some fake errno. + */ + err = EREMOTEIO; + break; + case ICMP_PARAMETERPROB: + err = EPROTO; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + ipv4_sk_update_pmtu(skb, sk, info); + if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { + err = EMSGSIZE; + harderr = 1; + break; + } + goto out; } - goto out; - } - err = EHOSTUNREACH; - if (code <= NR_ICMP_UNREACH) { - harderr = icmp_err_convert[code].fatal; - err = icmp_err_convert[code].errno; + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + case ICMP_REDIRECT: + /* See ICMP_SOURCE_QUENCH */ + ipv4_sk_redirect(skb, sk); + err = EREMOTEIO; + break; } - break; - case ICMP_REDIRECT: - /* See ICMP_SOURCE_QUENCH */ - ipv4_sk_redirect(skb, sk); - err = EREMOTEIO; - break; +#if IS_ENABLED(CONFIG_IPV6) + } else if (skb->protocol == htons(ETH_P_IPV6)) { + harderr = pingv6_ops.icmpv6_err_convert(type, code, &err); +#endif } /* * RFC1122: OK. Passes ICMP errors back to application, as per * 4.1.3.3. */ - if (!inet_sock->recverr) { + if ((family == AF_INET && !inet_sock->recverr) || + (family == AF_INET6 && !inet6_sk(sk)->recverr)) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { - ip_icmp_error(sk, skb, err, 0 /* no remote port */, - info, (u8 *)icmph); + if (family == AF_INET) { + ip_icmp_error(sk, skb, err, 0 /* no remote port */, + info, (u8 *)icmph); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + pingv6_ops.ipv6_icmp_error(sk, skb, err, 0, + info, (u8 *)icmph); +#endif + } } sk->sk_err = err; sk->sk_error_report(sk); out: sock_put(sk); } +EXPORT_SYMBOL_GPL(ping_err); /* - * Copy and checksum an ICMP Echo packet from user space into a buffer. + * Copy and checksum an ICMP Echo packet from user space into a buffer + * starting from the payload. */ -struct pingfakehdr { - struct icmphdr icmph; - struct iovec *iov; - __wsum wcheck; -}; - -static int ping_getfrag(void *from, char *to, - int offset, int fraglen, int odd, struct sk_buff *skb) +int ping_getfrag(void *from, char *to, + int offset, int fraglen, int odd, struct sk_buff *skb) { struct pingfakehdr *pfh = (struct pingfakehdr *)from; @@ -425,20 +588,33 @@ static int ping_getfrag(void *from, char *to, pfh->iov, 0, fraglen - sizeof(struct icmphdr), &pfh->wcheck)) return -EFAULT; + } else if (offset < sizeof(struct icmphdr)) { + BUG(); + } else { + if (csum_partial_copy_fromiovecend + (to, pfh->iov, offset - sizeof(struct icmphdr), + fraglen, &pfh->wcheck)) + return -EFAULT; + } - return 0; +#if IS_ENABLED(CONFIG_IPV6) + /* For IPv6, checksum each skb as we go along, as expected by + * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in + * wcheck, it will be finalized in ping_v4_push_pending_frames. + */ + if (pfh->family == AF_INET6) { + skb->csum = pfh->wcheck; + skb->ip_summed = CHECKSUM_NONE; + pfh->wcheck = 0; } - if (offset < sizeof(struct icmphdr)) - BUG(); - if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) - return -EFAULT; +#endif + return 0; } +EXPORT_SYMBOL_GPL(ping_getfrag); -static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, - struct flowi4 *fl4) +static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, + struct flowi4 *fl4) { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); @@ -450,24 +626,9 @@ static int ping_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, return ip_push_pending_frames(sk, fl4); } -static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) -{ - struct net *net = sock_net(sk); - struct flowi4 fl4; - struct inet_sock *inet = inet_sk(sk); - struct ipcm_cookie ipc; - struct icmphdr user_icmph; - struct pingfakehdr pfh; - struct rtable *rt = NULL; - struct ip_options_data opt_copy; - int free = 0; - __be32 saddr, daddr, faddr; - u8 tos; - int err; - - pr_debug("ping_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); - +int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, + void *user_icmph, size_t icmph_len) { + u8 type, code; if (len > 0xFFFF) return -EMSGSIZE; @@ -482,15 +643,53 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, /* * Fetch the ICMP header provided by the userland. - * iovec is modified! + * iovec is modified! The ICMP header is consumed. */ - - if (memcpy_fromiovec((u8 *)&user_icmph, msg->msg_iov, - sizeof(struct icmphdr))) + if (memcpy_fromiovec(user_icmph, msg->msg_iov, icmph_len)) return -EFAULT; - if (!ping_supported(user_icmph.type, user_icmph.code)) + + if (family == AF_INET) { + type = ((struct icmphdr *) user_icmph)->type; + code = ((struct icmphdr *) user_icmph)->code; +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + type = ((struct icmp6hdr *) user_icmph)->icmp6_type; + code = ((struct icmp6hdr *) user_icmph)->icmp6_code; +#endif + } else { + BUG(); + } + + if (!ping_supported(family, type, code)) return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(ping_common_sendmsg); + +int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct net *net = sock_net(sk); + struct flowi4 fl4; + struct inet_sock *inet = inet_sk(sk); + struct ipcm_cookie ipc; + struct icmphdr user_icmph; + struct pingfakehdr pfh; + struct rtable *rt = NULL; + struct ip_options_data opt_copy; + int free = 0; + __be32 saddr, daddr, faddr; + u8 tos; + int err; + + pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + /* * Get and verify the address. */ @@ -595,13 +794,14 @@ back_from_confirm: pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; pfh.iov = msg->msg_iov; pfh.wcheck = 0; + pfh.family = AF_INET; err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else - err = ping_push_pending_frames(sk, &pfh, &fl4); + err = ping_v4_push_pending_frames(sk, &pfh, &fl4); release_sock(sk); out: @@ -622,11 +822,13 @@ do_confirm: goto out; } -static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) +int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + int family = sk->sk_family; + struct sockaddr_in *sin; + struct sockaddr_in6 *sin6; struct sk_buff *skb; int copied, err; @@ -636,11 +838,22 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); + if (addr_len) { + if (family == AF_INET) + *addr_len = sizeof(*sin); + else if (family == AF_INET6 && addr_len) + *addr_len = sizeof(*sin6); + } - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + if (flags & MSG_ERRQUEUE) { + if (family == AF_INET) { + return ip_recv_error(sk, msg, len); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + return pingv6_ops.ipv6_recv_error(sk, msg, len); +#endif + } + } skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -659,15 +872,40 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sock_recv_timestamp(msg, sk, skb); - /* Copy the address. */ - if (sin) { + /* Copy the address and add cmsg data. */ + if (family == AF_INET) { + sin = (struct sockaddr_in *) msg->msg_name; sin->sin_family = AF_INET; sin->sin_port = 0 /* skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + + if (isk->cmsg_flags) + ip_cmsg_recv(msg, skb); + +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6hdr *ip6 = ipv6_hdr(skb); + sin6 = (struct sockaddr_in6 *) msg->msg_name; + sin6->sin6_family = AF_INET6; + sin6->sin6_port = 0; + sin6->sin6_addr = ip6->saddr; + + sin6->sin6_flowinfo = 0; + if (np->sndflow) + sin6->sin6_flowinfo = ip6_flowinfo(ip6); + + sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, + IP6CB(skb)->iif); + + if (inet6_sk(sk)->rxopt.all) + pingv6_ops.ip6_datagram_recv_ctl(sk, msg, skb); +#endif + } else { + BUG(); } - if (isk->cmsg_flags) - ip_cmsg_recv(msg, skb); + err = copied; done: @@ -676,8 +914,9 @@ out: pr_debug("ping_recvmsg -> %d\n", err); return err; } +EXPORT_SYMBOL_GPL(ping_recvmsg); -static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); @@ -688,6 +927,7 @@ static int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } return 0; } +EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); /* @@ -698,10 +938,7 @@ void ping_rcv(struct sk_buff *skb) { struct sock *sk; struct net *net = dev_net(skb->dev); - struct iphdr *iph = ip_hdr(skb); struct icmphdr *icmph = icmp_hdr(skb); - __be32 saddr = iph->saddr; - __be32 daddr = iph->daddr; /* We assume the packet has already been checked by icmp_rcv */ @@ -711,8 +948,7 @@ void ping_rcv(struct sk_buff *skb) /* Push ICMP header back */ skb_push(skb, skb->data - (u8 *)icmph); - sk = ping_v4_lookup(net, saddr, daddr, ntohs(icmph->un.echo.id), - skb->dev->ifindex); + sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk != NULL) { pr_debug("rcv on socket %p\n", sk); ping_queue_rcv_skb(sk, skb_get(skb)); @@ -723,6 +959,7 @@ void ping_rcv(struct sk_buff *skb) /* We're called from icmp_rcv(). kfree_skb() is done there. */ } +EXPORT_SYMBOL_GPL(ping_rcv); struct proto ping_prot = { .name = "PING", @@ -733,14 +970,14 @@ struct proto ping_prot = { .disconnect = udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .sendmsg = ping_sendmsg, + .sendmsg = ping_v4_sendmsg, .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, .release_cb = ip4_datagram_release_cb, - .hash = ping_v4_hash, - .unhash = ping_v4_unhash, - .get_port = ping_v4_get_port, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, .obj_size = sizeof(struct inet_sock), }; EXPORT_SYMBOL(ping_prot); @@ -764,7 +1001,8 @@ static struct sock *ping_get_first(struct seq_file *seq, int start) continue; sk_nulls_for_each(sk, node, hslot) { - if (net_eq(sock_net(sk), net)) + if (net_eq(sock_net(sk), net) && + sk->sk_family == state->family) goto found; } } @@ -797,17 +1035,24 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) return pos ? NULL : sk; } -static void *ping_seq_start(struct seq_file *seq, loff_t *pos) +void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) { struct ping_iter_state *state = seq->private; state->bucket = 0; + state->family = family; read_lock_bh(&ping_table.lock); return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } +EXPORT_SYMBOL_GPL(ping_seq_start); + +static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos) +{ + return ping_seq_start(seq, pos, AF_INET); +} -static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) +void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; @@ -819,13 +1064,15 @@ static void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } +EXPORT_SYMBOL_GPL(ping_seq_next); -static void ping_seq_stop(struct seq_file *seq, void *v) +void ping_seq_stop(struct seq_file *seq, void *v) { read_unlock_bh(&ping_table.lock); } +EXPORT_SYMBOL_GPL(ping_seq_stop); -static void ping_format_sock(struct sock *sp, struct seq_file *f, +static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, int bucket, int *len) { struct inet_sock *inet = inet_sk(sp); @@ -846,7 +1093,7 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f, atomic_read(&sp->sk_drops), len); } -static int ping_seq_show(struct seq_file *seq, void *v) +static int ping_v4_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, "%-127s\n", @@ -857,72 +1104,86 @@ static int ping_seq_show(struct seq_file *seq, void *v) struct ping_iter_state *state = seq->private; int len; - ping_format_sock(v, seq, state->bucket, &len); + ping_v4_format_sock(v, seq, state->bucket, &len); seq_printf(seq, "%*s\n", 127 - len, ""); } return 0; } -static const struct seq_operations ping_seq_ops = { - .show = ping_seq_show, - .start = ping_seq_start, +static const struct seq_operations ping_v4_seq_ops = { + .show = ping_v4_seq_show, + .start = ping_v4_seq_start, .next = ping_seq_next, .stop = ping_seq_stop, }; static int ping_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &ping_seq_ops, + struct ping_seq_afinfo *afinfo = PDE_DATA(inode); + return seq_open_net(inode, file, &afinfo->seq_ops, sizeof(struct ping_iter_state)); } -static const struct file_operations ping_seq_fops = { +const struct file_operations ping_seq_fops = { .open = ping_seq_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_net, }; +EXPORT_SYMBOL_GPL(ping_seq_fops); + +static struct ping_seq_afinfo ping_v4_seq_afinfo = { + .name = "icmp", + .family = AF_INET, + .seq_fops = &ping_seq_fops, + .seq_ops = { + .start = ping_v4_seq_start, + .show = ping_v4_seq_show, + .next = ping_seq_next, + .stop = ping_seq_stop, + }, +}; -static int ping_proc_register(struct net *net) +int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo) { struct proc_dir_entry *p; - int rc = 0; - - p = proc_create("icmp", S_IRUGO, net->proc_net, &ping_seq_fops); + p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net, + afinfo->seq_fops, afinfo); if (!p) - rc = -ENOMEM; - return rc; + return -ENOMEM; + return 0; } +EXPORT_SYMBOL_GPL(ping_proc_register); -static void ping_proc_unregister(struct net *net) +void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo) { - remove_proc_entry("icmp", net->proc_net); + remove_proc_entry(afinfo->name, net->proc_net); } +EXPORT_SYMBOL_GPL(ping_proc_unregister); - -static int __net_init ping_proc_init_net(struct net *net) +static int __net_init ping_v4_proc_init_net(struct net *net) { - return ping_proc_register(net); + return ping_proc_register(net, &ping_v4_seq_afinfo); } -static void __net_exit ping_proc_exit_net(struct net *net) +static void __net_exit ping_v4_proc_exit_net(struct net *net) { - ping_proc_unregister(net); + ping_proc_unregister(net, &ping_v4_seq_afinfo); } -static struct pernet_operations ping_net_ops = { - .init = ping_proc_init_net, - .exit = ping_proc_exit_net, +static struct pernet_operations ping_v4_net_ops = { + .init = ping_v4_proc_init_net, + .exit = ping_v4_proc_exit_net, }; int __init ping_proc_init(void) { - return register_pernet_subsys(&ping_net_ops); + return register_pernet_subsys(&ping_v4_net_ops); } void ping_proc_exit(void) { - unregister_pernet_subsys(&ping_net_ops); + unregister_pernet_subsys(&ping_v4_net_ops); } #endif diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 2a5bf86d2415..6577a1149a47 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -273,6 +273,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW), SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES), + SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d35bbf0cf404..a9a54a236832 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -565,10 +565,25 @@ static inline void rt_free(struct rtable *rt) static DEFINE_SPINLOCK(fnhe_lock); +static void fnhe_flush_routes(struct fib_nh_exception *fnhe) +{ + struct rtable *rt; + + rt = rcu_dereference(fnhe->fnhe_rth_input); + if (rt) { + RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); + rt_free(rt); + } + rt = rcu_dereference(fnhe->fnhe_rth_output); + if (rt) { + RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); + rt_free(rt); + } +} + static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception *fnhe, *oldest; - struct rtable *orig; oldest = rcu_dereference(hash->chain); for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; @@ -576,11 +591,7 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) oldest = fnhe; } - orig = rcu_dereference(oldest->fnhe_rth); - if (orig) { - RCU_INIT_POINTER(oldest->fnhe_rth, NULL); - rt_free(orig); - } + fnhe_flush_routes(oldest); return oldest; } @@ -594,11 +605,25 @@ static inline u32 fnhe_hashfun(__be32 daddr) return hval & (FNHE_HASH_SIZE - 1); } +static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) +{ + rt->rt_pmtu = fnhe->fnhe_pmtu; + rt->dst.expires = fnhe->fnhe_expires; + + if (fnhe->fnhe_gw) { + rt->rt_flags |= RTCF_REDIRECTED; + rt->rt_gateway = fnhe->fnhe_gw; + rt->rt_uses_gateway = 1; + } +} + static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, u32 pmtu, unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; + struct rtable *rt; + unsigned int i; int depth; u32 hval = fnhe_hashfun(daddr); @@ -627,8 +652,15 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_gw = gw; if (pmtu) { fnhe->fnhe_pmtu = pmtu; - fnhe->fnhe_expires = expires; + fnhe->fnhe_expires = max(1UL, expires); } + /* Update all cached dsts too */ + rt = rcu_dereference(fnhe->fnhe_rth_input); + if (rt) + fill_route_from_fnhe(rt, fnhe); + rt = rcu_dereference(fnhe->fnhe_rth_output); + if (rt) + fill_route_from_fnhe(rt, fnhe); } else { if (depth > FNHE_RECLAIM_DEPTH) fnhe = fnhe_oldest(hash); @@ -640,10 +672,27 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_next = hash->chain; rcu_assign_pointer(hash->chain, fnhe); } + fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; fnhe->fnhe_expires = expires; + + /* Exception created; mark the cached routes for the nexthop + * stale, so anyone caching it rechecks if this exception + * applies to them. + */ + rt = rcu_dereference(nh->nh_rth_input); + if (rt) + rt->dst.obsolete = DST_OBSOLETE_KILL; + + for_each_possible_cpu(i) { + struct rtable __rcu **prt; + prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i); + rt = rcu_dereference(*prt); + if (rt) + rt->dst.obsolete = DST_OBSOLETE_KILL; + } } fnhe->fnhe_stamp = jiffies; @@ -922,12 +971,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; - if (!rt->rt_pmtu) { - dst->obsolete = DST_OBSOLETE_KILL; - } else { - rt->rt_pmtu = mtu; - dst->expires = max(1UL, jiffies + ip_rt_mtu_expires); - } + if (rt->rt_pmtu == mtu && + time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2)) + return; rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { @@ -1068,11 +1114,11 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. * - * When a PMTU/redirect information update invalidates a - * route, this is indicated by setting obsolete to - * DST_OBSOLETE_KILL. + * When a PMTU/redirect information update invalidates a route, + * this is indicated by setting obsolete to DST_OBSOLETE_KILL or + * DST_OBSOLETE_DEAD by dst_free(). */ - if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt)) + if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) return NULL; return dst; } @@ -1214,34 +1260,36 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { - struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); - if (orig && rt_is_expired(orig)) { + struct rtable __rcu **porig; + struct rtable *orig; + int genid = fnhe_genid(dev_net(rt->dst.dev)); + + if (rt_is_input_route(rt)) + porig = &fnhe->fnhe_rth_input; + else + porig = &fnhe->fnhe_rth_output; + orig = rcu_dereference(*porig); + + if (fnhe->fnhe_genid != genid) { + fnhe->fnhe_genid = genid; fnhe->fnhe_gw = 0; fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; + fnhe_flush_routes(fnhe); + orig = NULL; } - if (fnhe->fnhe_pmtu) { - unsigned long expires = fnhe->fnhe_expires; - unsigned long diff = expires - jiffies; - - if (time_before(jiffies, expires)) { - rt->rt_pmtu = fnhe->fnhe_pmtu; - dst_set_expires(&rt->dst, diff); - } - } - if (fnhe->fnhe_gw) { - rt->rt_flags |= RTCF_REDIRECTED; - rt->rt_gateway = fnhe->fnhe_gw; - rt->rt_uses_gateway = 1; - } else if (!rt->rt_gateway) + fill_route_from_fnhe(rt, fnhe); + if (!rt->rt_gateway) rt->rt_gateway = daddr; - rcu_assign_pointer(fnhe->fnhe_rth, rt); - if (orig) - rt_free(orig); + if (!(rt->dst.flags & DST_NOCACHE)) { + rcu_assign_pointer(*porig, rt); + if (orig) + rt_free(orig); + ret = true; + } fnhe->fnhe_stamp = jiffies; - ret = true; } spin_unlock_bh(&fnhe_lock); @@ -1473,6 +1521,7 @@ static int __mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { + struct fib_nh_exception *fnhe; struct rtable *rth; int err; struct in_device *out_dev; @@ -1519,8 +1568,13 @@ static int __mkroute_input(struct sk_buff *skb, } } + fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (fnhe != NULL) + rth = rcu_dereference(fnhe->fnhe_rth_input); + else + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -1548,7 +1602,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->dst.input = ip_forward; rth->dst.output = ip_output; - rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag); + rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag); skb_dst_set(skb, &rth->dst); out: err = 0; @@ -1863,7 +1917,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = find_exception(nh, fl4->daddr); if (fnhe) - prth = &fnhe->fnhe_rth; + prth = &fnhe->fnhe_rth_output; else { if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH && @@ -2429,19 +2483,22 @@ static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; -static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, +static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = (struct net *)__ctl->extra1; + if (write) { - rt_cache_flush((struct net *)__ctl->extra1); + rt_cache_flush(net); + fnhe_genid_bump(net); return 0; } return -EINVAL; } -static ctl_table ipv4_route_table[] = { +static struct ctl_table ipv4_route_table[] = { { .procname = "gc_thresh", .data = &ipv4_dst_ops.gc_thresh, @@ -2609,6 +2666,7 @@ static __net_initdata struct pernet_operations sysctl_route_ops = { static __net_init int rt_genid_init(struct net *net) { atomic_set(&net->rt_genid, 0); + atomic_set(&net->fnhe_genid, 0); get_random_bytes(&net->ipv4.dev_addr_genid, sizeof(net->ipv4.dev_addr_genid)); return 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index fa2f63fc453b..610e324348d1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -36,6 +36,8 @@ static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; static int ip_ttl_min = 1; static int ip_ttl_max = 255; +static int tcp_syn_retries_min = 1; +static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; @@ -49,13 +51,13 @@ static void set_local_port_range(int range[2]) } /* Validate changes from /proc interface. */ -static int ipv4_local_port_range(ctl_table *table, int write, +static int ipv4_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; int range[2]; - ctl_table tmp = { + struct ctl_table tmp = { .data = &range, .maxlen = sizeof(range), .mode = table->mode, @@ -100,7 +102,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig } /* Validate changes from /proc interface. */ -static int ipv4_ping_group_range(ctl_table *table, int write, +static int ipv4_ping_group_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -108,7 +110,7 @@ static int ipv4_ping_group_range(ctl_table *table, int write, int ret; gid_t urange[2]; kgid_t low, high; - ctl_table tmp = { + struct ctl_table tmp = { .data = &urange, .maxlen = sizeof(urange), .mode = table->mode, @@ -135,11 +137,11 @@ static int ipv4_ping_group_range(ctl_table *table, int write, return ret; } -static int proc_tcp_congestion_control(ctl_table *ctl, int write, +static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char val[TCP_CA_NAME_MAX]; - ctl_table tbl = { + struct ctl_table tbl = { .data = val, .maxlen = TCP_CA_NAME_MAX, }; @@ -153,12 +155,12 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, return ret; } -static int proc_tcp_available_congestion_control(ctl_table *ctl, +static int proc_tcp_available_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; + struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; int ret; tbl.data = kmalloc(tbl.maxlen, GFP_USER); @@ -170,12 +172,12 @@ static int proc_tcp_available_congestion_control(ctl_table *ctl, return ret; } -static int proc_allowed_congestion_control(ctl_table *ctl, +static int proc_allowed_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; + struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; int ret; tbl.data = kmalloc(tbl.maxlen, GFP_USER); @@ -190,7 +192,7 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } -static int ipv4_tcp_mem(ctl_table *ctl, int write, +static int ipv4_tcp_mem(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -201,7 +203,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, struct mem_cgroup *memcg; #endif - ctl_table tmp = { + struct ctl_table tmp = { .data = &vec, .maxlen = sizeof(vec), .mode = ctl->mode, @@ -233,10 +235,11 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, return 0; } -static int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, - size_t *lenp, loff_t *ppos) +static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) { - ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; + struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; int ret; u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ @@ -331,7 +334,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_syn_retries_min, + .extra2 = &tcp_syn_retries_max }, { .procname = "tcp_synack_retries", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ab450c099aa4..5423223e93c2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -279,6 +279,7 @@ #include <asm/uaccess.h> #include <asm/ioctls.h> +#include <net/busy_poll.h> int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; @@ -436,6 +437,8 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); + sock_rps_record_flow(sk); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); @@ -1551,6 +1554,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sk_buff *skb; u32 urg_hole = 0; + if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && + (sk->sk_state == TCP_ESTABLISHED)) + sk_busy_loop(sk, nonblock); + lock_sock(sk); err = -ENOTCONN; @@ -2875,249 +2882,9 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - struct tcphdr *th; - unsigned int thlen; - unsigned int seq; - __be32 delta; - unsigned int oldlen; - unsigned int mss; - struct sk_buff *gso_skb = skb; - __sum16 newcheck; - bool ooo_okay, copy_destructor; - - if (!pskb_may_pull(skb, sizeof(*th))) - goto out; - - th = tcp_hdr(skb); - thlen = th->doff * 4; - if (thlen < sizeof(*th)) - goto out; - - if (!pskb_may_pull(skb, thlen)) - goto out; - - oldlen = (u16)~skb->len; - __skb_pull(skb, thlen); - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_TCPV6 | - SKB_GSO_GRE | - SKB_GSO_UDP_TUNNEL | - 0) || - !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - copy_destructor = gso_skb->destructor == tcp_wfree; - ooo_okay = gso_skb->ooo_okay; - /* All segments but the first should have ooo_okay cleared */ - skb->ooo_okay = 0; - - segs = skb_segment(skb, features); - if (IS_ERR(segs)) - goto out; - - /* Only first segment might have ooo_okay set */ - segs->ooo_okay = ooo_okay; - - delta = htonl(oldlen + (thlen + mss)); - - skb = segs; - th = tcp_hdr(skb); - seq = ntohl(th->seq); - - newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); - - do { - th->fin = th->psh = 0; - th->check = newcheck; - - if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = - csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); - - seq += mss; - if (copy_destructor) { - skb->destructor = gso_skb->destructor; - skb->sk = gso_skb->sk; - /* {tcp|sock}_wfree() use exact truesize accounting : - * sum(skb->truesize) MUST be exactly be gso_skb->truesize - * So we account mss bytes of 'true size' for each segment. - * The last segment will contain the remaining. - */ - skb->truesize = mss; - gso_skb->truesize -= mss; - } - skb = skb->next; - th = tcp_hdr(skb); - - th->seq = htonl(seq); - th->cwr = 0; - } while (skb->next); - - /* Following permits TCP Small Queues to work well with GSO : - * The callback to TCP stack will be called at the time last frag - * is freed at TX completion, and not right now when gso_skb - * is freed by GSO engine - */ - if (copy_destructor) { - swap(gso_skb->sk, skb->sk); - swap(gso_skb->destructor, skb->destructor); - swap(gso_skb->truesize, skb->truesize); - } - - delta = htonl(oldlen + (skb->tail - skb->transport_header) + - skb->data_len); - th->check = ~csum_fold((__force __wsum)((__force u32)th->check + - (__force u32)delta)); - if (skb->ip_summed != CHECKSUM_PARTIAL) - th->check = csum_fold(csum_partial(skb_transport_header(skb), - thlen, skb->csum)); - -out: - return segs; -} -EXPORT_SYMBOL(tcp_tso_segment); - -struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) -{ - struct sk_buff **pp = NULL; - struct sk_buff *p; - struct tcphdr *th; - struct tcphdr *th2; - unsigned int len; - unsigned int thlen; - __be32 flags; - unsigned int mss = 1; - unsigned int hlen; - unsigned int off; - int flush = 1; - int i; - - off = skb_gro_offset(skb); - hlen = off + sizeof(*th); - th = skb_gro_header_fast(skb, off); - if (skb_gro_header_hard(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - goto out; - } - - thlen = th->doff * 4; - if (thlen < sizeof(*th)) - goto out; - - hlen = off + thlen; - if (skb_gro_header_hard(skb, hlen)) { - th = skb_gro_header_slow(skb, hlen, off); - if (unlikely(!th)) - goto out; - } - - skb_gro_pull(skb, thlen); - - len = skb_gro_len(skb); - flags = tcp_flag_word(th); - - for (; (p = *head); head = &p->next) { - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - th2 = tcp_hdr(p); - - if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { - NAPI_GRO_CB(p)->same_flow = 0; - continue; - } - - goto found; - } - - goto out_check_final; - -found: - flush = NAPI_GRO_CB(p)->flush; - flush |= (__force int)(flags & TCP_FLAG_CWR); - flush |= (__force int)((flags ^ tcp_flag_word(th2)) & - ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); - flush |= (__force int)(th->ack_seq ^ th2->ack_seq); - for (i = sizeof(*th); i < thlen; i += 4) - flush |= *(u32 *)((u8 *)th + i) ^ - *(u32 *)((u8 *)th2 + i); - - mss = skb_shinfo(p)->gso_size; - - flush |= (len - 1) >= mss; - flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); - - if (flush || skb_gro_receive(head, skb)) { - mss = 1; - goto out_check_final; - } - - p = *head; - th2 = tcp_hdr(p); - tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); - -out_check_final: - flush = len < mss; - flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | - TCP_FLAG_RST | TCP_FLAG_SYN | - TCP_FLAG_FIN)); - - if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) - pp = head; - -out: - NAPI_GRO_CB(skb)->flush |= flush; - - return pp; -} -EXPORT_SYMBOL(tcp_gro_receive); - -int tcp_gro_complete(struct sk_buff *skb) -{ - struct tcphdr *th = tcp_hdr(skb); - - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - - skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; - - if (th->cwr) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - - return 0; -} -EXPORT_SYMBOL(tcp_gro_complete); - #ifdef CONFIG_TCP_MD5SIG -static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool; -static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); +static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_MUTEX(tcp_md5sig_mutex); static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) { @@ -3132,30 +2899,14 @@ static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) free_percpu(pool); } -void tcp_free_md5sig_pool(void) -{ - struct tcp_md5sig_pool __percpu *pool = NULL; - - spin_lock_bh(&tcp_md5sig_pool_lock); - if (--tcp_md5sig_users == 0) { - pool = tcp_md5sig_pool; - tcp_md5sig_pool = NULL; - } - spin_unlock_bh(&tcp_md5sig_pool_lock); - if (pool) - __tcp_free_md5sig_pool(pool); -} -EXPORT_SYMBOL(tcp_free_md5sig_pool); - -static struct tcp_md5sig_pool __percpu * -__tcp_alloc_md5sig_pool(struct sock *sk) +static void __tcp_alloc_md5sig_pool(void) { int cpu; struct tcp_md5sig_pool __percpu *pool; pool = alloc_percpu(struct tcp_md5sig_pool); if (!pool) - return NULL; + return; for_each_possible_cpu(cpu) { struct crypto_hash *hash; @@ -3166,53 +2917,27 @@ __tcp_alloc_md5sig_pool(struct sock *sk) per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; } - return pool; + /* before setting tcp_md5sig_pool, we must commit all writes + * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool() + */ + smp_wmb(); + tcp_md5sig_pool = pool; + return; out_free: __tcp_free_md5sig_pool(pool); - return NULL; } -struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) +bool tcp_alloc_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *pool; - bool alloc = false; - -retry: - spin_lock_bh(&tcp_md5sig_pool_lock); - pool = tcp_md5sig_pool; - if (tcp_md5sig_users++ == 0) { - alloc = true; - spin_unlock_bh(&tcp_md5sig_pool_lock); - } else if (!pool) { - tcp_md5sig_users--; - spin_unlock_bh(&tcp_md5sig_pool_lock); - cpu_relax(); - goto retry; - } else - spin_unlock_bh(&tcp_md5sig_pool_lock); - - if (alloc) { - /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool __percpu *p; - - p = __tcp_alloc_md5sig_pool(sk); - spin_lock_bh(&tcp_md5sig_pool_lock); - if (!p) { - tcp_md5sig_users--; - spin_unlock_bh(&tcp_md5sig_pool_lock); - return NULL; - } - pool = tcp_md5sig_pool; - if (pool) { - /* oops, it has already been assigned. */ - spin_unlock_bh(&tcp_md5sig_pool_lock); - __tcp_free_md5sig_pool(p); - } else { - tcp_md5sig_pool = pool = p; - spin_unlock_bh(&tcp_md5sig_pool_lock); - } + if (unlikely(!tcp_md5sig_pool)) { + mutex_lock(&tcp_md5sig_mutex); + + if (!tcp_md5sig_pool) + __tcp_alloc_md5sig_pool(); + + mutex_unlock(&tcp_md5sig_mutex); } - return pool; + return tcp_md5sig_pool != NULL; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -3229,28 +2954,15 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) struct tcp_md5sig_pool __percpu *p; local_bh_disable(); - - spin_lock(&tcp_md5sig_pool_lock); - p = tcp_md5sig_pool; + p = ACCESS_ONCE(tcp_md5sig_pool); if (p) - tcp_md5sig_users++; - spin_unlock(&tcp_md5sig_pool_lock); - - if (p) - return this_cpu_ptr(p); + return __this_cpu_ptr(p); local_bh_enable(); return NULL; } EXPORT_SYMBOL(tcp_get_md5sig_pool); -void tcp_put_md5sig_pool(void) -{ - local_bh_enable(); - tcp_free_md5sig_pool(); -} -EXPORT_SYMBOL(tcp_put_md5sig_pool); - int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, const struct tcphdr *th) { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c6225780bd5..28af45abe062 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -347,24 +347,13 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) } /* 3. Tuning rcvbuf, when connection enters established state. */ - static void tcp_fixup_rcvbuf(struct sock *sk) { u32 mss = tcp_sk(sk)->advmss; - u32 icwnd = TCP_DEFAULT_INIT_RCVWND; int rcvmem; - /* Limit to 10 segments if mss <= 1460, - * or 14600/mss segments, with a minimum of two segments. - */ - if (mss > 1460) - icwnd = max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); - - rcvmem = SKB_TRUESIZE(mss + MAX_TCP_HEADER); - while (tcp_win_from_space(rcvmem) < mss) - rcvmem += 128; - - rcvmem *= icwnd; + rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) * + tcp_default_init_rwnd(mss); if (sk->sk_rcvbuf < rcvmem) sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); @@ -1257,8 +1246,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, if (skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = prev; - if (skb == tp->scoreboard_skb_hint) - tp->scoreboard_skb_hint = prev; if (skb == tp->lost_skb_hint) { tp->lost_skb_hint = prev; tp->lost_cnt_hint -= tcp_skb_pcount(prev); @@ -1966,20 +1953,6 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag) return true; } -static inline int tcp_skb_timedout(const struct sock *sk, - const struct sk_buff *skb) -{ - return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto; -} - -static inline int tcp_head_timedout(const struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - - return tp->packets_out && - tcp_skb_timedout(sk, tcp_write_queue_head(sk)); -} - /* Linux NewReno/SACK/FACK/ECN state machine. * -------------------------------------- * @@ -2086,12 +2059,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) if (tcp_dupack_heuristics(tp) > tp->reordering) return true; - /* Trick#3 : when we use RFC2988 timer restart, fast - * retransmit can be triggered by timeout of queue head. - */ - if (tcp_is_fack(tp) && tcp_head_timedout(sk)) - return true; - /* Trick#4: It is still not OK... But will it be useful to delay * recovery more? */ @@ -2128,44 +2095,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) return false; } -/* New heuristics: it is possible only after we switched to restart timer - * each time when something is ACKed. Hence, we can detect timed out packets - * during fast retransmit without falling to slow start. - * - * Usefulness of this as is very questionable, since we should know which of - * the segments is the next to timeout which is relatively expensive to find - * in general case unless we add some data structure just for that. The - * current approach certainly won't find the right one too often and when it - * finally does find _something_ it usually marks large part of the window - * right away (because a retransmission with a larger timestamp blocks the - * loop from advancing). -ij - */ -static void tcp_timeout_skbs(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - - if (!tcp_is_fack(tp) || !tcp_head_timedout(sk)) - return; - - skb = tp->scoreboard_skb_hint; - if (tp->scoreboard_skb_hint == NULL) - skb = tcp_write_queue_head(sk); - - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (!tcp_skb_timedout(sk, skb)) - break; - - tcp_skb_mark_lost(tp, skb); - } - - tp->scoreboard_skb_hint = skb; - - tcp_verify_left_out(tp); -} - /* Detect loss in event "A" above by marking head of queue up as lost. * For FACK or non-SACK(Reno) senders, the first "packets" number of segments * are considered lost. For RFC3517 SACK, a segment is considered lost if it @@ -2251,8 +2180,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) else if (fast_rexmit) tcp_mark_head_lost(sk, 1, 1); } - - tcp_timeout_skbs(sk); } /* CWND moderation, preventing bursts due to too big ACKs @@ -2307,10 +2234,22 @@ static void DBGUNDO(struct sock *sk, const char *msg) #define DBGUNDO(x...) do { } while (0) #endif -static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) +static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) { struct tcp_sock *tp = tcp_sk(sk); + if (unmark_loss) { + struct sk_buff *skb; + + tcp_for_write_queue(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; + } + tp->lost_out = 0; + tcp_clear_all_retrans_hints(tp); + } + if (tp->prior_ssthresh) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2319,7 +2258,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) else tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1); - if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) { + if (tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; TCP_ECN_withdraw_cwr(tp); } @@ -2327,6 +2266,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } tp->snd_cwnd_stamp = tcp_time_stamp; + tp->undo_marker = 0; } static inline bool tcp_may_undo(const struct tcp_sock *tp) @@ -2346,14 +2286,13 @@ static bool tcp_try_undo_recovery(struct sock *sk) * or our original transmission succeeded. */ DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); - tcp_undo_cwr(sk, true); + tcp_undo_cwnd_reduction(sk, false); if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) mib_idx = LINUX_MIB_TCPLOSSUNDO; else mib_idx = LINUX_MIB_TCPFULLUNDO; NET_INC_STATS_BH(sock_net(sk), mib_idx); - tp->undo_marker = 0; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq @@ -2367,16 +2306,17 @@ static bool tcp_try_undo_recovery(struct sock *sk) } /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */ -static void tcp_try_undo_dsack(struct sock *sk) +static bool tcp_try_undo_dsack(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, "D-SACK"); - tcp_undo_cwr(sk, true); - tp->undo_marker = 0; + tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); + return true; } + return false; } /* We can clear retrans_stamp when there are no retransmissions in the @@ -2408,60 +2348,20 @@ static bool tcp_any_retrans_done(const struct sock *sk) return false; } -/* Undo during fast recovery after partial ACK. */ - -static int tcp_try_undo_partial(struct sock *sk, int acked) -{ - struct tcp_sock *tp = tcp_sk(sk); - /* Partial ACK arrived. Force Hoe's retransmit. */ - int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering); - - if (tcp_may_undo(tp)) { - /* Plain luck! Hole if filled with delayed - * packet, rather than with a retransmit. - */ - if (!tcp_any_retrans_done(sk)) - tp->retrans_stamp = 0; - - tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); - - DBGUNDO(sk, "Hoe"); - tcp_undo_cwr(sk, false); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); - - /* So... Do not make Hoe's retransmit yet. - * If the first packet was delayed, the rest - * ones are most probably delayed as well. - */ - failed = 0; - } - return failed; -} - /* Undo during loss recovery after partial ACK or using F-RTO. */ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) { struct tcp_sock *tp = tcp_sk(sk); if (frto_undo || tcp_may_undo(tp)) { - struct sk_buff *skb; - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - } - - tcp_clear_all_retrans_hints(tp); + tcp_undo_cwnd_reduction(sk, true); DBGUNDO(sk, "partial loss"); - tp->lost_out = 0; - tcp_undo_cwr(sk, true); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); if (frto_undo) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; - tp->undo_marker = 0; if (frto_undo || tcp_is_sack(tp)) tcp_set_ca_state(sk, TCP_CA_Open); return true; @@ -2494,12 +2394,14 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh) TCP_ECN_queue_cwr(tp); } -static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, +static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, int fast_rexmit) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); + int newly_acked_sacked = prior_unsacked - + (tp->packets_out - tp->sacked_out); tp->prr_delivered += newly_acked_sacked; if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) { @@ -2556,7 +2458,7 @@ static void tcp_try_keep_open(struct sock *sk) } } -static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) +static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) { struct tcp_sock *tp = tcp_sk(sk); @@ -2573,7 +2475,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked) if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) tcp_moderate_cwnd(tp); } else { - tcp_cwnd_reduction(sk, newly_acked_sacked, 0); + tcp_cwnd_reduction(sk, prior_unsacked, 0); } } @@ -2731,6 +2633,40 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) tcp_xmit_retransmit_queue(sk); } +/* Undo during fast recovery after partial ACK. */ +static bool tcp_try_undo_partial(struct sock *sk, const int acked, + const int prior_unsacked) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->undo_marker && tcp_packet_delayed(tp)) { + /* Plain luck! Hole if filled with delayed + * packet, rather than with a retransmit. + */ + tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); + + /* We are getting evidence that the reordering degree is higher + * than we realized. If there are no retransmits out then we + * can undo. Otherwise we clock out new packets but do not + * mark more packets lost or retransmit more. + */ + if (tp->retrans_out) { + tcp_cwnd_reduction(sk, prior_unsacked, 0); + return true; + } + + if (!tcp_any_retrans_done(sk)) + tp->retrans_stamp = 0; + + DBGUNDO(sk, "partial recovery"); + tcp_undo_cwnd_reduction(sk, true); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO); + tcp_try_keep_open(sk); + return true; + } + return false; +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2742,15 +2678,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int prior_sacked, int prior_packets, +static void tcp_fastretrans_alert(struct sock *sk, const int acked, + const int prior_unsacked, bool is_dupack, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && + bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int newly_acked_sacked = 0; int fast_rexmit = 0; if (WARN_ON(!tp->packets_out && tp->sacked_out)) @@ -2802,10 +2737,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (!(flag & FLAG_SND_UNA_ADVANCED)) { if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); - } else - do_lost = tcp_try_undo_partial(sk, pkts_acked); - newly_acked_sacked = prior_packets - tp->packets_out + - tp->sacked_out - prior_sacked; + } else { + if (tcp_try_undo_partial(sk, acked, prior_unsacked)) + return; + /* Partial ACK arrived. Force fast retransmit. */ + do_lost = tcp_is_reno(tp) || + tcp_fackets_out(tp) > tp->reordering; + } + if (tcp_try_undo_dsack(sk)) { + tcp_try_keep_open(sk); + return; + } break; case TCP_CA_Loss: tcp_process_loss(sk, flag, is_dupack); @@ -2819,14 +2761,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, if (is_dupack) tcp_add_reno_sack(sk); } - newly_acked_sacked = prior_packets - tp->packets_out + - tp->sacked_out - prior_sacked; if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk, flag)) { - tcp_try_to_open(sk, flag, newly_acked_sacked); + tcp_try_to_open(sk, flag, prior_unsacked); return; } @@ -2846,9 +2786,9 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, fast_rexmit = 1; } - if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk))) + if (do_lost) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit); + tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit); tcp_xmit_retransmit_queue(sk); } @@ -3079,7 +3019,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - tp->scoreboard_skb_hint = NULL; if (skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = NULL; if (skb == tp->lost_skb_hint) @@ -3333,9 +3272,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 prior_in_flight; u32 prior_fackets; int prior_packets = tp->packets_out; - int prior_sacked = tp->sacked_out; - int pkts_acked = 0; - int previous_packets_out = 0; + const int prior_unsacked = tp->packets_out - tp->sacked_out; + int acked = 0; /* Number of packets newly acked */ /* If the ack is older than previous acks * then we can probably ignore it. @@ -3410,18 +3348,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - previous_packets_out = tp->packets_out; + acked = tp->packets_out; flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); - - pkts_acked = previous_packets_out - tp->packets_out; + acked -= tp->packets_out; if (tcp_ack_is_dubious(sk, flag)) { /* Advance CWND, if state allows this. */ if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); } else { if (flag & FLAG_DATA_ACKED) tcp_cong_avoid(sk, ack, prior_in_flight); @@ -3443,8 +3380,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3466,8 +3403,8 @@ old_ack: */ if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); - tcp_fastretrans_alert(sk, pkts_acked, prior_sacked, - prior_packets, is_dupack, flag); + tcp_fastretrans_alert(sk, acked, prior_unsacked, + is_dupack, flag); } SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); @@ -3780,6 +3717,7 @@ void tcp_reset(struct sock *sk) static void tcp_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + const struct dst_entry *dst; inet_csk_schedule_ack(sk); @@ -3791,7 +3729,9 @@ static void tcp_fin(struct sock *sk) case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); - inet_csk(sk)->icsk_ack.pingpong = 1; + dst = __sk_dst_get(sk); + if (!dst || !dst_metric(dst, RTAX_QUICKACK)) + inet_csk(sk)->icsk_ack.pingpong = 1; break; case TCP_CLOSE_WAIT: @@ -5601,6 +5541,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *req; int queued = 0; + bool acceptable; tp->rx_opt.saw_tstamp = 0; @@ -5671,157 +5612,147 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; /* step 5: check the ACK field */ - if (true) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT) > 0; - - switch (sk->sk_state) { - case TCP_SYN_RECV: - if (acceptable) { - /* Once we leave TCP_SYN_RECV, we no longer - * need req so release it. - */ - if (req) { - tcp_synack_rtt_meas(sk, req); - tp->total_retrans = req->num_retrans; - - reqsk_fastopen_remove(sk, req, false); - } else { - /* Make sure socket is routed, for - * correct metrics. - */ - icsk->icsk_af_ops->rebuild_header(sk); - tcp_init_congestion_control(sk); + acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | + FLAG_UPDATE_TS_RECENT) > 0; - tcp_mtup_init(sk); - tcp_init_buffer_space(sk); - tp->copied_seq = tp->rcv_nxt; - } - smp_mb(); - tcp_set_state(sk, TCP_ESTABLISHED); - sk->sk_state_change(sk); - - /* Note, that this wakeup is only for marginal - * crossed SYN case. Passively open sockets - * are not waked up, because sk->sk_sleep == - * NULL and sk->sk_socket == NULL. - */ - if (sk->sk_socket) - sk_wake_async(sk, - SOCK_WAKE_IO, POLL_OUT); - - tp->snd_una = TCP_SKB_CB(skb)->ack_seq; - tp->snd_wnd = ntohs(th->window) << - tp->rx_opt.snd_wscale; - tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - - if (tp->rx_opt.tstamp_ok) - tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - - if (req) { - /* Re-arm the timer because data may - * have been sent out. This is similar - * to the regular data transmission case - * when new data has just been ack'ed. - * - * (TFO) - we could try to be more - * aggressive and retranmitting any data - * sooner based on when they were sent - * out. - */ - tcp_rearm_rto(sk); - } else - tcp_init_metrics(sk); + switch (sk->sk_state) { + case TCP_SYN_RECV: + if (!acceptable) + return 1; - /* Prevent spurious tcp_cwnd_restart() on - * first data packet. - */ - tp->lsndtime = tcp_time_stamp; + /* Once we leave TCP_SYN_RECV, we no longer need req + * so release it. + */ + if (req) { + tcp_synack_rtt_meas(sk, req); + tp->total_retrans = req->num_retrans; - tcp_initialize_rcv_mss(sk); - tcp_fast_path_on(tp); - } else { - return 1; - } - break; + reqsk_fastopen_remove(sk, req, false); + } else { + /* Make sure socket is routed, for correct metrics. */ + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_congestion_control(sk); + + tcp_mtup_init(sk); + tcp_init_buffer_space(sk); + tp->copied_seq = tp->rcv_nxt; + } + smp_mb(); + tcp_set_state(sk, TCP_ESTABLISHED); + sk->sk_state_change(sk); + + /* Note, that this wakeup is only for marginal crossed SYN case. + * Passively open sockets are not waked up, because + * sk->sk_sleep == NULL and sk->sk_socket == NULL. + */ + if (sk->sk_socket) + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); + + tp->snd_una = TCP_SKB_CB(skb)->ack_seq; + tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - case TCP_FIN_WAIT1: - /* If we enter the TCP_FIN_WAIT1 state and we are a - * Fast Open socket and this is the first acceptable - * ACK we have received, this would have acknowledged - * our SYNACK so stop the SYNACK timer. + if (tp->rx_opt.tstamp_ok) + tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; + + if (req) { + /* Re-arm the timer because data may have been sent out. + * This is similar to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more aggressive and + * retransmitting any data sooner based on when they + * are sent out. */ - if (req != NULL) { - /* Return RST if ack_seq is invalid. - * Note that RFC793 only says to generate a - * DUPACK for it but for TCP Fast Open it seems - * better to treat this case like TCP_SYN_RECV - * above. - */ - if (!acceptable) - return 1; - /* We no longer need the request sock. */ - reqsk_fastopen_remove(sk, req, false); - tcp_rearm_rto(sk); - } - if (tp->snd_una == tp->write_seq) { - struct dst_entry *dst; - - tcp_set_state(sk, TCP_FIN_WAIT2); - sk->sk_shutdown |= SEND_SHUTDOWN; - - dst = __sk_dst_get(sk); - if (dst) - dst_confirm(dst); - - if (!sock_flag(sk, SOCK_DEAD)) - /* Wake up lingering close() */ - sk->sk_state_change(sk); - else { - int tmo; - - if (tp->linger2 < 0 || - (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && - after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { - tcp_done(sk); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); - return 1; - } + tcp_rearm_rto(sk); + } else + tcp_init_metrics(sk); - tmo = tcp_fin_time(sk); - if (tmo > TCP_TIMEWAIT_LEN) { - inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); - } else if (th->fin || sock_owned_by_user(sk)) { - /* Bad case. We could lose such FIN otherwise. - * It is not a big problem, but it looks confusing - * and not so rare event. We still can lose it now, - * if it spins in bh_lock_sock(), but it is really - * marginal case. - */ - inet_csk_reset_keepalive_timer(sk, tmo); - } else { - tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); - goto discard; - } - } - } - break; + /* Prevent spurious tcp_cwnd_restart() on first data packet */ + tp->lsndtime = tcp_time_stamp; - case TCP_CLOSING: - if (tp->snd_una == tp->write_seq) { - tcp_time_wait(sk, TCP_TIME_WAIT, 0); - goto discard; - } + tcp_initialize_rcv_mss(sk); + tcp_fast_path_on(tp); + break; + + case TCP_FIN_WAIT1: { + struct dst_entry *dst; + int tmo; + + /* If we enter the TCP_FIN_WAIT1 state and we are a + * Fast Open socket and this is the first acceptable + * ACK we have received, this would have acknowledged + * our SYNACK so stop the SYNACK timer. + */ + if (req != NULL) { + /* Return RST if ack_seq is invalid. + * Note that RFC793 only says to generate a + * DUPACK for it but for TCP Fast Open it seems + * better to treat this case like TCP_SYN_RECV + * above. + */ + if (!acceptable) + return 1; + /* We no longer need the request sock. */ + reqsk_fastopen_remove(sk, req, false); + tcp_rearm_rto(sk); + } + if (tp->snd_una != tp->write_seq) break; - case TCP_LAST_ACK: - if (tp->snd_una == tp->write_seq) { - tcp_update_metrics(sk); - tcp_done(sk); - goto discard; - } + tcp_set_state(sk, TCP_FIN_WAIT2); + sk->sk_shutdown |= SEND_SHUTDOWN; + + dst = __sk_dst_get(sk); + if (dst) + dst_confirm(dst); + + if (!sock_flag(sk, SOCK_DEAD)) { + /* Wake up lingering close() */ + sk->sk_state_change(sk); break; } + + if (tp->linger2 < 0 || + (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && + after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) { + tcp_done(sk); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA); + return 1; + } + + tmo = tcp_fin_time(sk); + if (tmo > TCP_TIMEWAIT_LEN) { + inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + } else if (th->fin || sock_owned_by_user(sk)) { + /* Bad case. We could lose such FIN otherwise. + * It is not a big problem, but it looks confusing + * and not so rare event. We still can lose it now, + * if it spins in bh_lock_sock(), but it is really + * marginal case. + */ + inet_csk_reset_keepalive_timer(sk, tmo); + } else { + tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); + goto discard; + } + break; + } + + case TCP_CLOSING: + if (tp->snd_una == tp->write_seq) { + tcp_time_wait(sk, TCP_TIME_WAIT, 0); + goto discard; + } + break; + + case TCP_LAST_ACK: + if (tp->snd_una == tp->write_seq) { + tcp_update_metrics(sk); + tcp_done(sk); + goto discard; + } + break; } /* step 6: check the URG bit */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7999fc55c83b..b299da5ff499 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -75,6 +75,7 @@ #include <net/netdma.h> #include <net/secure_seq.h> #include <net/tcp_memcontrol.h> +#include <net/busy_poll.h> #include <linux/inet.h> #include <linux/ipv6.h> @@ -545,8 +546,7 @@ out: sock_put(sk); } -static void __tcp_v4_send_check(struct sk_buff *skb, - __be32 saddr, __be32 daddr) +void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct tcphdr *th = tcp_hdr(skb); @@ -571,23 +571,6 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); -int tcp_v4_gso_send_check(struct sk_buff *skb) -{ - const struct iphdr *iph; - struct tcphdr *th; - - if (!pskb_may_pull(skb, sizeof(*th))) - return -EINVAL; - - iph = ip_hdr(skb); - th = tcp_hdr(skb); - - th->check = 0; - skb->ip_summed = CHECKSUM_PARTIAL; - __tcp_v4_send_check(skb, iph->saddr, iph->daddr); - return 0; -} - /* * This routine will send an RST to the other tcp. * @@ -1026,7 +1009,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, key = sock_kmalloc(sk, sizeof(*key), gfp); if (!key) return -ENOMEM; - if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) { + if (!tcp_alloc_md5sig_pool()) { sock_kfree_s(sk, key, sizeof(*key)); return -ENOMEM; } @@ -1044,9 +1027,7 @@ EXPORT_SYMBOL(tcp_md5_do_add); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) { - struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; - struct tcp_md5sig_info *md5sig; key = tcp_md5_do_lookup(sk, addr, family); if (!key) @@ -1054,10 +1035,6 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); kfree_rcu(key, rcu); - md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk)); - if (hlist_empty(&md5sig->head)) - tcp_free_md5sig_pool(); return 0; } EXPORT_SYMBOL(tcp_md5_do_del); @@ -1071,8 +1048,6 @@ static void tcp_clear_md5_list(struct sock *sk) md5sig = rcu_dereference_protected(tp->md5sig_info, 1); - if (!hlist_empty(&md5sig->head)) - tcp_free_md5sig_pool(); hlist_for_each_entry_safe(key, n, &md5sig->head, node) { hlist_del_rcu(&key->node); atomic_sub(sizeof(*key), &sk->sk_omem_alloc); @@ -2019,6 +1994,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; + sk_mark_napi_id(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); @@ -2803,52 +2779,6 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) -{ - const struct iphdr *iph = skb_gro_network_header(skb); - __wsum wsum; - __sum16 sum; - - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } -flush: - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - - case CHECKSUM_NONE: - wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), IPPROTO_TCP, 0); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - } - - return tcp_gro_receive(head, skb); -} - -int tcp4_gro_complete(struct sk_buff *skb) -{ - const struct iphdr *iph = ip_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); - - th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), - iph->saddr, iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - - return tcp_gro_complete(skb); -} - struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0f0178827259..ab1c08658528 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -317,7 +317,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) key = tp->af_specific->md5_lookup(sk, sk); if (key != NULL) { tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); - if (tcptw->tw_md5_key && tcp_alloc_md5sig_pool(sk) == NULL) + if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()) BUG(); } } while (0); @@ -358,10 +358,8 @@ void tcp_twsk_destructor(struct sock *sk) #ifdef CONFIG_TCP_MD5SIG struct tcp_timewait_sock *twsk = tcp_twsk(sk); - if (twsk->tw_md5_key) { - tcp_free_md5sig_pool(); + if (twsk->tw_md5_key) kfree_rcu(twsk->tw_md5_key, rcu); - } #endif } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c new file mode 100644 index 000000000000..3a7525e6c086 --- /dev/null +++ b/net/ipv4/tcp_offload.c @@ -0,0 +1,332 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TCPv4 GSO/GRO support + */ + +#include <linux/skbuff.h> +#include <net/tcp.h> +#include <net/protocol.h> + +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct tcphdr *th; + unsigned int thlen; + unsigned int seq; + __be32 delta; + unsigned int oldlen; + unsigned int mss; + struct sk_buff *gso_skb = skb; + __sum16 newcheck; + bool ooo_okay, copy_destructor; + + if (!pskb_may_pull(skb, sizeof(*th))) + goto out; + + th = tcp_hdr(skb); + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + if (!pskb_may_pull(skb, thlen)) + goto out; + + oldlen = (u16)~skb->len; + __skb_pull(skb, thlen); + + mss = tcp_skb_mss(skb); + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_TCPV6 | + SKB_GSO_GRE | + SKB_GSO_MPLS | + SKB_GSO_UDP_TUNNEL | + 0) || + !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + copy_destructor = gso_skb->destructor == tcp_wfree; + ooo_okay = gso_skb->ooo_okay; + /* All segments but the first should have ooo_okay cleared */ + skb->ooo_okay = 0; + + segs = skb_segment(skb, features); + if (IS_ERR(segs)) + goto out; + + /* Only first segment might have ooo_okay set */ + segs->ooo_okay = ooo_okay; + + delta = htonl(oldlen + (thlen + mss)); + + skb = segs; + th = tcp_hdr(skb); + seq = ntohl(th->seq); + + newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + + do { + th->fin = th->psh = 0; + th->check = newcheck; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = + csum_fold(csum_partial(skb_transport_header(skb), + thlen, skb->csum)); + + seq += mss; + if (copy_destructor) { + skb->destructor = gso_skb->destructor; + skb->sk = gso_skb->sk; + /* {tcp|sock}_wfree() use exact truesize accounting : + * sum(skb->truesize) MUST be exactly be gso_skb->truesize + * So we account mss bytes of 'true size' for each segment. + * The last segment will contain the remaining. + */ + skb->truesize = mss; + gso_skb->truesize -= mss; + } + skb = skb->next; + th = tcp_hdr(skb); + + th->seq = htonl(seq); + th->cwr = 0; + } while (skb->next); + + /* Following permits TCP Small Queues to work well with GSO : + * The callback to TCP stack will be called at the time last frag + * is freed at TX completion, and not right now when gso_skb + * is freed by GSO engine + */ + if (copy_destructor) { + swap(gso_skb->sk, skb->sk); + swap(gso_skb->destructor, skb->destructor); + swap(gso_skb->truesize, skb->truesize); + } + + delta = htonl(oldlen + (skb_tail_pointer(skb) - + skb_transport_header(skb)) + + skb->data_len); + th->check = ~csum_fold((__force __wsum)((__force u32)th->check + + (__force u32)delta)); + if (skb->ip_summed != CHECKSUM_PARTIAL) + th->check = csum_fold(csum_partial(skb_transport_header(skb), + thlen, skb->csum)); +out: + return segs; +} +EXPORT_SYMBOL(tcp_tso_segment); + +struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + struct sk_buff **pp = NULL; + struct sk_buff *p; + struct tcphdr *th; + struct tcphdr *th2; + unsigned int len; + unsigned int thlen; + __be32 flags; + unsigned int mss = 1; + unsigned int hlen; + unsigned int off; + int flush = 1; + int i; + + off = skb_gro_offset(skb); + hlen = off + sizeof(*th); + th = skb_gro_header_fast(skb, off); + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } + + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + hlen = off + thlen; + if (skb_gro_header_hard(skb, hlen)) { + th = skb_gro_header_slow(skb, hlen, off); + if (unlikely(!th)) + goto out; + } + + skb_gro_pull(skb, thlen); + + len = skb_gro_len(skb); + flags = tcp_flag_word(th); + + for (; (p = *head); head = &p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + th2 = tcp_hdr(p); + + if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + + goto found; + } + + goto out_check_final; + +found: + flush = NAPI_GRO_CB(p)->flush; + flush |= (__force int)(flags & TCP_FLAG_CWR); + flush |= (__force int)((flags ^ tcp_flag_word(th2)) & + ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); + flush |= (__force int)(th->ack_seq ^ th2->ack_seq); + for (i = sizeof(*th); i < thlen; i += 4) + flush |= *(u32 *)((u8 *)th + i) ^ + *(u32 *)((u8 *)th2 + i); + + mss = tcp_skb_mss(p); + + flush |= (len - 1) >= mss; + flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); + + if (flush || skb_gro_receive(head, skb)) { + mss = 1; + goto out_check_final; + } + + p = *head; + th2 = tcp_hdr(p); + tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); + +out_check_final: + flush = len < mss; + flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | + TCP_FLAG_RST | TCP_FLAG_SYN | + TCP_FLAG_FIN)); + + if (p && (!NAPI_GRO_CB(skb)->same_flow || flush)) + pp = head; + +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} +EXPORT_SYMBOL(tcp_gro_receive); + +int tcp_gro_complete(struct sk_buff *skb) +{ + struct tcphdr *th = tcp_hdr(skb); + + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + + skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; + + if (th->cwr) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + + return 0; +} +EXPORT_SYMBOL(tcp_gro_complete); + +static int tcp_v4_gso_send_check(struct sk_buff *skb) +{ + const struct iphdr *iph; + struct tcphdr *th; + + if (!pskb_may_pull(skb, sizeof(*th))) + return -EINVAL; + + iph = ip_hdr(skb); + th = tcp_hdr(skb); + + th->check = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + __tcp_v4_send_check(skb, iph->saddr, iph->daddr); + return 0; +} + +static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, + skb->csum)) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; + + case CHECKSUM_NONE: + wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), IPPROTO_TCP, 0); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + } + + return tcp_gro_receive(head, skb); +} + +static int tcp4_gro_complete(struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + struct tcphdr *th = tcp_hdr(skb); + + th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb), + iph->saddr, iph->daddr, 0); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; + + return tcp_gro_complete(skb); +} + +static const struct net_offload tcpv4_offload = { + .callbacks = { + .gso_send_check = tcp_v4_gso_send_check, + .gso_segment = tcp_tso_segment, + .gro_receive = tcp4_gro_receive, + .gro_complete = tcp4_gro_complete, + }, +}; + +int __init tcpv4_offload_init(void) +{ + return inet_add_offload(&tcpv4_offload, IPPROTO_TCP); +} diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ec335fabd5cc..92fde8d1aa82 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -160,6 +160,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp, { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; + const struct dst_entry *dst = __sk_dst_get(sk); if (sysctl_tcp_slow_start_after_idle && (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto)) @@ -170,8 +171,9 @@ static void tcp_event_data_sent(struct tcp_sock *tp, /* If it is a reply for ato after last received * packet, enter pingpong mode. */ - if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) - icsk->icsk_ack.pingpong = 1; + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato && + (!dst || !dst_metric(dst, RTAX_QUICKACK))) + icsk->icsk_ack.pingpong = 1; } /* Account for an ACK we sent. */ @@ -181,6 +183,21 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } + +u32 tcp_default_init_rwnd(u32 mss) +{ + /* Initial receive window should be twice of TCP_INIT_CWND to + * enable proper sending of new unsent data during fast recovery + * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a + * limit when mss is larger than 1460. + */ + u32 init_rwnd = TCP_INIT_CWND * 2; + + if (mss > 1460) + init_rwnd = max((1460 * init_rwnd) / mss, 2U); + return init_rwnd; +} + /* Determine a window scaling and initial window to offer. * Based on the assumption that the given amount of space * will be offered. Store the results in the tp structure. @@ -230,22 +247,10 @@ void tcp_select_initial_window(int __space, __u32 mss, } } - /* Set initial window to a value enough for senders starting with - * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place - * a limit on the initial window when mss is larger than 1460. - */ if (mss > (1 << *rcv_wscale)) { - int init_cwnd = TCP_DEFAULT_INIT_RCVWND; - if (mss > 1460) - init_cwnd = - max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); - /* when initializing use the value from init_rcv_wnd - * rather than the default from above - */ - if (init_rcv_wnd) - *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); - else - *rcv_wnd = min(*rcv_wnd, init_cwnd * mss); + if (!init_rcv_wnd) /* Use default unless specified otherwise */ + init_rcv_wnd = tcp_default_init_rwnd(mss); + *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); } /* Set the clamp no higher than max representable value */ @@ -2402,6 +2407,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) * see tcp_input.c tcp_sacktag_write_queue(). */ TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt; + } else { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } return err; } @@ -2523,10 +2530,9 @@ begin_fwd: if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) continue; - if (tcp_retransmit_skb(sk, skb)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); + if (tcp_retransmit_skb(sk, skb)) return; - } + NET_INC_STATS_BH(sock_net(sk), mib_idx); if (tcp_in_cwnd_reduction(sk)) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0bf5d399a03c..766e6bab9113 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -109,6 +109,7 @@ #include <trace/events/udp.h> #include <linux/static_key.h> #include <trace/events/skb.h> +#include <net/busy_poll.h> #include "udp_impl.h" struct udp_table udp_table __read_mostly; @@ -429,7 +430,7 @@ begin: reuseport = sk->sk_reuseport; if (reuseport) { hash = inet_ehashfn(net, daddr, hnum, - saddr, htons(sport)); + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { @@ -510,7 +511,7 @@ begin: reuseport = sk->sk_reuseport; if (reuseport) { hash = inet_ehashfn(net, daddr, hnum, - saddr, htons(sport)); + saddr, sport); matches = 1; } } else if (score == badness && reuseport) { @@ -799,7 +800,7 @@ send: /* * Push out all pending data as one UDP datagram. Socket is locked. */ -static int udp_push_pending_frames(struct sock *sk) +int udp_push_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -818,6 +819,7 @@ out: up->pending = 0; return err; } +EXPORT_SYMBOL(udp_push_pending_frames); int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) @@ -1709,7 +1711,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { - int ret = udp_queue_rcv_skb(sk, skb); + int ret; + + sk_mark_napi_id(sk, skb); + ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1967,6 +1972,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; + sock_rps_record_flow(sk); + /* Check for false positives due to checksum errors */ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) @@ -2284,29 +2291,8 @@ void __init udp_init(void) sysctl_udp_wmem_min = SK_MEM_QUANTUM; } -int udp4_ufo_send_check(struct sk_buff *skb) -{ - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - return -EINVAL; - - if (likely(!skb->encapsulation)) { - const struct iphdr *iph; - struct udphdr *uh; - - iph = ip_hdr(skb); - uh = udp_hdr(skb); - - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - IPPROTO_UDP, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; - } - return 0; -} - -static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, - netdev_features_t features) +struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); int mac_len = skb->mac_len; @@ -2337,6 +2323,9 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, struct udphdr *uh; int udp_offset = outer_hlen - tnl_hlen; + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + skb->mac_len = mac_len; skb_push(skb, outer_hlen); @@ -2359,59 +2348,8 @@ static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, uh->check = CSUM_MANGLED_0; } - skb->ip_summed = CHECKSUM_NONE; skb->protocol = protocol; } while ((skb = skb->next)); out: return segs; } - -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - int type = skb_shinfo(skb)->gso_type; - - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE) || - !(type & (SKB_GSO_UDP)))) - goto out; - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - /* Fragment the skb. IP headers of the fragments are updated in - * inet_gso_segment() - */ - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) - segs = skb_udp_tunnel_segment(skb, features); - else { - int offset; - __wsum csum; - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. - */ - offset = skb_checksum_start_offset(skb); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - offset += skb->csum_offset; - *(__sum16 *)(skb->data + offset) = csum_fold(csum); - skb->ip_summed = CHECKSUM_NONE; - - segs = skb_segment(skb, features); - } -out: - return segs; -} diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c new file mode 100644 index 000000000000..f35eccaa855e --- /dev/null +++ b/net/ipv4/udp_offload.c @@ -0,0 +1,100 @@ +/* + * IPV4 GSO/GRO offload support + * Linux INET implementation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * UDPv4 GSO support + */ + +#include <linux/skbuff.h> +#include <net/udp.h> +#include <net/protocol.h> + +static int udp4_ufo_send_check(struct sk_buff *skb) +{ + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + return -EINVAL; + + if (likely(!skb->encapsulation)) { + const struct iphdr *iph; + struct udphdr *uh; + + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + } + + return 0; +} + +static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_GRE | SKB_GSO_MPLS) || + !(type & (SKB_GSO_UDP)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + /* Fragment the skb. IP headers of the fragments are updated in + * inet_gso_segment() + */ + if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + segs = skb_udp_tunnel_segment(skb, features); + else { + int offset; + __wsum csum; + + /* Do software UFO. Complete and fill in the UDP checksum as + * HW cannot do checksum of UDP packets sent as multiple + * IP fragments. + */ + offset = skb_checksum_start_offset(skb); + csum = skb_checksum(skb, offset, skb->len - offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + segs = skb_segment(skb, features); + } +out: + return segs; +} + +static const struct net_offload udpv4_offload = { + .callbacks = { + .gso_send_check = udp4_ufo_send_check, + .gso_segment = udp4_ufo_fragment, + }, +}; + +int __init udpv4_offload_init(void) +{ + return inet_add_offload(&udpv4_offload, IPPROTO_UDP); +} diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 05a5df2febc9..06347dbd32c1 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -63,7 +63,7 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info) static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = { .handler = xfrm_tunnel_rcv, .err_handler = xfrm_tunnel_err, - .priority = 2, + .priority = 3, }; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 9af088d2cdaa..470a9c008e9b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ - raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ + raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o ipv6-offload := ip6_offload.o tcpv6_offload.o udp_offload.o exthdrs_offload.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4ab4c38958c6..da4241c8c7da 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -253,37 +253,32 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev) return !qdisc_tx_is_noop(dev); } -static void addrconf_del_timer(struct inet6_ifaddr *ifp) +static void addrconf_del_rs_timer(struct inet6_dev *idev) { - if (del_timer(&ifp->timer)) + if (del_timer(&idev->rs_timer)) + __in6_dev_put(idev); +} + +static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp) +{ + if (del_timer(&ifp->dad_timer)) __in6_ifa_put(ifp); } -enum addrconf_timer_t { - AC_NONE, - AC_DAD, - AC_RS, -}; +static void addrconf_mod_rs_timer(struct inet6_dev *idev, + unsigned long when) +{ + if (!timer_pending(&idev->rs_timer)) + in6_dev_hold(idev); + mod_timer(&idev->rs_timer, jiffies + when); +} -static void addrconf_mod_timer(struct inet6_ifaddr *ifp, - enum addrconf_timer_t what, - unsigned long when) +static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, + unsigned long when) { - if (!del_timer(&ifp->timer)) + if (!timer_pending(&ifp->dad_timer)) in6_ifa_hold(ifp); - - switch (what) { - case AC_DAD: - ifp->timer.function = addrconf_dad_timer; - break; - case AC_RS: - ifp->timer.function = addrconf_rs_timer; - break; - default: - break; - } - ifp->timer.expires = jiffies + when; - add_timer(&ifp->timer); + mod_timer(&ifp->dad_timer, jiffies + when); } static int snmp6_alloc_dev(struct inet6_dev *idev) @@ -326,6 +321,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) WARN_ON(!list_empty(&idev->addr_list)); WARN_ON(idev->mc_list != NULL); + WARN_ON(timer_pending(&idev->rs_timer)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %s\n", __func__, dev ? dev->name : "NIL"); @@ -357,7 +353,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) rwlock_init(&ndev->lock); ndev->dev = dev; INIT_LIST_HEAD(&ndev->addr_list); - + setup_timer(&ndev->rs_timer, addrconf_rs_timer, + (unsigned long)ndev); memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf)); ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; @@ -776,7 +773,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); - if (del_timer(&ifp->timer)) + if (del_timer(&ifp->dad_timer)) pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); if (ifp->state != INET6_IFADDR_STATE_DEAD) { @@ -816,8 +813,9 @@ static u32 inet6_addr_hash(const struct in6_addr *addr) /* On success it returns ifp with increased reference count */ static struct inet6_ifaddr * -ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, - int scope, u32 flags) +ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, + const struct in6_addr *peer_addr, int pfxlen, + int scope, u32 flags, u32 valid_lft, u32 prefered_lft) { struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; @@ -866,15 +864,19 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, } ifa->addr = *addr; + if (peer_addr) + ifa->peer_addr = *peer_addr; spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); - init_timer(&ifa->timer); + setup_timer(&ifa->dad_timer, addrconf_dad_timer, + (unsigned long)ifa); INIT_HLIST_NODE(&ifa->addr_lst); - ifa->timer.data = (unsigned long) ifa; ifa->scope = scope; ifa->prefix_len = pfxlen; ifa->flags = flags | IFA_F_TENTATIVE; + ifa->valid_lft = valid_lft; + ifa->prefered_lft = prefered_lft; ifa->cstamp = ifa->tstamp = jiffies; ifa->tokenized = false; @@ -994,7 +996,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) } write_unlock_bh(&idev->lock); - addrconf_del_timer(ifp); + addrconf_del_dad_timer(ifp); ipv6_ifa_notify(RTM_DELADDR, ifp); @@ -1126,9 +1128,9 @@ retry: ift = !max_addresses || ipv6_count_addresses(idev) < max_addresses ? - ipv6_add_addr(idev, &addr, tmp_plen, - ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, - addr_flags) : NULL; + ipv6_add_addr(idev, &addr, NULL, tmp_plen, + ipv6_addr_scope(&addr), addr_flags, + tmp_valid_lft, tmp_prefered_lft) : NULL; if (IS_ERR_OR_NULL(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -1140,8 +1142,6 @@ retry: spin_lock_bh(&ift->lock); ift->ifpub = ifp; - ift->valid_lft = tmp_valid_lft; - ift->prefered_lft = tmp_prefered_lft; ift->cstamp = now; ift->tstamp = tmp_tstamp; spin_unlock_bh(&ift->lock); @@ -1448,6 +1448,23 @@ try_nextdev: } EXPORT_SYMBOL(ipv6_dev_get_saddr); +int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, + unsigned char banned_flags) +{ + struct inet6_ifaddr *ifp; + int err = -EADDRNOTAVAIL; + + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { + *addr = ifp->addr; + err = 0; + break; + } + } + return err; +} + int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags) { @@ -1457,17 +1474,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - struct inet6_ifaddr *ifp; - read_lock_bh(&idev->lock); - list_for_each_entry(ifp, &idev->addr_list, if_list) { - if (ifp->scope == IFA_LINK && - !(ifp->flags & banned_flags)) { - *addr = ifp->addr; - err = 0; - break; - } - } + err = __ipv6_get_lladdr(idev, addr, banned_flags); read_unlock_bh(&idev->lock); } rcu_read_unlock(); @@ -1581,7 +1589,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); - addrconf_del_timer(ifp); + addrconf_del_dad_timer(ifp); ifp->flags |= IFA_F_TENTATIVE; if (dad_failed) ifp->flags |= IFA_F_DADFAILED; @@ -2175,16 +2183,19 @@ ok: */ if (!max_addresses || ipv6_count_addresses(in6_dev) < max_addresses) - ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len, + ifp = ipv6_add_addr(in6_dev, &addr, NULL, + pinfo->prefix_len, addr_type&IPV6_ADDR_SCOPE_MASK, - addr_flags); + addr_flags, valid_lft, + prefered_lft); if (IS_ERR_OR_NULL(ifp)) { in6_dev_put(in6_dev); return; } - update_lft = create = 1; + update_lft = 0; + create = 1; ifp->cstamp = jiffies; ifp->tokenized = tokenized; addrconf_dad_start(ifp); @@ -2205,7 +2216,7 @@ ok: stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!update_lft && stored_lft) { + if (!update_lft && !create && stored_lft) { if (valid_lft > MIN_VALID_LIFETIME || valid_lft > stored_lft) update_lft = 1; @@ -2402,6 +2413,7 @@ err_exit: * Manual configuration of address on an interface */ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx, + const struct in6_addr *peer_pfx, unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { @@ -2450,15 +2462,10 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p prefered_lft = timeout; } - ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); + ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags, + valid_lft, prefered_lft); if (!IS_ERR(ifp)) { - spin_lock_bh(&ifp->lock); - ifp->valid_lft = valid_lft; - ifp->prefered_lft = prefered_lft; - ifp->tstamp = jiffies; - spin_unlock_bh(&ifp->lock); - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, expires, flags); /* @@ -2500,12 +2507,6 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p read_unlock_bh(&idev->lock); ipv6_del_addr(ifp); - - /* If the last address is deleted administratively, - disable IPv6 on this interface. - */ - if (list_empty(&idev->addr_list)) - addrconf_ifdown(idev->dev, 1); return 0; } } @@ -2526,7 +2527,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL, ireq.ifr6_prefixlen, IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); @@ -2556,7 +2557,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, { struct inet6_ifaddr *ifp; - ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT); + ifp = ipv6_add_addr(idev, addr, NULL, plen, + scope, IFA_F_PERMANENT, 0, 0); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; @@ -2682,7 +2684,7 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr #endif - ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags); + ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 0, 0); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); @@ -2761,8 +2763,6 @@ static void addrconf_gre_config(struct net_device *dev) struct inet6_dev *idev; struct in6_addr addr; - pr_info("%s(%s)\n", __func__, dev->name); - ASSERT_RTNL(); if ((idev = ipv6_find_idev(dev)) == NULL) { @@ -2829,9 +2829,9 @@ static void addrconf_ip6_tnl_config(struct net_device *dev) } static int addrconf_notify(struct notifier_block *this, unsigned long event, - void *data) + void *ptr) { - struct net_device *dev = (struct net_device *) data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; int err; @@ -3039,7 +3039,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); - addrconf_del_timer(ifa); + addrconf_del_dad_timer(ifa); goto restart; } } @@ -3048,6 +3048,8 @@ static int addrconf_ifdown(struct net_device *dev, int how) write_lock_bh(&idev->lock); + addrconf_del_rs_timer(idev); + /* Step 2: clear flags for stateless addrconf */ if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); @@ -3077,7 +3079,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, struct inet6_ifaddr, if_list); - addrconf_del_timer(ifa); + addrconf_del_dad_timer(ifa); list_del(&ifa->if_list); @@ -3119,10 +3121,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) static void addrconf_rs_timer(unsigned long data) { - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; - struct inet6_dev *idev = ifp->idev; + struct inet6_dev *idev = (struct inet6_dev *)data; + struct in6_addr lladdr; - read_lock(&idev->lock); + write_lock(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) goto out; @@ -3133,18 +3135,19 @@ static void addrconf_rs_timer(unsigned long data) if (idev->if_flags & IF_RA_RCVD) goto out; - spin_lock(&ifp->lock); - if (ifp->probes++ < idev->cnf.rtr_solicits) { - /* The wait after the last probe can be shorter */ - addrconf_mod_timer(ifp, AC_RS, - (ifp->probes == idev->cnf.rtr_solicits) ? - idev->cnf.rtr_solicit_delay : - idev->cnf.rtr_solicit_interval); - spin_unlock(&ifp->lock); + if (idev->rs_probes++ < idev->cnf.rtr_solicits) { + if (!__ipv6_get_lladdr(idev, &lladdr, IFA_F_TENTATIVE)) + ndisc_send_rs(idev->dev, &lladdr, + &in6addr_linklocal_allrouters); + else + goto out; - ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); + /* The wait after the last probe can be shorter */ + addrconf_mod_rs_timer(idev, (idev->rs_probes == + idev->cnf.rtr_solicits) ? + idev->cnf.rtr_solicit_delay : + idev->cnf.rtr_solicit_interval); } else { - spin_unlock(&ifp->lock); /* * Note: we do not support deprecated "all on-link" * assumption any longer. @@ -3153,8 +3156,8 @@ static void addrconf_rs_timer(unsigned long data) } out: - read_unlock(&idev->lock); - in6_ifa_put(ifp); + write_unlock(&idev->lock); + in6_dev_put(idev); } /* @@ -3170,8 +3173,8 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) else rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); - ifp->probes = idev->cnf.dad_transmits; - addrconf_mod_timer(ifp, AC_DAD, rand_num); + ifp->dad_probes = idev->cnf.dad_transmits; + addrconf_mod_dad_timer(ifp, rand_num); } static void addrconf_dad_start(struct inet6_ifaddr *ifp) @@ -3232,40 +3235,40 @@ static void addrconf_dad_timer(unsigned long data) struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; - if (!ifp->probes && addrconf_dad_end(ifp)) + if (!ifp->dad_probes && addrconf_dad_end(ifp)) goto out; - read_lock(&idev->lock); + write_lock(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) { - read_unlock(&idev->lock); + write_unlock(&idev->lock); goto out; } spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) { spin_unlock(&ifp->lock); - read_unlock(&idev->lock); + write_unlock(&idev->lock); goto out; } - if (ifp->probes == 0) { + if (ifp->dad_probes == 0) { /* * DAD was successful */ ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - read_unlock(&idev->lock); + write_unlock(&idev->lock); addrconf_dad_completed(ifp); goto out; } - ifp->probes--; - addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time); + ifp->dad_probes--; + addrconf_mod_dad_timer(ifp, ifp->idev->nd_parms->retrans_time); spin_unlock(&ifp->lock); - read_unlock(&idev->lock); + write_unlock(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); @@ -3277,6 +3280,10 @@ out: static void addrconf_dad_completed(struct inet6_ifaddr *ifp) { struct net_device *dev = ifp->idev->dev; + struct in6_addr lladdr; + bool send_rs, send_mld; + + addrconf_del_dad_timer(ifp); /* * Configure the address for reception. Now it is valid. @@ -3288,22 +3295,41 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) router advertisements, start sending router solicitations. */ - if (ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits > 0 && - (dev->flags&IFF_LOOPBACK) == 0 && - (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) { + read_lock_bh(&ifp->idev->lock); + spin_lock(&ifp->lock); + send_mld = ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL && + ifp->idev->valid_ll_addr_cnt == 1; + send_rs = send_mld && + ipv6_accept_ra(ifp->idev) && + ifp->idev->cnf.rtr_solicits > 0 && + (dev->flags&IFF_LOOPBACK) == 0; + spin_unlock(&ifp->lock); + read_unlock_bh(&ifp->idev->lock); + + /* While dad is in progress mld report's source address is in6_addrany. + * Resend with proper ll now. + */ + if (send_mld) + ipv6_mc_dad_complete(ifp->idev); + + if (send_rs) { /* * If a host as already performed a random delay * [...] as part of DAD [...] there is no need * to delay again before sending the first RS */ - ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters); + if (ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) + return; + ndisc_send_rs(dev, &lladdr, &in6addr_linklocal_allrouters); - spin_lock_bh(&ifp->lock); - ifp->probes = 1; + write_lock_bh(&ifp->idev->lock); + spin_lock(&ifp->lock); + ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; - addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval); - spin_unlock_bh(&ifp->lock); + addrconf_mod_rs_timer(ifp->idev, + ifp->idev->cnf.rtr_solicit_interval); + spin_unlock(&ifp->lock); + write_unlock_bh(&ifp->idev->lock); } } @@ -3615,18 +3641,20 @@ restart: rcu_read_unlock_bh(); } -static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) +static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, + struct in6_addr **peer_pfx) { struct in6_addr *pfx = NULL; + *peer_pfx = NULL; + if (addr) pfx = nla_data(addr); if (local) { if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) - pfx = NULL; - else - pfx = nla_data(local); + *peer_pfx = pfx; + pfx = nla_data(local); } return pfx; @@ -3644,7 +3672,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; + struct in6_addr *pfx, *peer_pfx; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3652,7 +3680,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) return err; ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); if (pfx == NULL) return -EINVAL; @@ -3710,7 +3738,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *pfx; + struct in6_addr *pfx, *peer_pfx; struct inet6_ifaddr *ifa; struct net_device *dev; u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; @@ -3722,7 +3750,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) return err; ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); if (pfx == NULL) return -EINVAL; @@ -3750,7 +3778,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) * It would be best to check for !NLM_F_CREATE here but * userspace alreay relies on not having to provide this. */ - return inet6_addr_add(net, ifm->ifa_index, pfx, + return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx, ifm->ifa_prefixlen, ifa_flags, preferred_lft, valid_lft); } @@ -3807,6 +3835,7 @@ static inline int rt_scope(int ifa_scope) static inline int inet6_ifaddr_msgsize(void) { return NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(16) /* IFA_LOCAL */ + nla_total_size(16) /* IFA_ADDRESS */ + nla_total_size(sizeof(struct ifa_cacheinfo)); } @@ -3845,13 +3874,22 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, valid = INFINITY_LIFE_TIME; } - if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || - put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) { - nlmsg_cancel(skb, nlh); - return -EMSGSIZE; - } + if (!ipv6_addr_any(&ifa->peer_addr)) { + if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 || + nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0) + goto error; + } else + if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0) + goto error; + + if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) + goto error; return nlmsg_end(skb, nlh); + +error: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, @@ -4051,7 +4089,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) struct net *net = sock_net(in_skb->sk); struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; - struct in6_addr *addr = NULL; + struct in6_addr *addr = NULL, *peer; struct net_device *dev = NULL; struct inet6_ifaddr *ifa; struct sk_buff *skb; @@ -4061,7 +4099,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) if (err < 0) goto errout; - addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); if (addr == NULL) { err = -EINVAL; goto errout; @@ -4339,8 +4377,11 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) write_lock_bh(&idev->lock); - if (update_rs) + if (update_rs) { idev->if_flags |= IF_RS_SENT; + idev->rs_probes = 1; + addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval); + } /* Well, that's kinda nasty ... */ list_for_each_entry(ifp, &idev->addr_list, if_list) { @@ -4353,6 +4394,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); + addrconf_verify(0); return 0; } @@ -4550,6 +4592,19 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); } +static void update_valid_ll_addr_cnt(struct inet6_ifaddr *ifp, int count) +{ + write_lock_bh(&ifp->idev->lock); + spin_lock(&ifp->lock); + if (((ifp->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|IFA_F_OPTIMISTIC| + IFA_F_DADFAILED)) == IFA_F_PERMANENT) && + (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) + ifp->idev->valid_ll_addr_cnt += count; + WARN_ON(ifp->idev->valid_ll_addr_cnt < 0); + spin_unlock(&ifp->lock); + write_unlock_bh(&ifp->idev->lock); +} + static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); @@ -4558,6 +4613,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: + update_valid_ll_addr_cnt(ifp, 1); + /* * If the address was optimistic * we inserted the route at the start of @@ -4568,11 +4625,28 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); + if (!ipv6_addr_any(&ifp->peer_addr)) + addrconf_prefix_route(&ifp->peer_addr, 128, + ifp->idev->dev, 0, 0); break; case RTM_DELADDR: + update_valid_ll_addr_cnt(ifp, -1); + if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); + if (!ipv6_addr_any(&ifp->peer_addr)) { + struct rt6_info *rt; + struct net_device *dev = ifp->idev->dev; + + rt = rt6_lookup(dev_net(dev), &ifp->peer_addr, NULL, + dev->ifindex, 1); + if (rt) { + dst_hold(&rt->dst); + if (ip6_del_rt(rt)) + dst_free(&rt->dst); + } + } dst_hold(&ifp->rt->dst); if (ip6_del_rt(ifp->rt)) @@ -4593,13 +4667,13 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) #ifdef CONFIG_SYSCTL static -int addrconf_sysctl_forward(ctl_table *ctl, int write, +int addrconf_sysctl_forward(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; - ctl_table lctl; + struct ctl_table lctl; int ret; /* @@ -4620,13 +4694,16 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, static void dev_disable_change(struct inet6_dev *idev) { + struct netdev_notifier_info info; + if (!idev || !idev->dev) return; + netdev_notifier_info_init(&info, idev->dev); if (idev->cnf.disable_ipv6) - addrconf_notify(NULL, NETDEV_DOWN, idev->dev); + addrconf_notify(NULL, NETDEV_DOWN, &info); else - addrconf_notify(NULL, NETDEV_UP, idev->dev); + addrconf_notify(NULL, NETDEV_UP, &info); } static void addrconf_disable_change(struct net *net, __s32 newf) @@ -4675,13 +4752,13 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) } static -int addrconf_sysctl_disable(ctl_table *ctl, int write, +int addrconf_sysctl_disable(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; loff_t pos = *ppos; - ctl_table lctl; + struct ctl_table lctl; int ret; /* @@ -4703,7 +4780,7 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write, static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; - ctl_table addrconf_vars[DEVCONF_MAX+1]; + struct ctl_table addrconf_vars[DEVCONF_MAX+1]; } addrconf_sysctl __read_mostly = { .sysctl_header = NULL, .addrconf_vars = { diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 72104562c864..d2f87427244b 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -5,6 +5,7 @@ #include <linux/export.h> #include <net/ipv6.h> +#include <net/addrconf.h> #define IPV6_ADDR_SCOPE_TYPE(scope) ((scope) << 16) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ab5c7ad482cd..a5ac969aeefe 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,6 +49,7 @@ #include <net/udp.h> #include <net/udplite.h> #include <net/tcp.h> +#include <net/ping.h> #include <net/protocol.h> #include <net/inet_common.h> #include <net/route.h> @@ -840,6 +841,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_udplite_proto; + err = proto_register(&pingv6_prot, 1); + if (err) + goto out_unregister_ping_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. @@ -930,6 +934,10 @@ static int __init inet6_init(void) if (err) goto ipv6_packet_fail; + err = pingv6_init(); + if (err) + goto pingv6_fail; + #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) @@ -942,6 +950,8 @@ out: sysctl_fail: ipv6_packet_cleanup(); #endif +pingv6_fail: + pingv6_exit(); ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: @@ -985,6 +995,8 @@ register_pernet_fail: rtnl_unregister_all(PF_INET6); out_sock_register_fail: rawv6_exit(); +out_unregister_ping_proto: + proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 4b56cbbc7890..197e6f4a2b74 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -879,3 +879,30 @@ exit_f: return err; } EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl); + +void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, + __u16 srcp, __u16 destp, int bucket) +{ + struct ipv6_pinfo *np = inet6_sk(sp); + const struct in6_addr *dest, *src; + + dest = &np->daddr; + src = &np->rcv_saddr; + seq_printf(seq, + "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " + "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", + bucket, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp, + sp->sk_state, + sk_wmem_alloc_get(sp), + sk_rmem_alloc_get(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, + sock_i_ino(sp), + atomic_read(&sp->sk_refcnt), sp, + atomic_read(&sp->sk_drops)); +} diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index c5e83fae4df4..140748debc4a 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -115,7 +115,7 @@ EXPORT_SYMBOL(ipv6_skip_exthdr); int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) { const unsigned char *nh = skb_network_header(skb); - int packet_len = skb->tail - skb->network_header; + int packet_len = skb_tail_pointer(skb) - skb_network_header(skb); struct ipv6_opt_hdr *hdr; int len; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4ff0a42b8c7..7cfc8d284870 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -57,6 +57,7 @@ #include <net/ipv6.h> #include <net/ip6_checksum.h> +#include <net/ping.h> #include <net/protocol.h> #include <net/raw.h> #include <net/rawv6.h> @@ -84,12 +85,18 @@ static inline struct sock *icmpv6_sk(struct net *net) static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ + struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); struct net *net = dev_net(skb->dev); if (type == ICMPV6_PKT_TOOBIG) ip6_update_pmtu(skb, net, info, 0, 0); else if (type == NDISC_REDIRECT) ip6_redirect(skb, net, 0, 0); + + if (!(type & ICMPV6_INFOMSG_MASK)) + if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) + ping_err(skb, offset, info); } static int icmpv6_rcv(struct sk_buff *skb); @@ -224,7 +231,8 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -static int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct icmp6hdr *thdr, int len) +int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; @@ -307,8 +315,8 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif -static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6) +struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, + struct sock *sk, struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; @@ -391,7 +399,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int err = 0; if ((u8 *)hdr < skb->head || - (skb->network_header + sizeof(*hdr)) > skb->tail) + (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) return; /* @@ -697,7 +705,8 @@ static int icmpv6_rcv(struct sk_buff *skb) skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, 0)); if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n", + LIMIT_NETDEBUG(KERN_DEBUG + "ICMPv6 checksum failed [%pI6c > %pI6c]\n", saddr, daddr); goto csum_error; } @@ -718,7 +727,7 @@ static int icmpv6_rcv(struct sk_buff *skb) break; case ICMPV6_ECHO_REPLY: - /* we couldn't care less */ + ping_rcv(skb); break; case ICMPV6_PKT_TOOBIG: @@ -967,7 +976,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL -ctl_table ipv6_icmp_table_template[] = { +struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 192dd1a0e188..bff3d821c7eb 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -632,6 +632,12 @@ insert_above: return ln; } +static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) +{ + return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + RTF_GATEWAY; +} + /* * Insert routing information in a node. */ @@ -646,6 +652,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, int add = (!info->nlh || (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; + bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); ins = &fn->leaf; @@ -691,9 +698,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * To avoid long list, we only had siblings if the * route have a gateway. */ - if (rt->rt6i_flags & RTF_GATEWAY && - !(rt->rt6i_flags & RTF_EXPIRES) && - !(iter->rt6i_flags & RTF_EXPIRES)) + if (rt_can_ecmp && + rt6_qualify_for_ecmp(iter)) rt->rt6i_nsiblings++; } @@ -715,7 +721,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, /* Find the first route that have the same metric */ sibling = fn->leaf; while (sibling) { - if (sibling->rt6i_metric == rt->rt6i_metric) { + if (sibling->rt6i_metric == rt->rt6i_metric && + rt6_qualify_for_ecmp(sibling)) { list_add_tail(&rt->rt6i_siblings, &sibling->rt6i_siblings); break; @@ -1625,27 +1632,28 @@ static int fib6_age(struct rt6_info *rt, void *arg) static DEFINE_SPINLOCK(fib6_gc_lock); -void fib6_run_gc(unsigned long expires, struct net *net) +void fib6_run_gc(unsigned long expires, struct net *net, bool force) { - if (expires != ~0UL) { + unsigned long now; + + if (force) { spin_lock_bh(&fib6_gc_lock); - gc_args.timeout = expires ? (int)expires : - net->ipv6.sysctl.ip6_rt_gc_interval; - } else { - if (!spin_trylock_bh(&fib6_gc_lock)) { - mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); - return; - } - gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; + } else if (!spin_trylock_bh(&fib6_gc_lock)) { + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); + return; } + gc_args.timeout = expires ? (int)expires : + net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = icmp6_dst_gc(); fib6_clean_all(net, fib6_age, 0, NULL); + now = jiffies; + net->ipv6.ip6_rt_last_gc = now; if (gc_args.more) mod_timer(&net->ipv6.ip6_fib_timer, - round_jiffies(jiffies + round_jiffies(now + net->ipv6.sysctl.ip6_rt_gc_interval)); else del_timer(&net->ipv6.ip6_fib_timer); @@ -1654,7 +1662,7 @@ void fib6_run_gc(unsigned long expires, struct net *net) static void fib6_gc_timer_cb(unsigned long arg) { - fib6_run_gc(0, (struct net *)arg); + fib6_run_gc(0, (struct net *)arg, true); } static int __net_init fib6_net_init(struct net *net) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 71b766ee821d..a263b990ee11 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -98,6 +98,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL | + SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d5d20cde8d92..6e3ddf806ec2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1098,11 +1098,12 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } -static void ip6_append_data_mtu(int *mtu, +static void ip6_append_data_mtu(unsigned int *mtu, int *maxfraglen, unsigned int fragheaderlen, struct sk_buff *skb, - struct rt6_info *rt) + struct rt6_info *rt, + bool pmtuprobe) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { @@ -1114,7 +1115,9 @@ static void ip6_append_data_mtu(int *mtu, * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = dst_mtu(rt->dst.path); + *mtu = min(*mtu, pmtuprobe ? + rt->dst.dev->mtu : + dst_mtu(rt->dst.path)); } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1131,11 +1134,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen; + unsigned int maxfraglen, fragheaderlen, mtu; int exthdrlen; int dst_exthdrlen; int hh_len; - int mtu; int copy; int err; int offset = 0; @@ -1292,7 +1294,9 @@ alloc_new_skb: /* update mtu and maxfraglen if necessary */ if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, - fragheaderlen, skb, rt); + fragheaderlen, skb, rt, + np->pmtudisc == + IPV6_PMTUDISC_PROBE); skb_prev = skb; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 241fb8ad9fcf..03986d31fa41 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -259,10 +259,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr6_table *mrt, *next; + rtnl_lock(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { list_del(&mrt->list); ip6mr_free_table(mrt); } + rtnl_unlock(); fib_rules_unregister(net->ipv6.mr6_rules_ops); } #else @@ -289,7 +291,10 @@ static int __net_init ip6mr_rules_init(struct net *net) static void __net_exit ip6mr_rules_exit(struct net *net) { + rtnl_lock(); ip6mr_free_table(net->ipv6.mrt6); + net->ipv6.mrt6 = NULL; + rtnl_unlock(); } #endif @@ -1319,7 +1324,7 @@ static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc, static int ip6mr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct mr6_table *mrt; struct mif_device *v; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bfa6cc36ef2a..99cd65c715cd 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -999,6 +999,14 @@ static void mld_ifc_start_timer(struct inet6_dev *idev, int delay) in6_dev_hold(idev); } +static void mld_dad_start_timer(struct inet6_dev *idev, int delay) +{ + int tv = net_random() % delay; + + if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2)) + in6_dev_hold(idev); +} + /* * IGMP handling (alias multicast ICMPv6 messages) */ @@ -1343,8 +1351,9 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, hdr->daddr = *daddr; } -static struct sk_buff *mld_newpack(struct net_device *dev, int size) +static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) { + struct net_device *dev = idev->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.igmp_sk; struct sk_buff *skb; @@ -1369,7 +1378,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) skb_reserve(skb, hlen); - if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { + if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { /* <draft-ietf-magma-mld-source-05.txt>: * use unspecified address as the source address * when a valid link-local address is not available. @@ -1409,8 +1418,9 @@ static void mld_sendpack(struct sk_buff *skb) idev = __in6_dev_get(skb->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); - mldlen = skb->tail - skb->transport_header; + payload_len = (skb_tail_pointer(skb) - skb_network_header(skb)) - + sizeof(*pip6); + mldlen = skb_tail_pointer(skb) - skb_transport_header(skb); pip6->payload_len = htons(payload_len); pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen, @@ -1465,7 +1475,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_grec *pgr; if (!skb) - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(pmc->idev, dev->mtu); if (!skb) return NULL; pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec)); @@ -1485,7 +1495,8 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) { - struct net_device *dev = pmc->idev->dev; + struct inet6_dev *idev = pmc->idev; + struct net_device *dev = idev->dev; struct mld2_report *pmr; struct mld2_grec *pgr = NULL; struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; @@ -1514,7 +1525,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); } } first = 1; @@ -1541,7 +1552,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = htons(scount); if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); first = 1; scount = 0; } @@ -1596,8 +1607,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) struct sk_buff *skb = NULL; int type; + read_lock_bh(&idev->lock); if (!pmc) { - read_lock_bh(&idev->lock); for (pmc=idev->mc_list; pmc; pmc=pmc->next) { if (pmc->mca_flags & MAF_NOREPORT) continue; @@ -1609,7 +1620,6 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); } else { spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) @@ -1619,6 +1629,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->mca_lock); } + read_unlock_bh(&idev->lock); if (skb) mld_sendpack(skb); } @@ -1814,6 +1825,46 @@ err_out: goto out; } +static void mld_resend_report(struct inet6_dev *idev) +{ + if (MLD_V1_SEEN(idev)) { + struct ifmcaddr6 *mcaddr; + read_lock_bh(&idev->lock); + for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) { + if (!(mcaddr->mca_flags & MAF_NOREPORT)) + igmp6_send(&mcaddr->mca_addr, idev->dev, + ICMPV6_MGM_REPORT); + } + read_unlock_bh(&idev->lock); + } else { + mld_send_report(idev, NULL); + } +} + +void ipv6_mc_dad_complete(struct inet6_dev *idev) +{ + idev->mc_dad_count = idev->mc_qrv; + if (idev->mc_dad_count) { + mld_resend_report(idev); + idev->mc_dad_count--; + if (idev->mc_dad_count) + mld_dad_start_timer(idev, idev->mc_maxdelay); + } +} + +static void mld_dad_timer_expire(unsigned long data) +{ + struct inet6_dev *idev = (struct inet6_dev *)data; + + mld_resend_report(idev); + if (idev->mc_dad_count) { + idev->mc_dad_count--; + if (idev->mc_dad_count) + mld_dad_start_timer(idev, idev->mc_maxdelay); + } + __in6_dev_put(idev); +} + static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, const struct in6_addr *psfsrc) { @@ -2231,6 +2282,8 @@ void ipv6_mc_down(struct inet6_dev *idev) idev->mc_gq_running = 0; if (del_timer(&idev->mc_gq_timer)) __in6_dev_put(idev); + if (del_timer(&idev->mc_dad_timer)) + __in6_dev_put(idev); for (i = idev->mc_list; i; i=i->next) igmp6_group_dropped(i); @@ -2267,6 +2320,8 @@ void ipv6_mc_init_dev(struct inet6_dev *idev) idev->mc_ifc_count = 0; setup_timer(&idev->mc_ifc_timer, mld_ifc_timer_expire, (unsigned long)idev); + setup_timer(&idev->mc_dad_timer, mld_dad_timer_expire, + (unsigned long)idev); idev->mc_qrv = MLD_QRV_DEFAULT; idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL; idev->mc_v1_seen = 0; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 0f9bdc5ee9f3..9ac01dc9402e 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -268,7 +268,8 @@ static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; @@ -404,7 +405,8 @@ static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); const unsigned char *nh = skb_network_header(skb); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ca4ffcc287f1..79aa9652ed86 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -479,7 +479,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) - override = 0; + override = false; inc_opt |= ifp->idev->cnf.force_tllao; in6_ifa_put(ifp); } else { @@ -557,7 +557,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, } if (ipv6_addr_any(saddr)) - inc_opt = 0; + inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev); @@ -693,7 +693,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; @@ -790,7 +790,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && - inc != 0 && + inc && idev->nd_parms->proxy_delay != 0) { /* * for anycast or proxy, @@ -853,7 +853,7 @@ static void ndisc_recv_na(struct sk_buff *skb) const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; @@ -1069,7 +1069,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) __u8 * opt = (__u8 *)(ra_msg + 1); - optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg); + optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - + sizeof(struct ra_msg); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); @@ -1346,7 +1347,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) u8 *hdr; struct ndisc_options ndopts; struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); - u32 ndoptlen = skb->tail - (skb->transport_header + + u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct rd_msg, opt)); #ifdef CONFIG_IPV6_NDISC_NODETYPE @@ -1568,14 +1569,14 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct inet6_dev *idev; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); idev = in6_dev_get(dev); if (!idev) break; @@ -1585,7 +1586,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); break; case NETDEV_NOTIFY_PEERS: ndisc_send_unsol_na(dev); diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c index 60e9053bab05..47bff6107519 100644 --- a/net/ipv6/netfilter/ip6t_MASQUERADE.c +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -71,7 +71,7 @@ static int device_cmp(struct nf_conn *ct, void *ifindex) static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - const struct net_device *dev = ptr; + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_DOWN) @@ -89,8 +89,10 @@ static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; + struct netdev_notifier_info info; - return masq_device_event(this, event, ifa->idev->dev); + netdev_notifier_info_init(&info, ifa->idev->dev); + return masq_device_event(this, event, &info); } static struct notifier_block masq_inet_notifier = { diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index c2e73e647e44..ab92a3673fbb 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -40,7 +40,8 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) u16 offset = sizeof(struct ipv6hdr); struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - unsigned int packet_len = skb->tail - skb->network_header; + unsigned int packet_len = skb_tail_pointer(skb) - + skb_network_header(skb); int found_rhdr = 0; *nexthdr = &ipv6_hdr(skb)->nexthdr; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c new file mode 100644 index 000000000000..18f19df4189f --- /dev/null +++ b/net/ipv6/ping.c @@ -0,0 +1,277 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * "Ping" sockets + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on ipv4/ping.c code. + * + * Authors: Lorenzo Colitti (IPv6 support) + * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6), + * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32) + * + */ + +#include <net/addrconf.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/protocol.h> +#include <net/udp.h> +#include <net/transp_v6.h> +#include <net/ping.h> + +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .no_check = UDP_CSUM_DEFAULT, + .flags = INET_PROTOSW_REUSE, +}; + + +/* Compatibility glue so we can support IPv6 when it's compiled as a module */ +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +{ + return -EAFNOSUPPORT; +} +static int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} +static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) +{ + return -EAFNOSUPPORT; +} +static void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) {} +static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict) +{ + return 0; +} + +int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct icmp6hdr user_icmph; + int addr_type; + struct in6_addr *daddr; + int iif = 0; + struct flowi6 fl6; + int err; + int hlimit; + struct dst_entry *dst; + struct rt6_info *rt; + struct pingfakehdr pfh; + + pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + + if (msg->msg_name) { + struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; + if (msg->msg_namelen < sizeof(struct sockaddr_in6) || + u->sin6_family != AF_INET6) { + return -EINVAL; + } + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != u->sin6_scope_id) { + return -EINVAL; + } + daddr = &(u->sin6_addr); + iif = u->sin6_scope_id; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = &np->daddr; + } + + if (!iif) + iif = sk->sk_bound_dev_if; + + addr_type = ipv6_addr_type(daddr); + if (__ipv6_addr_needs_scope_id(addr_type) && !iif) + return -EINVAL; + if (addr_type & IPV6_ADDR_MAPPED) + return -EINVAL; + + /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ + + memset(&fl6, 0, sizeof(fl6)); + + fl6.flowi6_proto = IPPROTO_ICMPV6; + fl6.saddr = np->saddr; + fl6.daddr = *daddr; + fl6.fl6_icmp_type = user_icmph.icmp6_type; + fl6.fl6_icmp_code = user_icmph.icmp6_code; + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; + + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1); + if (IS_ERR(dst)) + return PTR_ERR(dst); + rt = (struct rt6_info *) dst; + + np = inet6_sk(sk); + if (!np) + return -EBADF; + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + else if (!fl6.flowi6_oif) + fl6.flowi6_oif = np->ucast_oif; + + pfh.icmph.type = user_icmph.icmp6_type; + pfh.icmph.code = user_icmph.icmp6_code; + pfh.icmph.checksum = 0; + pfh.icmph.un.echo.id = inet->inet_sport; + pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; + pfh.iov = msg->msg_iov; + pfh.wcheck = 0; + pfh.family = AF_INET6; + + if (ipv6_addr_is_multicast(&fl6.daddr)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = ip6_dst_hoplimit(dst); + + lock_sock(sk); + err = ip6_append_data(sk, ping_getfrag, &pfh, len, + 0, hlimit, + np->tclass, NULL, &fl6, rt, + MSG_DONTWAIT, np->dontfrag); + + if (err) { + ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, + ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); + } else { + err = icmpv6_push_pending_frames(sk, &fl6, + (struct icmp6hdr *) &pfh.icmph, + len); + } + release_sock(sk); + + if (err) + return err; + + return len; +} + +#ifdef CONFIG_PROC_FS +static void *ping_v6_seq_start(struct seq_file *seq, loff_t *pos) +{ + return ping_seq_start(seq, pos, AF_INET6); +} + +static int ping_v6_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + int bucket = ((struct ping_iter_state *) seq->private)->bucket; + struct inet_sock *inet = inet_sk(v); + __u16 srcp = ntohs(inet->inet_sport); + __u16 destp = ntohs(inet->inet_dport); + ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); + } + return 0; +} + +static struct ping_seq_afinfo ping_v6_seq_afinfo = { + .name = "icmp6", + .family = AF_INET6, + .seq_fops = &ping_seq_fops, + .seq_ops = { + .start = ping_v6_seq_start, + .show = ping_v6_seq_show, + .next = ping_seq_next, + .stop = ping_seq_stop, + }, +}; + +static int __net_init ping_v6_proc_init_net(struct net *net) +{ + return ping_proc_register(net, &ping_v6_seq_afinfo); +} + +static void __net_init ping_v6_proc_exit_net(struct net *net) +{ + return ping_proc_unregister(net, &ping_v6_seq_afinfo); +} + +static struct pernet_operations ping_v6_net_ops = { + .init = ping_v6_proc_init_net, + .exit = ping_v6_proc_exit_net, +}; +#endif + +int __init pingv6_init(void) +{ +#ifdef CONFIG_PROC_FS + int ret = register_pernet_subsys(&ping_v6_net_ops); + if (ret) + return ret; +#endif + pingv6_ops.ipv6_recv_error = ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; + return inet6_register_protosw(&pingv6_protosw); +} + +/* This never gets called because it's not possible to unload the ipv6 module, + * but just in case. + */ +void pingv6_exit(void) +{ + pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; + pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; +#ifdef CONFIG_PROC_FS + unregister_pernet_subsys(&ping_v6_net_ops); +#endif + inet6_unregister_protosw(&pingv6_protosw); +} diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index eedff8ccded5..c45f7a5c36e9 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1132,7 +1132,8 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) - amount = skb->tail - skb->transport_header; + amount = skb_tail_pointer(skb) - + skb_transport_header(skb); spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } @@ -1226,45 +1227,16 @@ struct proto rawv6_prot = { }; #ifdef CONFIG_PROC_FS -static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) -{ - struct ipv6_pinfo *np = inet6_sk(sp); - const struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = 0; - srcp = inet_sk(sp)->inet_num; - seq_printf(seq, - "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", - i, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - sk_wmem_alloc_get(sp), - sk_rmem_alloc_get(sp), - 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), - 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); -} - static int raw6_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode ref pointer drops\n"); - else - raw6_sock_seq_show(seq, v, raw_seq_private(seq)->bucket); + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + struct sock *sp = v; + __u16 srcp = inet_sk(sp)->inet_num; + ip6_dgram_sock_seq_show(seq, v, srcp, 0, + raw_seq_private(seq)->bucket); + } return 0; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad0aa6b0b86a..b70f8979003b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -65,6 +65,12 @@ #include <linux/sysctl.h> #endif +enum rt6_nud_state { + RT6_NUD_FAIL_HARD = -2, + RT6_NUD_FAIL_SOFT = -1, + RT6_NUD_SUCCEED = 1 +}; + static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); @@ -83,6 +89,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, @@ -394,7 +401,8 @@ static int rt6_info_hash_nhsfn(unsigned int candidate_count, } static struct rt6_info *rt6_multipath_select(struct rt6_info *match, - struct flowi6 *fl6) + struct flowi6 *fl6, int oif, + int strict) { struct rt6_info *sibling, *next_sibling; int route_choosen; @@ -408,6 +416,8 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, &match->rt6i_siblings, rt6i_siblings) { route_choosen--; if (route_choosen == 0) { + if (rt6_score_route(sibling, oif, strict) < 0) + break; match = sibling; break; } @@ -527,26 +537,29 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) return 0; } -static inline bool rt6_check_neigh(struct rt6_info *rt) +static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) { struct neighbour *neigh; - bool ret = false; + enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - return true; + return RT6_NUD_SUCCEED; rcu_read_lock_bh(); neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) - ret = true; + ret = RT6_NUD_SUCCEED; #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) - ret = true; + ret = RT6_NUD_SUCCEED; #endif read_unlock(&neigh->lock); + } else { + ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? + RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT; } rcu_read_unlock_bh(); @@ -560,43 +573,52 @@ static int rt6_score_route(struct rt6_info *rt, int oif, m = rt6_check_dev(rt, oif); if (!m && (strict & RT6_LOOKUP_F_IFACE)) - return -1; + return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; #endif - if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE)) - return -1; + if (strict & RT6_LOOKUP_F_REACHABLE) { + int n = rt6_check_neigh(rt); + if (n < 0) + return n; + } return m; } static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, - int *mpri, struct rt6_info *match) + int *mpri, struct rt6_info *match, + bool *do_rr) { int m; + bool match_do_rr = false; if (rt6_check_expired(rt)) goto out; m = rt6_score_route(rt, oif, strict); - if (m < 0) + if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + match_do_rr = true; + m = 0; /* lowest valid score */ + } else if (m < 0) { goto out; + } + + if (strict & RT6_LOOKUP_F_REACHABLE) + rt6_probe(rt); if (m > *mpri) { - if (strict & RT6_LOOKUP_F_REACHABLE) - rt6_probe(match); + *do_rr = match_do_rr; *mpri = m; match = rt; - } else if (strict & RT6_LOOKUP_F_REACHABLE) { - rt6_probe(rt); } - out: return match; } static struct rt6_info *find_rr_leaf(struct fib6_node *fn, struct rt6_info *rr_head, - u32 metric, int oif, int strict) + u32 metric, int oif, int strict, + bool *do_rr) { struct rt6_info *rt, *match; int mpri = -1; @@ -604,10 +626,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; for (rt = rr_head; rt && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; } @@ -616,15 +638,16 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) { struct rt6_info *match, *rt0; struct net *net; + bool do_rr = false; rt0 = fn->rr_ptr; if (!rt0) fn->rr_ptr = rt0 = fn->leaf; - match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); + match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, + &do_rr); - if (!match && - (strict & RT6_LOOKUP_F_REACHABLE)) { + if (do_rr) { struct rt6_info *next = rt0->dst.rt6_next; /* no entries matched; do round-robin */ @@ -743,7 +766,7 @@ restart: rt = fn->leaf; rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(rt, fl6); + rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); BACKTRACK(net, &fl6->saddr); out: dst_use(&rt->dst, jiffies); @@ -875,8 +898,8 @@ restart_2: restart: rt = rt6_select(fn, oif, strict | reachable); - if (rt->rt6i_nsiblings && oif == 0) - rt = rt6_multipath_select(rt, fl6); + if (rt->rt6i_nsiblings) + rt = rt6_multipath_select(rt, fl6, oif, strict | reachable); BACKTRACK(net, &fl6->saddr); if (rt == net->ipv6.ip6_null_entry || rt->rt6i_flags & RTF_CACHE) @@ -1074,10 +1097,13 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { - if (rt->rt6i_flags & RTF_CACHE) - rt6_update_expires(rt, 0); - else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) + if (rt->rt6i_flags & RTF_CACHE) { + dst_hold(&rt->dst); + if (ip6_del_rt(rt)) + dst_free(&rt->dst); + } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { rt->rt6i_node->fn_sernum = -1; + } } } @@ -1285,7 +1311,6 @@ static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), static int ip6_dst_gc(struct dst_ops *ops) { - unsigned long now = jiffies; struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; @@ -1295,13 +1320,12 @@ static int ip6_dst_gc(struct dst_ops *ops) int entries; entries = dst_entries_get_fast(ops); - if (time_after(rt_last_gc + rt_min_interval, now) && + if (time_after(rt_last_gc + rt_min_interval, jiffies) && entries <= rt_max_size) goto out; net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); - net->ipv6.ip6_rt_last_gc = now; + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; @@ -1649,7 +1673,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu int optlen, on_link; u8 *lladdr; - optlen = skb->tail - skb->transport_header; + optlen = skb_tail_pointer(skb) - skb_transport_header(skb); optlen -= sizeof(*msg); if (optlen < 0) { @@ -2681,9 +2705,9 @@ errout: } static int ip6_route_dev_notify(struct notifier_block *this, - unsigned long event, void *data) + unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { @@ -2790,7 +2814,7 @@ static const struct file_operations rt6_stats_seq_fops = { #ifdef CONFIG_SYSCTL static -int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, +int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net; @@ -2801,11 +2825,11 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, net = (struct net *)ctl->extra1; delay = net->ipv6.sysctl.flush_delay; proc_dointvec(ctl, write, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); + fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); return 0; } -ctl_table ipv6_route_table_template[] = { +struct ctl_table ipv6_route_table_template[] = { { .procname = "flush", .data = &init_net.ipv6.sysctl.flush_delay, diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 335363478bbf..a3437a4cd07e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -466,14 +466,14 @@ isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t) static void ipip6_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); - struct sit_net *sitn = net_generic(net, sit_net_id); + struct ip_tunnel *tunnel = netdev_priv(dev); + struct sit_net *sitn = net_generic(tunnel->net, sit_net_id); if (dev == sitn->fb_tunnel_dev) { RCU_INIT_POINTER(sitn->tunnels_wc[0], NULL); } else { - ipip6_tunnel_unlink(sitn, netdev_priv(dev)); - ipip6_tunnel_del_prl(netdev_priv(dev), NULL); + ipip6_tunnel_unlink(sitn, tunnel); + ipip6_tunnel_del_prl(tunnel, NULL); } dev_put(dev); } @@ -577,6 +577,10 @@ static int ipip6_rcv(struct sk_buff *skb) if (tunnel != NULL) { struct pcpu_tstats *tstats; + if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && + tunnel->parms.iph.protocol != 0) + goto out; + secpath_reset(skb); skb->mac_header = skb->network_header; skb_reset_network_header(skb); @@ -589,7 +593,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel->dev->stats.rx_errors++; goto out; } - } else { + } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) { if (is_spoofed_6rd(tunnel, iph->saddr, &ipv6_hdr(skb)->saddr) || is_spoofed_6rd(tunnel, iph->daddr, @@ -617,6 +621,8 @@ static int ipip6_rcv(struct sk_buff *skb) tstats->rx_packets++; tstats->rx_bytes += skb->len; + if (tunnel->net != dev_net(tunnel->dev)) + skb_scrub_packet(skb); netif_rx(skb); return 0; @@ -629,6 +635,40 @@ out: return 0; } +static const struct tnl_ptk_info tpi = { + /* no tunnel info required for ipip. */ + .proto = htons(ETH_P_IP), +}; + +static int ipip_rcv(struct sk_buff *skb) +{ + const struct iphdr *iph; + struct ip_tunnel *tunnel; + + if (iptunnel_pull_header(skb, 0, tpi.proto)) + goto drop; + + iph = ip_hdr(skb); + + tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, + iph->saddr, iph->daddr); + if (tunnel != NULL) { + if (tunnel->parms.iph.protocol != IPPROTO_IPIP && + tunnel->parms.iph.protocol != 0) + goto drop; + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); + } + + return 1; + +drop: + kfree_skb(skb); + return 0; +} + /* * If the IPv6 address comes from 6rd / 6to4 (RFC 3056) addr space this function * stores the embedded IPv4 address in v4dst and returns true. @@ -690,13 +730,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, __be16 df = tiph->frag_off; struct rtable *rt; /* Route to the other host */ struct net_device *tdev; /* Device to other host */ - struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst = tiph->daddr; struct flowi4 fl4; int mtu; const struct in6_addr *addr6; int addr_type; + u8 ttl; + int err; if (skb->protocol != htons(ETH_P_IPV6)) goto tx_error; @@ -764,7 +805,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, goto tx_error; } - rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, + rt = ip_route_output_ports(tunnel->net, &fl4, NULL, dst, tiph->saddr, 0, 0, IPPROTO_IPV6, RT_TOS(tos), @@ -819,6 +860,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tunnel->err_count = 0; } + if (tunnel->net != dev_net(dev)) + skb_scrub_packet(skb); + /* * Okay, now see if we can stuff it in the buffer as-is. */ @@ -839,34 +883,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb = new_skb; iph6 = ipv6_hdr(skb); } - - skb->transport_header = skb->network_header; - skb_push(skb, sizeof(struct iphdr)); - skb_reset_network_header(skb); - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - IPCB(skb)->flags = 0; - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - - /* - * Push down and install the IPIP header. - */ - - iph = ip_hdr(skb); - iph->version = 4; - iph->ihl = sizeof(struct iphdr)>>2; - iph->frag_off = df; - iph->protocol = IPPROTO_IPV6; - iph->tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); - iph->daddr = fl4.daddr; - iph->saddr = fl4.saddr; - - if ((iph->ttl = tiph->ttl) == 0) - iph->ttl = iph6->hop_limit; - - skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(iph, skb_dst(skb), NULL); - iptunnel_xmit(skb, dev); + ttl = tiph->ttl; + if (ttl == 0) + ttl = iph6->hop_limit; + tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); + + err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, fl4.daddr, + IPPROTO_IPV6, tos, ttl, df); + iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; tx_error_icmp: @@ -877,6 +901,43 @@ tx_error: return NETDEV_TX_OK; } +static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + const struct iphdr *tiph = &tunnel->parms.iph; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP); + return NETDEV_TX_OK; +} + +static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + switch (skb->protocol) { + case htons(ETH_P_IP): + ipip_tunnel_xmit(skb, dev); + break; + case htons(ETH_P_IPV6): + ipip6_tunnel_xmit(skb, dev); + break; + default: + goto tx_err; + } + + return NETDEV_TX_OK; + +tx_err: + dev->stats.tx_errors++; + dev_kfree_skb(skb); + return NETDEV_TX_OK; + +} + static void ipip6_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; @@ -888,7 +949,8 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct rtable *rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, + struct rtable *rt = ip_route_output_ports(tunnel->net, &fl4, + NULL, iph->daddr, iph->saddr, 0, 0, IPPROTO_IPV6, @@ -903,7 +965,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) } if (!tdev && tunnel->parms.link) - tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link); if (tdev) { dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); @@ -916,7 +978,7 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) { - struct net *net = dev_net(t->dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); ipip6_tunnel_unlink(sitn, t); @@ -1027,7 +1089,11 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) goto done; err = -EINVAL; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPV6 || + if (p.iph.protocol != IPPROTO_IPV6 && + p.iph.protocol != IPPROTO_IPIP && + p.iph.protocol != 0) + goto done; + if (p.iph.version != 4 || p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) goto done; if (p.iph.ttl) @@ -1164,7 +1230,7 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) static const struct net_device_ops ipip6_netdev_ops = { .ndo_uninit = ipip6_tunnel_uninit, - .ndo_start_xmit = ipip6_tunnel_xmit, + .ndo_start_xmit = sit_tunnel_xmit, .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, @@ -1188,7 +1254,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; dev->iflink = 0; dev->addr_len = 4; - dev->features |= NETIF_F_NETNS_LOCAL; dev->features |= NETIF_F_LLTX; } @@ -1197,6 +1262,7 @@ static int ipip6_tunnel_init(struct net_device *dev) struct ip_tunnel *tunnel = netdev_priv(dev); tunnel->dev = dev; + tunnel->net = dev_net(dev); memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); @@ -1217,6 +1283,7 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct sit_net *sitn = net_generic(net, sit_net_id); tunnel->dev = dev; + tunnel->net = dev_net(dev); strcpy(tunnel->parms.name, dev->name); iph->version = 4; @@ -1232,6 +1299,22 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) return 0; } +static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + u8 proto; + + if (!data || !data[IFLA_IPTUN_PROTO]) + return 0; + + proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + if (proto != IPPROTO_IPV6 && + proto != IPPROTO_IPIP && + proto != 0) + return -EINVAL; + + return 0; +} + static void ipip6_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm *parms) { @@ -1268,6 +1351,10 @@ static void ipip6_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_FLAGS]) parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + + if (data[IFLA_IPTUN_PROTO]) + parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); + } #ifdef CONFIG_IPV6_SIT_6RD @@ -1339,9 +1426,9 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip_tunnel *t; + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; - struct net *net = dev_net(dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; @@ -1391,6 +1478,8 @@ static size_t ipip6_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_FLAGS */ nla_total_size(2) + + /* IFLA_IPTUN_PROTO */ + nla_total_size(1) + #ifdef CONFIG_IPV6_SIT_6RD /* IFLA_IPTUN_6RD_PREFIX */ nla_total_size(sizeof(struct in6_addr)) + @@ -1416,6 +1505,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, !!(parm->iph.frag_off & htons(IP_DF))) || + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) || nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; @@ -1445,6 +1535,7 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, + [IFLA_IPTUN_PROTO] = { .type = NLA_U8 }, #ifdef CONFIG_IPV6_SIT_6RD [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, @@ -1459,6 +1550,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .policy = ipip6_policy, .priv_size = sizeof(struct ip_tunnel), .setup = ipip6_tunnel_setup, + .validate = ipip6_validate, .newlink = ipip6_newlink, .changelink = ipip6_changelink, .get_size = ipip6_get_size, @@ -1471,10 +1563,22 @@ static struct xfrm_tunnel sit_handler __read_mostly = { .priority = 1, }; +static struct xfrm_tunnel ipip_handler __read_mostly = { + .handler = ipip_rcv, + .err_handler = ipip6_err, + .priority = 2, +}; + static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) { + struct net *net = dev_net(sitn->fb_tunnel_dev); + struct net_device *dev, *aux; int prio; + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &sit_link_ops) + unregister_netdevice_queue(dev, head); + for (prio = 1; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { @@ -1482,7 +1586,12 @@ static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_hea t = rtnl_dereference(sitn->tunnels[prio][h]); while (t != NULL) { - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (dev_net(t->dev) != net) + unregister_netdevice_queue(t->dev, + head); t = rtnl_dereference(t->next); } } @@ -1507,6 +1616,10 @@ static int __net_init sit_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(sitn->fb_tunnel_dev, net); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); if (err) @@ -1553,6 +1666,7 @@ static void __exit sit_cleanup(void) { rtnl_link_unregister(&sit_link_ops); xfrm4_tunnel_deregister(&sit_handler, AF_INET6); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); unregister_pernet_device(&sit_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ @@ -1569,9 +1683,14 @@ static int __init sit_init(void) return err; err = xfrm4_tunnel_register(&sit_handler, AF_INET6); if (err < 0) { - pr_info("%s: can't add protocol\n", __func__); + pr_info("%s: can't register ip6ip4\n", __func__); goto xfrm_tunnel_failed; } + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); + if (err < 0) { + pr_info("%s: can't register ip4ip4\n", __func__); + goto xfrm_tunnel4_failed; + } err = rtnl_link_register(&sit_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1580,6 +1699,8 @@ out: return err; rtnl_link_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel4_failed: xfrm4_tunnel_deregister(&sit_handler, AF_INET6); xfrm_tunnel_failed: unregister_pernet_device(&sit_net_ops); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index e85c48bd404f..107b2f1d90ae 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -16,7 +16,7 @@ #include <net/addrconf.h> #include <net/inet_frag.h> -static ctl_table ipv6_table_template[] = { +static struct ctl_table ipv6_table_template[] = { { .procname = "bindv6only", .data = &init_net.ipv6.sysctl.bindv6only, @@ -27,7 +27,7 @@ static ctl_table ipv6_table_template[] = { { } }; -static ctl_table ipv6_rotable[] = { +static struct ctl_table ipv6_rotable[] = { { .procname = "mld_max_msf", .data = &sysctl_mld_max_msf, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0a17ed9eaf39..6e1649d58533 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -63,6 +63,7 @@ #include <net/inet_common.h> #include <net/secure_seq.h> #include <net/tcp_memcontrol.h> +#include <net/busy_poll.h> #include <asm/uaccess.h> @@ -1498,6 +1499,7 @@ process: if (sk_filter(sk, skb)) goto discard_and_relse; + sk_mark_napi_id(sk, skb); skb->dev = NULL; bh_lock_sock_nested(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 42923b14dfa6..f4058150262b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -46,6 +46,7 @@ #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/inet6_hashtables.h> +#include <net/busy_poll.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -841,7 +842,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { - int ret = udpv6_queue_rcv_skb(sk, skb); + int ret; + + sk_mark_napi_id(sk, skb); + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -955,11 +959,16 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct udphdr *uh; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + fl6 = &inet->cork.fl.u.ip6; + /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) goto out; @@ -1359,48 +1368,17 @@ static const struct inet6_protocol udpv6_protocol = { /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS - -static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket) -{ - struct inet_sock *inet = inet_sk(sp); - struct ipv6_pinfo *np = inet6_sk(sp); - const struct in6_addr *dest, *src; - __u16 destp, srcp; - - dest = &np->daddr; - src = &np->rcv_saddr; - destp = ntohs(inet->inet_dport); - srcp = ntohs(inet->inet_sport); - seq_printf(seq, - "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " - "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %d\n", - bucket, - src->s6_addr32[0], src->s6_addr32[1], - src->s6_addr32[2], src->s6_addr32[3], srcp, - dest->s6_addr32[0], dest->s6_addr32[1], - dest->s6_addr32[2], dest->s6_addr32[3], destp, - sp->sk_state, - sk_wmem_alloc_get(sp), - sk_rmem_alloc_get(sp), - 0, 0L, 0, - from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), - 0, - sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp, - atomic_read(&sp->sk_drops)); -} - int udp6_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) - seq_printf(seq, - " sl " - "local_address " - "remote_address " - "st tx_queue rx_queue tr tm->when retrnsmt" - " uid timeout inode ref pointer drops\n"); - else - udp6_sock_seq_show(seq, v, ((struct udp_iter_state *)seq->private)->bucket); + if (v == SEQ_START_TOKEN) { + seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); + } else { + int bucket = ((struct udp_iter_state *)seq->private)->bucket; + struct inet_sock *inet = inet_sk(v); + __u16 srcp = ntohs(inet->inet_sport); + __u16 destp = ntohs(inet->inet_dport); + ip6_dgram_sock_seq_show(seq, v, srcp, destp, bucket); + } return 0; } diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index d3cfaf9c7a08..5d1b8d7ac993 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -64,7 +64,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | - SKB_GSO_GRE) || + SKB_GSO_GRE | + SKB_GSO_MPLS) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f547a47d381c..7a1e0fc1bd4d 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -330,7 +330,7 @@ static __inline__ void __ipxitf_put(struct ipx_interface *intrfc) static int ipxitf_device_event(struct notifier_block *notifier, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ipx_interface *i, *tmp; if (!net_eq(dev_net(dev), &init_net)) diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index d14152e866d9..ffcec225b5d9 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -44,12 +44,12 @@ static int irlan_eth_open(struct net_device *dev); static int irlan_eth_close(struct net_device *dev); static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev); -static void irlan_eth_set_multicast_list( struct net_device *dev); +static void irlan_eth_set_multicast_list(struct net_device *dev); static const struct net_device_ops irlan_eth_netdev_ops = { - .ndo_open = irlan_eth_open, - .ndo_stop = irlan_eth_close, - .ndo_start_xmit = irlan_eth_xmit, + .ndo_open = irlan_eth_open, + .ndo_stop = irlan_eth_close, + .ndo_start_xmit = irlan_eth_xmit, .ndo_set_rx_mode = irlan_eth_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -110,7 +110,7 @@ static int irlan_eth_open(struct net_device *dev) { struct irlan_cb *self = netdev_priv(dev); - IRDA_DEBUG(2, "%s()\n", __func__ ); + IRDA_DEBUG(2, "%s()\n", __func__); /* Ready to play! */ netif_stop_queue(dev); /* Wait until data link is ready */ @@ -137,7 +137,7 @@ static int irlan_eth_close(struct net_device *dev) { struct irlan_cb *self = netdev_priv(dev); - IRDA_DEBUG(2, "%s()\n", __func__ ); + IRDA_DEBUG(2, "%s()\n", __func__); /* Stop device */ netif_stop_queue(dev); @@ -310,35 +310,32 @@ static void irlan_eth_set_multicast_list(struct net_device *dev) { struct irlan_cb *self = netdev_priv(dev); - IRDA_DEBUG(2, "%s()\n", __func__ ); + IRDA_DEBUG(2, "%s()\n", __func__); /* Check if data channel has been connected yet */ if (self->client.state != IRLAN_DATA) { - IRDA_DEBUG(1, "%s(), delaying!\n", __func__ ); + IRDA_DEBUG(1, "%s(), delaying!\n", __func__); return; } if (dev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ IRDA_WARNING("Promiscuous mode not implemented by IrLAN!\n"); - } - else if ((dev->flags & IFF_ALLMULTI) || + } else if ((dev->flags & IFF_ALLMULTI) || netdev_mc_count(dev) > HW_MAX_ADDRS) { /* Disable promiscuous mode, use normal mode. */ - IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ ); + IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__); /* hardware_set_filter(NULL); */ irlan_set_multicast_filter(self, TRUE); - } - else if (!netdev_mc_empty(dev)) { - IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__ ); + } else if (!netdev_mc_empty(dev)) { + IRDA_DEBUG(4, "%s(), Setting multicast filter\n", __func__); /* Walk the address list, and load the filter */ /* hardware_set_filter(dev->mc_list); */ irlan_set_multicast_filter(self, TRUE); - } - else { - IRDA_DEBUG(4, "%s(), Clearing multicast filter\n", __func__ ); + } else { + IRDA_DEBUG(4, "%s(), Clearing multicast filter\n", __func__); irlan_set_multicast_filter(self, FALSE); } diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c index de73f6496db5..d6a59651767a 100644 --- a/net/irda/irsysctl.c +++ b/net/irda/irsysctl.c @@ -73,7 +73,7 @@ static int min_lap_keepalive_time = 100; /* 100us */ /* For other sysctl, I've no idea of the range. Maybe Dag could help * us on that - Jean II */ -static int do_devname(ctl_table *table, int write, +static int do_devname(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -90,7 +90,7 @@ static int do_devname(ctl_table *table, int write, } -static int do_discovery(ctl_table *table, int write, +static int do_discovery(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -111,7 +111,7 @@ static int do_discovery(ctl_table *table, int write, } /* One file */ -static ctl_table irda_table[] = { +static struct ctl_table irda_table[] = { { .procname = "discovery", .data = &sysctl_discovery, diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index ae691651b721..168aff5e60de 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2293,7 +2293,7 @@ out_unlock: static int afiucv_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *event_dev = (struct net_device *)ptr; + struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); struct sock *sk; struct iucv_sock *iucv; diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 4fe76ff214c2..cd5b8ec9be04 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -621,7 +621,7 @@ static void iucv_disable(void) put_online_cpus(); } -static int __cpuinit iucv_cpu_notify(struct notifier_block *self, +static int iucv_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { cpumask_t cpumask; diff --git a/net/key/af_key.c b/net/key/af_key.c index 9da862070dd8..ab8bd2cabfa0 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2081,6 +2081,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * pol->sadb_x_policy_type = IPSEC_POLICY_NONE; } pol->sadb_x_policy_dir = dir+1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; @@ -3137,7 +3138,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; + pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ if (x->id.proto == IPPROTO_AH) @@ -3525,6 +3528,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = dir + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = 0; pol->sadb_x_policy_priority = 0; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6984c3a353cd..feae495a0a30 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -414,10 +414,7 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ session->nr++; - if (tunnel->version == L2TP_HDR_VER_2) - session->nr &= 0xffff; - else - session->nr &= 0xffffff; + session->nr &= session->nr_max; l2tp_dbg(session, L2TP_MSG_SEQ, "%s: updated nr to %hu\n", session->name, session->nr); @@ -542,6 +539,84 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return __skb_checksum_complete(skb); } +static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr) +{ + u32 nws; + + if (nr >= session->nr) + nws = nr - session->nr; + else + nws = (session->nr_max + 1) - (session->nr - nr); + + return nws < session->nr_window_size; +} + +/* If packet has sequence numbers, queue it if acceptable. Returns 0 if + * acceptable, else non-zero. + */ +static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) +{ + if (!l2tp_seq_check_rx_window(session, L2TP_SKB_CB(skb)->ns)) { + /* Packet sequence number is outside allowed window. + * Discard it. + */ + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: pkt %u len %d discarded, outside window, nr=%u\n", + session->name, L2TP_SKB_CB(skb)->ns, + L2TP_SKB_CB(skb)->length, session->nr); + goto discard; + } + + if (session->reorder_timeout != 0) { + /* Packet reordering enabled. Add skb to session's + * reorder queue, in order of ns. + */ + l2tp_recv_queue_skb(session, skb); + goto out; + } + + /* Packet reordering disabled. Discard out-of-sequence packets, while + * tracking the number if in-sequence packets after the first OOS packet + * is seen. After nr_oos_count_max in-sequence packets, reset the + * sequence number to re-enable packet reception. + */ + if (L2TP_SKB_CB(skb)->ns == session->nr) { + skb_queue_tail(&session->reorder_q, skb); + } else { + u32 nr_oos = L2TP_SKB_CB(skb)->ns; + u32 nr_next = (session->nr_oos + 1) & session->nr_max; + + if (nr_oos == nr_next) + session->nr_oos_count++; + else + session->nr_oos_count = 0; + + session->nr_oos = nr_oos; + if (session->nr_oos_count > session->nr_oos_count_max) { + session->reorder_skip = 1; + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: %d oos packets received. Resetting sequence numbers\n", + session->name, session->nr_oos_count); + } + if (!session->reorder_skip) { + atomic_long_inc(&session->stats.rx_seq_discards); + l2tp_dbg(session, L2TP_MSG_SEQ, + "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", + session->name, L2TP_SKB_CB(skb)->ns, + L2TP_SKB_CB(skb)->length, session->nr, + skb_queue_len(&session->reorder_q)); + goto discard; + } + skb_queue_tail(&session->reorder_q, skb); + } + +out: + return 0; + +discard: + return 1; +} + /* Do receive processing of L2TP data frames. We handle both L2TPv2 * and L2TPv3 data frames here. * @@ -757,26 +832,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * enabled. Saved L2TP protocol info is stored in skb->sb[]. */ if (L2TP_SKB_CB(skb)->has_seq) { - if (session->reorder_timeout != 0) { - /* Packet reordering enabled. Add skb to session's - * reorder queue, in order of ns. - */ - l2tp_recv_queue_skb(session, skb); - } else { - /* Packet reordering disabled. Discard out-of-sequence - * packets - */ - if (L2TP_SKB_CB(skb)->ns != session->nr) { - atomic_long_inc(&session->stats.rx_seq_discards); - l2tp_dbg(session, L2TP_MSG_SEQ, - "%s: oos pkt %u len %d discarded, waiting for %u, reorder_q_len=%d\n", - session->name, L2TP_SKB_CB(skb)->ns, - L2TP_SKB_CB(skb)->length, session->nr, - skb_queue_len(&session->reorder_q)); - goto discard; - } - skb_queue_tail(&session->reorder_q, skb); - } + if (l2tp_recv_data_seq(session, skb)) + goto discard; } else { /* No sequence numbers. Add the skb to the tail of the * reorder queue. This ensures that it will be @@ -1812,6 +1869,15 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn session->session_id = session_id; session->peer_session_id = peer_session_id; session->nr = 0; + if (tunnel->version == L2TP_HDR_VER_2) + session->nr_max = 0xffff; + else + session->nr_max = 0xffffff; + session->nr_window_size = session->nr_max / 2; + session->nr_oos_count_max = 4; + + /* Use NR of first received packet */ + session->reorder_skip = 1; sprintf(&session->name[0], "sess %u/%u", tunnel->tunnel_id, session->session_id); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 485a490fd990..66a559b104b6 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -102,6 +102,11 @@ struct l2tp_session { u32 nr; /* session NR state (receive) */ u32 ns; /* session NR state (send) */ struct sk_buff_head reorder_q; /* receive reorder queue */ + u32 nr_max; /* max NR. Depends on tunnel */ + u32 nr_window_size; /* NR window size */ + u32 nr_oos; /* NR of last OOS packet */ + int nr_oos_count; /* For OOS recovery */ + int nr_oos_count_max; struct hlist_node hlist; /* Hash list node */ atomic_t ref_count; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 8dec6876dc50..5ebee2ded9e9 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1793,7 +1793,8 @@ static const struct proto_ops pppol2tp_ops = { static const struct pppox_proto pppol2tp_proto = { .create = pppol2tp_create, - .ioctl = pppol2tp_ioctl + .ioctl = pppol2tp_ioctl, + .owner = THIS_MODULE, }; #ifdef CONFIG_L2TP_V3 diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 0785e95c9924..be7614b9ed27 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -85,7 +85,7 @@ void ieee80211_aes_ccm_encrypt(struct crypto_cipher *tfm, u8 *scratch, *cpos++ = *pos++ ^ e[i]; } - for (i = 0; i < CCMP_MIC_LEN; i++) + for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++) mic[i] = b[i] ^ s_0[i]; } @@ -123,7 +123,7 @@ int ieee80211_aes_ccm_decrypt(struct crypto_cipher *tfm, u8 *scratch, crypto_cipher_encrypt_one(tfm, a, a); } - for (i = 0; i < CCMP_MIC_LEN; i++) { + for (i = 0; i < IEEE80211_CCMP_MIC_LEN; i++) { if ((mic[i] ^ s_0[i]) != a[i]) return -1; } @@ -138,7 +138,7 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[]) tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (!IS_ERR(tfm)) - crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN); + crypto_cipher_setkey(tfm, key, WLAN_KEY_LEN_CCMP); return tfm; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4fdb306e42e0..43dd7525bfcb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -73,16 +73,19 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct ieee80211_local *local = sdata->local; if (ieee80211_sdata_running(sdata)) { + u32 mask = MONITOR_FLAG_COOK_FRAMES | + MONITOR_FLAG_ACTIVE; + /* - * Prohibit MONITOR_FLAG_COOK_FRAMES to be - * changed while the interface is up. + * Prohibit MONITOR_FLAG_COOK_FRAMES and + * MONITOR_FLAG_ACTIVE to be changed while the + * interface is up. * Else we would need to add a lot of cruft * to update everything: * cooked_mntrs, monitor and all fif_* counters * reconfigure hardware */ - if ((*flags & MONITOR_FLAG_COOK_FRAMES) != - (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)) + if ((*flags & mask) != (sdata->u.mntr_flags & mask)) return -EBUSY; ieee80211_adjust_monitor_flags(sdata, -1); @@ -444,7 +447,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_local *local = sdata->local; struct timespec uptime; u64 packets = 0; - int ac; + int i, ac; sinfo->generation = sdata->local->sta_generation; @@ -488,6 +491,17 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->signal = (s8)sta->last_signal; sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } + if (sta->chains) { + sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | + STATION_INFO_CHAIN_SIGNAL_AVG; + + sinfo->chains = sta->chains; + for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { + sinfo->chain_signal[i] = sta->chain_signal_last[i]; + sinfo->chain_signal_avg[i] = + (s8) -ewma_read(&sta->chain_signal_avg[i]); + } + } sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); sta_set_rate_info_rx(sta, &sinfo->rxrate); @@ -652,6 +666,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, if (sta->sdata->dev != dev) continue; + sinfo.filled = 0; + sta_set_sinfo(sta, &sinfo); i = 0; ADD_STA_STATS(sta); } @@ -728,7 +744,7 @@ static void ieee80211_get_et_strings(struct wiphy *wiphy, if (sset == ETH_SS_STATS) { sz_sta_stats = sizeof(ieee80211_gstrings_sta_stats); - memcpy(data, *ieee80211_gstrings_sta_stats, sz_sta_stats); + memcpy(data, ieee80211_gstrings_sta_stats, sz_sta_stats); } drv_get_et_strings(sdata, sset, &(data[sz_sta_stats])); } @@ -1741,6 +1757,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, ifmsh->mesh_pp_id = setup->path_sel_proto; ifmsh->mesh_pm_id = setup->path_metric; ifmsh->user_mpm = setup->user_mpm; + ifmsh->mesh_auth_id = setup->auth_id; ifmsh->security = IEEE80211_MESH_SEC_NONE; if (setup->is_authenticated) ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; @@ -1750,6 +1767,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, /* mcast rate setting in Mesh Node */ memcpy(sdata->vif.bss_conf.mcast_rate, setup->mcast_rate, sizeof(setup->mcast_rate)); + sdata->vif.bss_conf.basic_rates = setup->basic_rates; sdata->vif.bss_conf.beacon_int = setup->beacon_interval; sdata->vif.bss_conf.dtim_period = setup->dtim_period; @@ -1862,6 +1880,8 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy, if (_chg_mesh_attr(NL80211_MESHCONF_AWAKE_WINDOW, mask)) conf->dot11MeshAwakeWindowDuration = nconf->dot11MeshAwakeWindowDuration; + if (_chg_mesh_attr(NL80211_MESHCONF_PLINK_TIMEOUT, mask)) + conf->plink_timeout = nconf->plink_timeout; ieee80211_mbss_info_change_notify(sdata, BSS_CHANGED_BEACON); return 0; } @@ -2312,7 +2332,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode old_req; int err; - lockdep_assert_held(&sdata->u.mgd.mtx); + lockdep_assert_held(&sdata->wdev.mtx); old_req = sdata->u.mgd.req_smps; sdata->u.mgd.req_smps = smps_mode; @@ -2369,9 +2389,9 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, local->dynamic_ps_forced_timeout = timeout; /* no change, but if automatic follow powersave */ - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); __ieee80211_request_smps(sdata, sdata->u.mgd.req_smps); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); @@ -2809,7 +2829,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, !rcu_access_pointer(sdata->bss->beacon)) need_offchan = true; if (!ieee80211_is_action(mgmt->frame_control) || - mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) + mgmt->u.action.category == WLAN_CATEGORY_PUBLIC || + mgmt->u.action.category == WLAN_CATEGORY_SELF_PROTECTED) break; rcu_read_lock(); sta = sta_info_get(sdata, mgmt->da); @@ -2829,6 +2850,12 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, return -EOPNOTSUPP; } + /* configurations requiring offchan cannot work if no channel has been + * specified + */ + if (need_offchan && !chan) + return -EINVAL; + mutex_lock(&local->mtx); /* Check if the operating channel is the requested channel */ @@ -2838,10 +2865,15 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (chanctx_conf) - need_offchan = chan != chanctx_conf->def.chan; - else + if (chanctx_conf) { + need_offchan = chan && (chan != chanctx_conf->def.chan); + } else if (!chan) { + ret = -EINVAL; + rcu_read_unlock(); + goto out_unlock; + } else { need_offchan = true; + } rcu_read_unlock(); } @@ -2901,19 +2933,8 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, u16 frame_type, bool reg) { struct ieee80211_local *local = wiphy_priv(wiphy); - struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); switch (frame_type) { - case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH: - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { - struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - - if (reg) - ifibss->auth_frame_registrations++; - else - ifibss->auth_frame_registrations--; - } - break; case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ: if (reg) local->probe_req_reg++; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 14abcf44f974..cafe614ef93d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -228,9 +228,9 @@ static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); err = __ieee80211_request_smps(sdata, smps_mode); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); return err; } @@ -313,16 +313,16 @@ static ssize_t ieee80211_if_parse_tkip_mic_test( case NL80211_IFTYPE_STATION: fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); if (!sdata->u.mgd.associated) { - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); dev_kfree_skb(skb); return -ENOTCONN; } memcpy(hdr->addr1, sdata->u.mgd.associated->bssid, ETH_ALEN); memcpy(hdr->addr2, sdata->vif.addr, ETH_ALEN); memcpy(hdr->addr3, addr, ETH_ALEN); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); break; default: dev_kfree_skb(skb); @@ -471,6 +471,8 @@ __IEEE80211_IF_FILE_W(tsf); IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); #ifdef CONFIG_MAC80211_MESH +IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); + /* Mesh stats attributes */ IEEE80211_IF_FILE(fwded_mcast, u.mesh.mshstats.fwded_mcast, DEC); IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC); @@ -480,7 +482,6 @@ IEEE80211_IF_FILE(dropped_frames_congestion, u.mesh.mshstats.dropped_frames_congestion, DEC); IEEE80211_IF_FILE(dropped_frames_no_route, u.mesh.mshstats.dropped_frames_no_route, DEC); -IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); /* Mesh parameters */ IEEE80211_IF_FILE(dot11MeshMaxRetries, @@ -583,6 +584,7 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata) static void add_mesh_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD_MODE(tsf, 0600); + DEBUGFS_ADD_MODE(estab_plinks, 0400); } static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) @@ -598,7 +600,6 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata) MESHSTATS_ADD(dropped_frames_ttl); MESHSTATS_ADD(dropped_frames_no_route); MESHSTATS_ADD(dropped_frames_congestion); - MESHSTATS_ADD(estab_plinks); #undef MESHSTATS_ADD } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 169664c122e2..b931c96a596f 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -146,7 +146,8 @@ static inline int drv_add_interface(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF)))) + !(local->hw.flags & IEEE80211_HW_WANT_MONITOR_VIF) && + !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)))) return -EINVAL; trace_drv_add_interface(local, sdata); diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index af8cee06e4f3..f83534f6a2ee 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -281,13 +281,14 @@ void ieee80211_ba_session_work(struct work_struct *work) sta, tid, WLAN_BACK_RECIPIENT, WLAN_REASON_UNSPECIFIED, true); + spin_lock_bh(&sta->lock); + tid_tx = sta->ampdu_mlme.tid_start_tx[tid]; if (tid_tx) { /* * Assign it over to the normal tid_tx array * where it "goes live". */ - spin_lock_bh(&sta->lock); sta->ampdu_mlme.tid_start_tx[tid] = NULL; /* could there be a race? */ @@ -300,6 +301,7 @@ void ieee80211_ba_session_work(struct work_struct *work) ieee80211_tx_ba_session_handle_start(sta, tid); continue; } + spin_unlock_bh(&sta->lock); tid_tx = rcu_dereference_protected_tid_tx(sta, tid); if (tid_tx && test_and_clear_bit(HT_AGG_STATE_WANT_STOP, @@ -429,9 +431,9 @@ void ieee80211_request_smps_work(struct work_struct *work) container_of(work, struct ieee80211_sub_if_data, u.mgd.request_smps_work); - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); } void ieee80211_request_smps(struct ieee80211_vif *vif, diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 170f9a7fa319..ea7b9c2c7e66 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -54,7 +54,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; int frame_len; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); /* Reset own TSF to allow time synchronization work. */ drv_reset_tsf(local, sdata); @@ -74,14 +74,14 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&ifibss->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); rcu_assign_pointer(ifibss->presp, NULL); if (presp) kfree_rcu(presp, rcu_head); sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - cfg80211_chandef_create(&chandef, chan, ifibss->channel_type); + chandef = ifibss->chandef; if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) { chandef.width = NL80211_CHAN_WIDTH_20; chandef.center_freq1 = chan->center_freq; @@ -176,6 +176,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, /* add HT capability and information IEs */ if (chandef.width != NL80211_CHAN_WIDTH_20_NOHT && + chandef.width != NL80211_CHAN_WIDTH_5 && + chandef.width != NL80211_CHAN_WIDTH_10 && sband->ht_cap.ht_supported) { pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap, sband->ht_cap.cap); @@ -263,7 +265,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, const struct cfg80211_bss_ies *ies; u64 tsf; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); if (beacon_int < 10) beacon_int = 10; @@ -298,8 +300,7 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, tsf, false); } -static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, - bool auth) +static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta) __acquires(RCU) { struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -321,26 +322,19 @@ static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta, /* If it fails, maybe we raced another insertion? */ if (sta_info_insert_rcu(sta)) return sta_info_get(sdata, addr); - if (auth && !sdata->u.ibss.auth_frame_registrations) { - ibss_dbg(sdata, - "TX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=1)\n", - sdata->vif.addr, addr, sdata->u.ibss.bssid); - ieee80211_send_auth(sdata, 1, WLAN_AUTH_OPEN, 0, NULL, 0, - addr, sdata->u.ibss.bssid, NULL, 0, 0, 0); - } return sta; } static struct sta_info * -ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - const u8 *bssid, const u8 *addr, - u32 supp_rates, bool auth) +ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, + const u8 *addr, u32 supp_rates) __acquires(RCU) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_supported_band *sband; int band; /* @@ -380,10 +374,11 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, sta->last_rx = jiffies; /* make sure mandatory rates are always added */ + sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); - return ieee80211_ibss_finish_sta(sta, auth); + return ieee80211_ibss_finish_sta(sta); } static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata, @@ -405,10 +400,8 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, size_t len) { u16 auth_alg, auth_transaction; - struct sta_info *sta; - u8 deauth_frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); if (len < 24 + 6) return; @@ -423,22 +416,6 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; - sta_info_destroy_addr(sdata, mgmt->sa); - sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 0, false); - rcu_read_unlock(); - - /* - * if we have any problem in allocating the new station, we reply with a - * DEAUTH frame to tell the other end that we had a problem - */ - if (!sta) { - ieee80211_send_deauth_disassoc(sdata, sdata->u.ibss.bssid, - IEEE80211_STYPE_DEAUTH, - WLAN_REASON_UNSPECIFIED, true, - deauth_frame_buf); - return; - } - /* * IEEE 802.11 standard does not require authentication in IBSS * networks and most implementations do not seem to use it. @@ -492,7 +469,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, prev_rates = sta->sta.supp_rates[band]; /* make sure mandatory rates are always added */ sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); if (sta->sta.supp_rates[band] != prev_rates) { ibss_dbg(sdata, @@ -504,7 +481,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } else { rcu_read_unlock(); sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid, - mgmt->sa, supp_rates, true); + mgmt->sa, supp_rates); } } @@ -512,7 +489,9 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, set_sta_flag(sta, WLAN_STA_WME); if (sta && elems->ht_operation && elems->ht_cap_elem && - sdata->u.ibss.channel_type != NL80211_CHAN_NO_HT) { + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_5 && + sdata->u.ibss.chandef.width != NL80211_CHAN_WIDTH_10) { /* we both use HT */ struct ieee80211_ht_cap htcap_ie; struct cfg80211_chan_def chandef; @@ -527,8 +506,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, * fall back to HT20 if we don't use or use * the other extension channel */ - if (cfg80211_get_chandef_type(&chandef) != - sdata->u.ibss.channel_type) + if (chandef.center_freq1 != + sdata->u.ibss.chandef.center_freq1) htcap_ie.cap_info &= cpu_to_le16(~IEEE80211_HT_CAP_SUP_WIDTH_20_40); @@ -567,7 +546,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, /* different channel */ if (sdata->u.ibss.fixed_channel && - sdata->u.ibss.channel != cbss->channel) + sdata->u.ibss.chandef.chan != cbss->channel) goto put_bss; /* different SSID */ @@ -608,7 +587,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, ieee80211_sta_join_ibss(sdata, bss); supp_rates = ieee80211_sta_get_rates(local, elems, band, NULL); ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, - supp_rates, true); + supp_rates); rcu_read_unlock(); } @@ -624,6 +603,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_supported_band *sband; int band; /* @@ -658,8 +638,9 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, sta->last_rx = jiffies; /* make sure mandatory rates are always added */ + sband = local->hw.wiphy->bands[band]; sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); + ieee80211_mandatory_rates(sband); spin_lock(&ifibss->incomplete_lock); list_add(&sta->list, &ifibss->incomplete_stations); @@ -673,7 +654,7 @@ static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) int active = 0; struct sta_info *sta; - lockdep_assert_held(&sdata->u.ibss.mtx); + sdata_assert_lock(sdata); rcu_read_lock(); @@ -699,7 +680,7 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); mod_timer(&ifibss->timer, round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL)); @@ -730,7 +711,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) u16 capability; int i; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); if (ifibss->fixed_bssid) { memcpy(bssid, ifibss->bssid, ETH_ALEN); @@ -755,7 +736,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) sdata->drop_unencrypted = 0; __ieee80211_sta_join_ibss(sdata, bssid, sdata->vif.bss_conf.beacon_int, - ifibss->channel, ifibss->basic_rates, + ifibss->chandef.chan, ifibss->basic_rates, capability, 0, true); } @@ -773,7 +754,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) int active_ibss; u16 capability; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); active_ibss = ieee80211_sta_active_ibss(sdata); ibss_dbg(sdata, "sta_find_ibss (active_ibss=%d)\n", active_ibss); @@ -787,7 +768,7 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) if (ifibss->fixed_bssid) bssid = ifibss->bssid; if (ifibss->fixed_channel) - chan = ifibss->channel; + chan = ifibss->chandef.chan; if (!is_zero_ether_addr(ifibss->bssid)) bssid = ifibss->bssid; cbss = cfg80211_get_bss(local->hw.wiphy, chan, bssid, @@ -843,10 +824,10 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct beacon_data *presp; u8 *pos, *end; - lockdep_assert_held(&ifibss->mtx); + sdata_assert_lock(sdata); presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&ifibss->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || len < 24 + 2 || !presp) @@ -930,7 +911,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - mutex_lock(&sdata->u.ibss.mtx); + sdata_lock(sdata); if (!sdata->u.ibss.ssid_len) goto mgmt_out; /* not ready to merge yet */ @@ -953,7 +934,7 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } mgmt_out: - mutex_unlock(&sdata->u.ibss.mtx); + sdata_unlock(sdata); } void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) @@ -961,7 +942,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct sta_info *sta; - mutex_lock(&ifibss->mtx); + sdata_lock(sdata); /* * Work could be scheduled after scan or similar @@ -978,7 +959,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) list_del(&sta->list); spin_unlock_bh(&ifibss->incomplete_lock); - ieee80211_ibss_finish_sta(sta, true); + ieee80211_ibss_finish_sta(sta); rcu_read_unlock(); spin_lock_bh(&ifibss->incomplete_lock); } @@ -997,7 +978,7 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata) } out: - mutex_unlock(&ifibss->mtx); + sdata_unlock(sdata); } static void ieee80211_ibss_timer(unsigned long data) @@ -1014,7 +995,6 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) setup_timer(&ifibss->timer, ieee80211_ibss_timer, (unsigned long) sdata); - mutex_init(&ifibss->mtx); INIT_LIST_HEAD(&ifibss->incomplete_stations); spin_lock_init(&ifibss->incomplete_lock); } @@ -1041,8 +1021,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, { u32 changed = 0; - mutex_lock(&sdata->u.ibss.mtx); - if (params->bssid) { memcpy(sdata->u.ibss.bssid, params->bssid, ETH_ALEN); sdata->u.ibss.fixed_bssid = true; @@ -1057,9 +1035,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.beacon_int = params->beacon_interval; - sdata->u.ibss.channel = params->chandef.chan; - sdata->u.ibss.channel_type = - cfg80211_get_chandef_type(¶ms->chandef); + sdata->u.ibss.chandef = params->chandef; sdata->u.ibss.fixed_channel = params->channel_fixed; if (params->ie) { @@ -1075,8 +1051,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, memcpy(sdata->u.ibss.ssid, params->ssid, params->ssid_len); sdata->u.ibss.ssid_len = params->ssid_len; - mutex_unlock(&sdata->u.ibss.mtx); - /* * 802.11n-2009 9.13.3.1: In an IBSS, the HT Protection field is * reserved, but an HT STA shall protect HT transmissions as though @@ -1112,8 +1086,6 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) struct sta_info *sta; struct beacon_data *presp; - mutex_lock(&sdata->u.ibss.mtx); - active_ibss = ieee80211_sta_active_ibss(sdata); if (!active_ibss && !is_zero_ether_addr(ifibss->bssid)) { @@ -1122,7 +1094,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; - cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->channel, + cbss = cfg80211_get_bss(local->hw.wiphy, ifibss->chandef.chan, ifibss->bssid, ifibss->ssid, ifibss->ssid_len, WLAN_CAPABILITY_IBSS | WLAN_CAPABILITY_PRIVACY, @@ -1157,7 +1129,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) /* remove beacon */ kfree(sdata->u.ibss.ie); presp = rcu_dereference_protected(ifibss->presp, - lockdep_is_held(&sdata->u.ibss.mtx)); + lockdep_is_held(&sdata->wdev.mtx)); RCU_INIT_POINTER(sdata->u.ibss.presp, NULL); sdata->vif.bss_conf.ibss_joined = false; sdata->vif.bss_conf.ibss_creator = false; @@ -1173,7 +1145,5 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.ibss.timer); - mutex_unlock(&sdata->u.ibss.mtx); - return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9ca8e3278cc0..8412a303993a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -94,6 +94,7 @@ struct ieee80211_bss { #define IEEE80211_MAX_SUPP_RATES 32 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; size_t supp_rates_len; + struct ieee80211_rate *beacon_rate; /* * During association, we save an ERP value from a probe response so @@ -366,7 +367,7 @@ struct ieee80211_mgd_assoc_data { u8 ssid_len; u8 supp_rates_len; bool wmm, uapsd; - bool have_beacon, need_beacon; + bool need_beacon; bool synced; bool timeout_started; @@ -394,7 +395,6 @@ struct ieee80211_if_managed { bool nullfunc_failed; bool connection_loss; - struct mutex mtx; struct cfg80211_bss *associated; struct ieee80211_mgd_auth_data *auth_data; struct ieee80211_mgd_assoc_data *assoc_data; @@ -405,6 +405,7 @@ struct ieee80211_if_managed { bool powersave; /* powersave requested for this iface */ bool broken_ap; /* AP is broken -- turn off powersave */ + bool have_beacon; u8 dtim_period; enum ieee80211_smps_mode req_smps, /* requested smps mode */ driver_smps_mode; /* smps mode request */ @@ -488,8 +489,6 @@ struct ieee80211_if_managed { struct ieee80211_if_ibss { struct timer_list timer; - struct mutex mtx; - unsigned long last_scan_completed; u32 basic_rates; @@ -499,14 +498,12 @@ struct ieee80211_if_ibss { bool privacy; bool control_port; - unsigned int auth_frame_registrations; u8 bssid[ETH_ALEN] __aligned(2); u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len, ie_len; u8 *ie; - struct ieee80211_channel *channel; - enum nl80211_channel_type channel_type; + struct cfg80211_chan_def chandef; unsigned long ibss_join_req; /* probe response/beacon for IBSS */ @@ -545,6 +542,7 @@ struct ieee80211_if_mesh { struct timer_list mesh_path_root_timer; unsigned long wrkq_flags; + unsigned long mbss_changed; u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; size_t mesh_id_len; @@ -580,8 +578,6 @@ struct ieee80211_if_mesh { bool accepting_plinks; int num_gates; struct beacon_data __rcu *beacon; - /* just protects beacon updates for now */ - struct mutex mtx; const u8 *ie; u8 ie_len; enum { @@ -778,6 +774,26 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) return container_of(p, struct ieee80211_sub_if_data, vif); } +static inline void sdata_lock(struct ieee80211_sub_if_data *sdata) + __acquires(&sdata->wdev.mtx) +{ + mutex_lock(&sdata->wdev.mtx); + __acquire(&sdata->wdev.mtx); +} + +static inline void sdata_unlock(struct ieee80211_sub_if_data *sdata) + __releases(&sdata->wdev.mtx) +{ + mutex_unlock(&sdata->wdev.mtx); + __release(&sdata->wdev.mtx); +} + +static inline void +sdata_assert_lock(struct ieee80211_sub_if_data *sdata) +{ + lockdep_assert_held(&sdata->wdev.mtx); +} + static inline enum ieee80211_band ieee80211_get_sdata_band(struct ieee80211_sub_if_data *sdata) { @@ -1507,9 +1523,6 @@ static inline void ieee802_11_parse_elems(const u8 *start, size_t len, ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0); } -u32 ieee80211_mandatory_rates(struct ieee80211_local *local, - enum ieee80211_band band); - void ieee80211_dynamic_ps_enable_work(struct work_struct *work); void ieee80211_dynamic_ps_disable_work(struct work_struct *work); void ieee80211_dynamic_ps_timer(unsigned long data); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 98d20c0f6fed..cc117591f678 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -159,7 +159,8 @@ static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) return 0; } -static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr) +static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, + bool check_dup) { struct ieee80211_local *local = sdata->local; struct ieee80211_sub_if_data *iter; @@ -180,13 +181,16 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr) ((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) | ((u64)m[4] << 1*8) | ((u64)m[5] << 0*8); + if (!check_dup) + return ret; mutex_lock(&local->iflist_mtx); list_for_each_entry(iter, &local->interfaces, list) { if (iter == sdata) continue; - if (iter->vif.type == NL80211_IFTYPE_MONITOR) + if (iter->vif.type == NL80211_IFTYPE_MONITOR && + !(iter->u.mntr_flags & MONITOR_FLAG_ACTIVE)) continue; m = iter->vif.addr; @@ -208,12 +212,17 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sockaddr *sa = addr; + bool check_dup = true; int ret; if (ieee80211_sdata_running(sdata)) return -EBUSY; - ret = ieee80211_verify_mac(sdata, sa->sa_data); + if (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + check_dup = false; + + ret = ieee80211_verify_mac(sdata, sa->sa_data, check_dup); if (ret) return ret; @@ -545,7 +554,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; } - if (local->monitors == 0 && local->open_count == 0) { + if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) { + res = drv_add_interface(local, sdata); + if (res) + goto err_stop; + } else if (local->monitors == 0 && local->open_count == 0) { res = ieee80211_add_virtual_monitor(local); if (res) goto err_stop; @@ -923,7 +936,11 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->mtx); ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); - break; + + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + break; + + /* fall through */ default: if (going_down) drv_remove_interface(local, sdata); @@ -1072,7 +1089,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = { .ndo_start_xmit = ieee80211_monitor_start_xmit, .ndo_set_rx_mode = ieee80211_set_multicast_list, .ndo_change_mtu = ieee80211_change_mtu, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = ieee80211_change_mac, .ndo_select_queue = ieee80211_monitor_select_queue, }; @@ -1747,10 +1764,9 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) } static int netdev_notify(struct notifier_block *nb, - unsigned long state, - void *ndev) + unsigned long state, void *ptr) { - struct net_device *dev = ndev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ieee80211_sub_if_data *sdata; if (state != NETDEV_CHANGENAME) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 67059b88fea5..e39cc91d0cf1 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -335,12 +335,12 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, switch (cipher) { case WLAN_CIPHER_SUITE_WEP40: case WLAN_CIPHER_SUITE_WEP104: - key->conf.iv_len = WEP_IV_LEN; - key->conf.icv_len = WEP_ICV_LEN; + key->conf.iv_len = IEEE80211_WEP_IV_LEN; + key->conf.icv_len = IEEE80211_WEP_ICV_LEN; break; case WLAN_CIPHER_SUITE_TKIP: - key->conf.iv_len = TKIP_IV_LEN; - key->conf.icv_len = TKIP_ICV_LEN; + key->conf.iv_len = IEEE80211_TKIP_IV_LEN; + key->conf.icv_len = IEEE80211_TKIP_ICV_LEN; if (seq) { for (i = 0; i < IEEE80211_NUM_TIDS; i++) { key->u.tkip.rx[i].iv32 = @@ -352,13 +352,13 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, spin_lock_init(&key->u.tkip.txlock); break; case WLAN_CIPHER_SUITE_CCMP: - key->conf.iv_len = CCMP_HDR_LEN; - key->conf.icv_len = CCMP_MIC_LEN; + key->conf.iv_len = IEEE80211_CCMP_HDR_LEN; + key->conf.icv_len = IEEE80211_CCMP_MIC_LEN; if (seq) { for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) - for (j = 0; j < CCMP_PN_LEN; j++) + for (j = 0; j < IEEE80211_CCMP_PN_LEN; j++) key->u.ccmp.rx_pn[i][j] = - seq[CCMP_PN_LEN - j - 1]; + seq[IEEE80211_CCMP_PN_LEN - j - 1]; } /* * Initialize AES key state here as an optimization so that @@ -375,9 +375,9 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, key->conf.iv_len = 0; key->conf.icv_len = sizeof(struct ieee80211_mmie); if (seq) - for (j = 0; j < CMAC_PN_LEN; j++) + for (j = 0; j < IEEE80211_CMAC_PN_LEN; j++) key->u.aes_cmac.rx_pn[j] = - seq[CMAC_PN_LEN - j - 1]; + seq[IEEE80211_CMAC_PN_LEN - j - 1]; /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. @@ -740,13 +740,13 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.ccmp.rx_pn[IEEE80211_NUM_TIDS]; else pn = key->u.ccmp.rx_pn[tid]; - memcpy(seq->ccmp.pn, pn, CCMP_PN_LEN); + memcpy(seq->ccmp.pn, pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; - memcpy(seq->aes_cmac.pn, pn, CMAC_PN_LEN); + memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; } } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index e8de3e6d7804..036d57e76a5e 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -19,17 +19,6 @@ #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 -#define WEP_IV_LEN 4 -#define WEP_ICV_LEN 4 -#define ALG_CCMP_KEY_LEN 16 -#define CCMP_HDR_LEN 8 -#define CCMP_MIC_LEN 8 -#define CCMP_TK_LEN 16 -#define CCMP_PN_LEN 6 -#define TKIP_IV_LEN 8 -#define TKIP_ICV_LEN 4 -#define CMAC_PN_LEN 6 - struct ieee80211_local; struct ieee80211_sub_if_data; struct sta_info; @@ -93,13 +82,13 @@ struct ieee80211_key { * frames and the last counter is used with Robust * Management frames. */ - u8 rx_pn[IEEE80211_NUM_TIDS + 1][CCMP_PN_LEN]; + u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_CCMP_PN_LEN]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCCMPReplays */ } ccmp; struct { atomic64_t tx_pn; - u8 rx_pn[CMAC_PN_LEN]; + u8 rx_pn[IEEE80211_CMAC_PN_LEN]; struct crypto_cipher *tfm; u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8eae74ac4e1e..091088ac7890 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -331,7 +331,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, return NOTIFY_DONE; ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); /* Copy the addresses to the bss_conf list */ ifa = idev->ifa_list; @@ -349,7 +349,7 @@ static int ieee80211_ifa_changed(struct notifier_block *nb, ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_ARP_FILTER); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return NOTIFY_DONE; } @@ -686,8 +686,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return -EINVAL; #ifdef CONFIG_PM - if ((hw->wiphy->wowlan.flags || hw->wiphy->wowlan.n_patterns) && - (!local->ops->suspend || !local->ops->resume)) + if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume)) return -EINVAL; #endif diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 6952760881c8..447f41bbe744 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -271,8 +271,7 @@ int mesh_add_meshconf_ie(struct ieee80211_sub_if_data *sdata, *pos++ = ifmsh->mesh_auth_id; /* Mesh Formation Info - number of neighbors */ neighbors = atomic_read(&ifmsh->estab_plinks); - /* Number of neighbor mesh STAs or 15 whichever is smaller */ - neighbors = (neighbors > 15) ? 15 : neighbors; + neighbors = min_t(int, neighbors, IEEE80211_MAX_MESH_PEERINGS); *pos++ = neighbors << 1; /* Mesh capability */ *pos = IEEE80211_MESHCONF_CAPAB_FORWARDING; @@ -417,7 +416,9 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[band]; if (!sband->ht_cap.ht_supported || - sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) return 0; if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_cap)) @@ -573,7 +574,7 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; u32 changed; - ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); + ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ); mesh_path_expire(sdata); changed = mesh_accept_plinks_update(sdata); @@ -697,38 +698,38 @@ out_free: } static int -ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh) +ieee80211_mesh_rebuild_beacon(struct ieee80211_sub_if_data *sdata) { struct beacon_data *old_bcn; int ret; - mutex_lock(&ifmsh->mtx); - - old_bcn = rcu_dereference_protected(ifmsh->beacon, - lockdep_is_held(&ifmsh->mtx)); - ret = ieee80211_mesh_build_beacon(ifmsh); + old_bcn = rcu_dereference_protected(sdata->u.mesh.beacon, + lockdep_is_held(&sdata->wdev.mtx)); + ret = ieee80211_mesh_build_beacon(&sdata->u.mesh); if (ret) /* just reuse old beacon */ - goto out; + return ret; if (old_bcn) kfree_rcu(old_bcn, rcu_head); -out: - mutex_unlock(&ifmsh->mtx); - return ret; + return 0; } void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed) { - if (sdata->vif.bss_conf.enable_beacon && - (changed & (BSS_CHANGED_BEACON | - BSS_CHANGED_HT | - BSS_CHANGED_BASIC_RATES | - BSS_CHANGED_BEACON_INT))) - if (ieee80211_mesh_rebuild_beacon(&sdata->u.mesh)) - return; - ieee80211_bss_info_change_notify(sdata, changed); + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + unsigned long bits = changed; + u32 bit; + + if (!bits) + return; + + /* if we race with running work, worst case this work becomes a noop */ + for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE) + set_bit(bit, &ifmsh->mbss_changed); + set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags); + ieee80211_queue_work(&sdata->local->hw, &sdata->work); } int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) @@ -740,7 +741,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) BSS_CHANGED_HT | BSS_CHANGED_BASIC_RATES | BSS_CHANGED_BEACON_INT; - enum ieee80211_band band = ieee80211_get_sdata_band(sdata); local->fif_other_bss++; /* mesh ifaces must set allmulti to forward mcast traffic */ @@ -748,7 +748,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ieee80211_configure_filter(local); ifmsh->mesh_cc_id = 0; /* Disabled */ - ifmsh->mesh_auth_id = 0; /* Disabled */ /* register sync ops from extensible synchronization framework */ ifmsh->sync_ops = ieee80211_mesh_sync_ops_get(ifmsh->mesh_sp_id); ifmsh->adjusting_tbtt = false; @@ -759,8 +758,6 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.ht_operation_mode = ifmsh->mshcfg.ht_opmode; sdata->vif.bss_conf.enable_beacon = true; - sdata->vif.bss_conf.basic_rates = - ieee80211_mandatory_rates(local, band); changed |= ieee80211_mps_local_status_update(sdata); @@ -788,12 +785,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) sdata->vif.bss_conf.enable_beacon = false; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); - mutex_lock(&ifmsh->mtx); bcn = rcu_dereference_protected(ifmsh->beacon, - lockdep_is_held(&ifmsh->mtx)); + lockdep_is_held(&sdata->wdev.mtx)); rcu_assign_pointer(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); - mutex_unlock(&ifmsh->mtx); /* flush STAs and mpaths on this iface */ sta_info_flush(sdata); @@ -806,14 +801,10 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); del_timer_sync(&sdata->u.mesh.mesh_path_timer); - /* - * If the timer fired while we waited for it, it will have - * requeued the work. Now the work will be running again - * but will not rearm the timer again because it checks - * whether the interface is running, which, at this point, - * it no longer is. - */ - cancel_work_sync(&sdata->work); + + /* clear any mesh work (for next join) we may have accrued */ + ifmsh->wrkq_flags = 0; + ifmsh->mbss_changed = 0; local->fif_other_bss--; atomic_dec(&local->iff_allmultis); @@ -954,6 +945,12 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u16 stype; + sdata_lock(sdata); + + /* mesh already went down */ + if (!sdata->wdev.mesh_id_len) + goto out; + rx_status = IEEE80211_SKB_RXCB(skb); mgmt = (struct ieee80211_mgmt *) skb->data; stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; @@ -971,12 +968,42 @@ void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); break; } +out: + sdata_unlock(sdata); +} + +static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + u32 bit, changed = 0; + + for_each_set_bit(bit, &ifmsh->mbss_changed, + sizeof(changed) * BITS_PER_BYTE) { + clear_bit(bit, &ifmsh->mbss_changed); + changed |= BIT(bit); + } + + if (sdata->vif.bss_conf.enable_beacon && + (changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BEACON_INT))) + if (ieee80211_mesh_rebuild_beacon(sdata)) + return; + + ieee80211_bss_info_change_notify(sdata, changed); } void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + sdata_lock(sdata); + + /* mesh already went down */ + if (!sdata->wdev.mesh_id_len) + goto out; + if (ifmsh->preq_queue_len && time_after(jiffies, ifmsh->last_preq + msecs_to_jiffies(ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval))) @@ -996,6 +1023,11 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata) if (test_and_clear_bit(MESH_WORK_DRIFT_ADJUST, &ifmsh->wrkq_flags)) mesh_sync_adjust_tbtt(sdata); + + if (test_and_clear_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags)) + mesh_bss_info_changed(sdata); +out: + sdata_unlock(sdata); } void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) @@ -1041,7 +1073,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) spin_lock_init(&ifmsh->mesh_preq_queue_lock); spin_lock_init(&ifmsh->sync_offset_lock); RCU_INIT_POINTER(ifmsh->beacon, NULL); - mutex_init(&ifmsh->mtx); sdata->vif.bss_conf.bssid = zero_addr; } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index da158774eebb..2bc7fd2f787d 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -58,6 +58,7 @@ enum mesh_path_flags { * @MESH_WORK_ROOT: the mesh root station needs to send a frame * @MESH_WORK_DRIFT_ADJUST: time to compensate for clock drift relative to other * mesh nodes + * @MESH_WORK_MBSS_CHANGED: rebuild beacon and notify driver of BSS changes */ enum mesh_deferred_task_flags { MESH_WORK_HOUSEKEEPING, @@ -65,6 +66,7 @@ enum mesh_deferred_task_flags { MESH_WORK_GROW_MPP_TABLE, MESH_WORK_ROOT, MESH_WORK_DRIFT_ADJUST, + MESH_WORK_MBSS_CHANGED, }; /** @@ -188,7 +190,6 @@ struct mesh_rmc { u32 idx_mask; }; -#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) #define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) #define MESH_PATH_EXPIRE (600 * HZ) @@ -324,14 +325,14 @@ static inline u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_inc(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON; } static inline u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { atomic_dec(&sdata->u.mesh.estab_plinks); - return mesh_accept_plinks_update(sdata); + return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON; } static inline int mesh_plink_free_count(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 09bebed99416..02c05fa15c20 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -154,8 +154,14 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata) u16 ht_opmode; bool non_ht_sta = false, ht20_sta = false; - if (sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT) + switch (sdata->vif.bss_conf.chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: return 0; + default: + break; + } rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 3b7bfc01ee36..22290a929b94 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -229,6 +229,10 @@ void ieee80211_mps_sta_status_update(struct sta_info *sta) enum nl80211_mesh_power_mode pm; bool do_buffer; + /* For non-assoc STA, prevent buffering or frame transmission */ + if (sta->sta_state < IEEE80211_STA_ASSOC) + return; + /* * use peer-specific power mode if peering is established and the * peer's power mode is known diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 741448b30825..ae31968d42d3 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -91,41 +91,6 @@ MODULE_PARM_DESC(probe_wait_ms, #define IEEE80211_SIGNAL_AVE_MIN_COUNT 4 /* - * All cfg80211 functions have to be called outside a locked - * section so that they can acquire a lock themselves... This - * is much simpler than queuing up things in cfg80211, but we - * do need some indirection for that here. - */ -enum rx_mgmt_action { - /* no action required */ - RX_MGMT_NONE, - - /* caller must call cfg80211_send_deauth() */ - RX_MGMT_CFG80211_DEAUTH, - - /* caller must call cfg80211_send_disassoc() */ - RX_MGMT_CFG80211_DISASSOC, - - /* caller must call cfg80211_send_rx_auth() */ - RX_MGMT_CFG80211_RX_AUTH, - - /* caller must call cfg80211_send_rx_assoc() */ - RX_MGMT_CFG80211_RX_ASSOC, - - /* caller must call cfg80211_send_assoc_timeout() */ - RX_MGMT_CFG80211_ASSOC_TIMEOUT, - - /* used when a processed beacon causes a deauth */ - RX_MGMT_CFG80211_TX_DEAUTH, -}; - -/* utils */ -static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd) -{ - lockdep_assert_held(&ifmgd->mtx); -} - -/* * We can have multiple work items (and connection probing) * scheduling this timer, but we need to take care to only * reschedule it when it should fire _earlier_ than it was @@ -135,13 +100,14 @@ static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd) * has happened -- the work that runs from this timer will * do that. */ -static void run_again(struct ieee80211_if_managed *ifmgd, unsigned long timeout) +static void run_again(struct ieee80211_sub_if_data *sdata, + unsigned long timeout) { - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); - if (!timer_pending(&ifmgd->timer) || - time_before(timeout, ifmgd->timer.expires)) - mod_timer(&ifmgd->timer, timeout); + if (!timer_pending(&sdata->u.mgd.timer) || + time_before(timeout, sdata->u.mgd.timer.expires)) + mod_timer(&sdata->u.mgd.timer, timeout); } void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata) @@ -224,6 +190,12 @@ static u32 chandef_downgrade(struct cfg80211_chan_def *c) c->width = NL80211_CHAN_WIDTH_20_NOHT; ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; break; + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + WARN_ON_ONCE(1); + /* keep c->width */ + ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; + break; } WARN_ON_ONCE(!cfg80211_chandef_valid(c)); @@ -652,7 +624,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_channel *chan; u32 rates = 0; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); @@ -914,6 +886,10 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_INTFL_OFFCHAN_TX_OK; + + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; + if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL)) IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_CTL_USE_MINRATE; @@ -962,7 +938,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ieee80211_sdata_running(sdata)) return; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) goto out; @@ -985,7 +961,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) IEEE80211_QUEUE_STOP_REASON_CSA); out: ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) @@ -1036,7 +1012,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, const struct ieee80211_ht_operation *ht_oper; int secondary_channel_offset = -1; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (!cbss) return; @@ -1390,6 +1366,9 @@ static bool ieee80211_powersave_allowed(struct ieee80211_sub_if_data *sdata) IEEE80211_STA_CONNECTION_POLL)) return false; + if (!mgd->have_beacon) + return false; + rcu_read_lock(); sta = sta_info_get(sdata, mgd->bssid); if (sta) @@ -1798,7 +1777,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, 1); - if (sdata->u.mgd.assoc_data->have_beacon) { + if (sdata->u.mgd.have_beacon) { /* * If the AP is buggy we may get here with no DTIM period * known, so assume it's 1 which is the only safe assumption @@ -1806,8 +1785,10 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, * probably just won't work at all. */ bss_conf->dtim_period = sdata->u.mgd.dtim_period ?: 1; - bss_info_changed |= BSS_CHANGED_DTIM_PERIOD; + bss_conf->beacon_rate = bss->beacon_rate; + bss_info_changed |= BSS_CHANGED_BEACON_INFO; } else { + bss_conf->beacon_rate = NULL; bss_conf->dtim_period = 0; } @@ -1842,7 +1823,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; u32 changed = 0; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (WARN_ON_ONCE(tx && !frame_buf)) return; @@ -1930,6 +1911,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, del_timer_sync(&sdata->u.mgd.chswitch_timer); sdata->vif.bss_conf.dtim_period = 0; + sdata->vif.bss_conf.beacon_rate = NULL; + + ifmgd->have_beacon = false; ifmgd->flags = 0; ieee80211_vif_release_channel(sdata); @@ -2051,7 +2035,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) } ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms); - run_again(ifmgd, ifmgd->probe_timeout); + run_again(sdata, ifmgd->probe_timeout); if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) ieee80211_flush_queues(sdata->local, sdata); } @@ -2065,7 +2049,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (!ieee80211_sdata_running(sdata)) return; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) goto out; @@ -2119,7 +2103,7 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, ifmgd->probe_send_count = 0; ieee80211_mgd_probe_ap_send(sdata); out: - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, @@ -2135,7 +2119,7 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) return NULL; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (ifmgd->associated) cbss = ifmgd->associated; @@ -2168,9 +2152,9 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } @@ -2181,13 +2165,10 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_MAX_QUEUE_MAP, IEEE80211_QUEUE_STOP_REASON_CSA); - mutex_unlock(&ifmgd->mtx); - /* - * must be outside lock due to cfg80211, - * but that's not a problem. - */ - cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); + sdata_unlock(sdata); } static void ieee80211_beacon_connection_loss_work(struct work_struct *work) @@ -2254,7 +2235,7 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_auth_data *auth_data = sdata->u.mgd.auth_data; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); if (!assoc) { sta_info_destroy_addr(sdata, auth_data->bss->bssid); @@ -2295,27 +2276,26 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, auth_data->key_idx, tx_flags); } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 bssid[ETH_ALEN]; u16 auth_alg, auth_transaction, status_code; struct sta_info *sta; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 6) - return RX_MGMT_NONE; + return; if (!ifmgd->auth_data || ifmgd->auth_data->done) - return RX_MGMT_NONE; + return; memcpy(bssid, ifmgd->auth_data->bss->bssid, ETH_ALEN); if (!ether_addr_equal(bssid, mgmt->bssid)) - return RX_MGMT_NONE; + return; auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); @@ -2327,14 +2307,15 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, mgmt->sa, auth_alg, ifmgd->auth_data->algorithm, auth_transaction, ifmgd->auth_data->expected_transaction); - return RX_MGMT_NONE; + return; } if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied authentication (status %d)\n", mgmt->sa, status_code); ieee80211_destroy_auth_data(sdata, false); - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); + return; } switch (ifmgd->auth_data->algorithm) { @@ -2347,20 +2328,20 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, if (ifmgd->auth_data->expected_transaction != 4) { ieee80211_auth_challenge(sdata, mgmt, len); /* need another frame */ - return RX_MGMT_NONE; + return; } break; default: WARN_ONCE(1, "invalid auth alg %d", ifmgd->auth_data->algorithm); - return RX_MGMT_NONE; + return; } sdata_info(sdata, "authenticated\n"); ifmgd->auth_data->done = true; ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_WAIT_ASSOC; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && ifmgd->auth_data->expected_transaction != 2) { @@ -2368,7 +2349,8 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, * Report auth frame to user space for processing since another * round of Authentication frames is still needed. */ - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); + return; } /* move station state to auth */ @@ -2384,30 +2366,29 @@ ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, } mutex_unlock(&sdata->local->sta_mtx); - return RX_MGMT_CFG80211_RX_AUTH; + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); + return; out_err: mutex_unlock(&sdata->local->sta_mtx); /* ignore frame -- wait for timeout */ - return RX_MGMT_NONE; } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; const u8 *bssid = NULL; u16 reason_code; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 2) - return RX_MGMT_NONE; + return; if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; bssid = ifmgd->associated->bssid; @@ -2418,25 +2399,24 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - return RX_MGMT_CFG80211_DEAUTH; + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u16 reason_code; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (len < 24 + 2) - return RX_MGMT_NONE; + return; if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); @@ -2445,7 +2425,7 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, 0, 0, false, NULL); - return RX_MGMT_CFG80211_DISASSOC; + cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); } static void ieee80211_get_rates(struct ieee80211_supported_band *sband, @@ -2495,7 +2475,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, { struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); if (!assoc) { sta_info_destroy_addr(sdata, assoc_data->bss->bssid); @@ -2749,10 +2729,9 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, return ret; } -static enum rx_mgmt_action __must_check -ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - struct cfg80211_bss **bss) +static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data; @@ -2760,13 +2739,14 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems elems; u8 *pos; bool reassoc; + struct cfg80211_bss *bss; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (!assoc_data) - return RX_MGMT_NONE; + return; if (!ether_addr_equal(assoc_data->bss->bssid, mgmt->bssid)) - return RX_MGMT_NONE; + return; /* * AssocResp and ReassocResp have identical structure, so process both @@ -2774,7 +2754,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, */ if (len < 24 + 6) - return RX_MGMT_NONE; + return; reassoc = ieee80211_is_reassoc_req(mgmt->frame_control); capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); @@ -2801,22 +2781,22 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, assoc_data->timeout = jiffies + msecs_to_jiffies(ms); assoc_data->timeout_started = true; if (ms > IEEE80211_ASSOC_TIMEOUT) - run_again(ifmgd, assoc_data->timeout); - return RX_MGMT_NONE; + run_again(sdata, assoc_data->timeout); + return; } - *bss = assoc_data->bss; + bss = assoc_data->bss; if (status_code != WLAN_STATUS_SUCCESS) { sdata_info(sdata, "%pM denied association (code=%d)\n", mgmt->sa, status_code); ieee80211_destroy_assoc_data(sdata, false); } else { - if (!ieee80211_assoc_success(sdata, *bss, mgmt, len)) { + if (!ieee80211_assoc_success(sdata, bss, mgmt, len)) { /* oops -- internal error -- send timeout for now */ ieee80211_destroy_assoc_data(sdata, false); - cfg80211_put_bss(sdata->local->hw.wiphy, *bss); - return RX_MGMT_CFG80211_ASSOC_TIMEOUT; + cfg80211_assoc_timeout(sdata->dev, bss); + return; } sdata_info(sdata, "associated\n"); @@ -2828,7 +2808,7 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_destroy_assoc_data(sdata, true); } - return RX_MGMT_CFG80211_RX_ASSOC; + cfg80211_rx_assoc_resp(sdata->dev, bss, (u8 *)mgmt, len); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -2840,23 +2820,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, int freq; struct ieee80211_bss *bss; struct ieee80211_channel *channel; - bool need_ps = false; - - lockdep_assert_held(&sdata->u.mgd.mtx); - if ((sdata->u.mgd.associated && - ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) || - (sdata->u.mgd.assoc_data && - ether_addr_equal(mgmt->bssid, - sdata->u.mgd.assoc_data->bss->bssid))) { - /* not previously set so we may need to recalc */ - need_ps = sdata->u.mgd.associated && !sdata->u.mgd.dtim_period; - - if (elems->tim && !elems->parse_error) { - const struct ieee80211_tim_ie *tim_ie = elems->tim; - sdata->u.mgd.dtim_period = tim_ie->dtim_period; - } - } + sdata_assert_lock(sdata); if (elems->ds_params) freq = ieee80211_channel_to_frequency(elems->ds_params[0], @@ -2871,19 +2836,15 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, channel); - if (bss) + if (bss) { ieee80211_rx_bss_put(local, bss); + sdata->vif.bss_conf.beacon_rate = bss->beacon_rate; + } if (!sdata->u.mgd.associated || !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) return; - if (need_ps) { - mutex_lock(&local->iflist_mtx); - ieee80211_recalc_ps(local, -1); - mutex_unlock(&local->iflist_mtx); - } - ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, elems, true); @@ -2901,7 +2862,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ifmgd = &sdata->u.mgd; - ASSERT_MGD_MTX(ifmgd); + sdata_assert_lock(sdata); if (!ether_addr_equal(mgmt->da, sdata->vif.addr)) return; /* ignore ProbeResp to foreign address */ @@ -2926,7 +2887,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, ifmgd->auth_data->tries = 0; ifmgd->auth_data->timeout = jiffies; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); } } @@ -2951,10 +2912,9 @@ static const u64 care_about_ies = (1ULL << WLAN_EID_HT_CAPABILITY) | (1ULL << WLAN_EID_HT_OPERATION); -static enum rx_mgmt_action -ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len, - u8 *deauth_buf, struct ieee80211_rx_status *rx_status) +static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; @@ -2969,24 +2929,25 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, u8 erp_value = 0; u32 ncrc; u8 *bssid; + u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); /* Process beacon from the current BSS */ baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; if (baselen > len) - return RX_MGMT_NONE; + return; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); - return RX_MGMT_NONE; + return; } if (rx_status->freq != chanctx_conf->def.chan->center_freq) { rcu_read_unlock(); - return RX_MGMT_NONE; + return; } chan = chanctx_conf->def.chan; rcu_read_unlock(); @@ -2997,7 +2958,11 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, len - baselen, false, &elems); ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); - ifmgd->assoc_data->have_beacon = true; + if (elems.tim && !elems.parse_error) { + const struct ieee80211_tim_ie *tim_ie = elems.tim; + ifmgd->dtim_period = tim_ie->dtim_period; + } + ifmgd->have_beacon = true; ifmgd->assoc_data->need_beacon = false; if (local->hw.flags & IEEE80211_HW_TIMING_BEACON_ONLY) { sdata->vif.bss_conf.sync_tsf = @@ -3013,13 +2978,13 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, /* continue assoc process */ ifmgd->assoc_data->timeout = jiffies; ifmgd->assoc_data->timeout_started = true; - run_again(ifmgd, ifmgd->assoc_data->timeout); - return RX_MGMT_NONE; + run_again(sdata, ifmgd->assoc_data->timeout); + return; } if (!ifmgd->associated || !ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid)) - return RX_MGMT_NONE; + return; bssid = ifmgd->associated->bssid; /* Track average RSSI from the Beacon frames of the current AP */ @@ -3165,7 +3130,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid) - return RX_MGMT_NONE; + return; ifmgd->beacon_crc = ncrc; ifmgd->beacon_crc_valid = true; @@ -3179,7 +3144,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, * If we haven't had a beacon before, tell the driver about the * DTIM period (and beacon timing if desired) now. */ - if (!bss_conf->dtim_period) { + if (!ifmgd->have_beacon) { /* a few bogus AP send dtim_period = 0 or no TIM IE */ if (elems.tim) bss_conf->dtim_period = elems.tim->dtim_period ?: 1; @@ -3198,7 +3163,14 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.sync_dtim_count = 0; } - changed |= BSS_CHANGED_DTIM_PERIOD; + changed |= BSS_CHANGED_BEACON_INFO; + ifmgd->have_beacon = true; + + mutex_lock(&local->iflist_mtx); + ieee80211_recalc_ps(local, -1); + mutex_unlock(&local->iflist_mtx); + + ieee80211_recalc_ps_vif(sdata); } if (elems.erp_info) { @@ -3220,7 +3192,9 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, true, deauth_buf); - return RX_MGMT_CFG80211_TX_DEAUTH; + cfg80211_tx_mlme_mgmt(sdata->dev, deauth_buf, + sizeof(deauth_buf)); + return; } if (sta && elems.opmode_notif) @@ -3237,19 +3211,13 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, elems.pwr_constr_elem); ieee80211_bss_info_change_notify(sdata, changed); - - return RX_MGMT_NONE; } void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_rx_status *rx_status; struct ieee80211_mgmt *mgmt; - struct cfg80211_bss *bss = NULL; - enum rx_mgmt_action rma = RX_MGMT_NONE; - u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN]; u16 fc; struct ieee802_11_elems elems; int ies_len; @@ -3258,28 +3226,27 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_BEACON: - rma = ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, - deauth_buf, rx_status); + ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_PROBE_RESP: ieee80211_rx_mgmt_probe_resp(sdata, skb); break; case IEEE80211_STYPE_AUTH: - rma = ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_DEAUTH: - rma = ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_DISASSOC: - rma = ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); + ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ASSOC_RESP: case IEEE80211_STYPE_REASSOC_RESP: - rma = ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, &bss); + ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len); break; case IEEE80211_STYPE_ACTION: if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { @@ -3325,34 +3292,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, } break; } - mutex_unlock(&ifmgd->mtx); - - switch (rma) { - case RX_MGMT_NONE: - /* no action */ - break; - case RX_MGMT_CFG80211_DEAUTH: - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_DISASSOC: - cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_RX_AUTH: - cfg80211_send_rx_auth(sdata->dev, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_RX_ASSOC: - cfg80211_send_rx_assoc(sdata->dev, bss, (u8 *)mgmt, skb->len); - break; - case RX_MGMT_CFG80211_ASSOC_TIMEOUT: - cfg80211_send_assoc_timeout(sdata->dev, mgmt->bssid); - break; - case RX_MGMT_CFG80211_TX_DEAUTH: - cfg80211_send_deauth(sdata->dev, deauth_buf, - sizeof(deauth_buf)); - break; - default: - WARN(1, "unexpected: %d", rma); - } + sdata_unlock(sdata); } static void ieee80211_sta_timer(unsigned long data) @@ -3366,20 +3306,13 @@ static void ieee80211_sta_timer(unsigned long data) static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, u8 *bssid, u8 reason, bool tx) { - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH, reason, tx, frame_buf); - mutex_unlock(&ifmgd->mtx); - /* - * must be outside lock due to cfg80211, - * but that's not a problem. - */ - cfg80211_send_deauth(sdata->dev, frame_buf, IEEE80211_DEAUTH_FRAME_LEN); - - mutex_lock(&ifmgd->mtx); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); } static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) @@ -3389,7 +3322,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data; u32 tx_flags = 0; - lockdep_assert_held(&ifmgd->mtx); + sdata_assert_lock(sdata); if (WARN_ON_ONCE(!auth_data)) return -EINVAL; @@ -3462,7 +3395,7 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) if (tx_flags == 0) { auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; ifmgd->auth_data->timeout_started = true; - run_again(ifmgd, auth_data->timeout); + run_again(sdata, auth_data->timeout); } else { auth_data->timeout_started = false; } @@ -3475,7 +3408,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_mgd_assoc_data *assoc_data = sdata->u.mgd.assoc_data; struct ieee80211_local *local = sdata->local; - lockdep_assert_held(&sdata->u.mgd.mtx); + sdata_assert_lock(sdata); assoc_data->tries++; if (assoc_data->tries > IEEE80211_ASSOC_MAX_TRIES) { @@ -3499,7 +3432,7 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) if (!(local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)) { assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT; assoc_data->timeout_started = true; - run_again(&sdata->u.mgd, assoc_data->timeout); + run_again(sdata, assoc_data->timeout); } else { assoc_data->timeout_started = false; } @@ -3524,7 +3457,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (ifmgd->status_received) { __le16 fc = ifmgd->status_fc; @@ -3536,7 +3469,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (status_acked) { ifmgd->auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT_SHORT; - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); } else { ifmgd->auth_data->timeout = jiffies - 1; } @@ -3547,7 +3480,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (status_acked) { ifmgd->assoc_data->timeout = jiffies + IEEE80211_ASSOC_TIMEOUT_SHORT; - run_again(ifmgd, ifmgd->assoc_data->timeout); + run_again(sdata, ifmgd->assoc_data->timeout); } else { ifmgd->assoc_data->timeout = jiffies - 1; } @@ -3570,30 +3503,22 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ieee80211_destroy_auth_data(sdata, false); - mutex_unlock(&ifmgd->mtx); - cfg80211_send_auth_timeout(sdata->dev, bssid); - mutex_lock(&ifmgd->mtx); + cfg80211_auth_timeout(sdata->dev, bssid); } } else if (ifmgd->auth_data && ifmgd->auth_data->timeout_started) - run_again(ifmgd, ifmgd->auth_data->timeout); + run_again(sdata, ifmgd->auth_data->timeout); if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started && time_after(jiffies, ifmgd->assoc_data->timeout)) { - if ((ifmgd->assoc_data->need_beacon && - !ifmgd->assoc_data->have_beacon) || + if ((ifmgd->assoc_data->need_beacon && !ifmgd->have_beacon) || ieee80211_do_assoc(sdata)) { - u8 bssid[ETH_ALEN]; - - memcpy(bssid, ifmgd->assoc_data->bss->bssid, ETH_ALEN); + struct cfg80211_bss *bss = ifmgd->assoc_data->bss; ieee80211_destroy_assoc_data(sdata, false); - - mutex_unlock(&ifmgd->mtx); - cfg80211_send_assoc_timeout(sdata->dev, bssid); - mutex_lock(&ifmgd->mtx); + cfg80211_assoc_timeout(sdata->dev, bss); } } else if (ifmgd->assoc_data && ifmgd->assoc_data->timeout_started) - run_again(ifmgd, ifmgd->assoc_data->timeout); + run_again(sdata, ifmgd->assoc_data->timeout); if (ifmgd->flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL) && @@ -3627,7 +3552,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) false); } } else if (time_is_after_jiffies(ifmgd->probe_timeout)) - run_again(ifmgd, ifmgd->probe_timeout); + run_again(sdata, ifmgd->probe_timeout); else if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) { mlme_dbg(sdata, "Failed to send nullfunc to AP %pM after %dms, disconnecting\n", @@ -3656,7 +3581,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) } } - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } static void ieee80211_sta_bcn_mon_timer(unsigned long data) @@ -3717,9 +3642,9 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - mutex_lock(&ifmgd->mtx); + sdata_lock(sdata); if (!ifmgd->associated) { - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } @@ -3730,10 +3655,10 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) ifmgd->associated->bssid, WLAN_REASON_UNSPECIFIED, true); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); return; } - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } #endif @@ -3765,8 +3690,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len; ifmgd->p2p_noa_index = -1; - mutex_init(&ifmgd->mtx); - if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS) ifmgd->req_smps = IEEE80211_SMPS_AUTOMATIC; else @@ -3923,6 +3846,12 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, */ ret = ieee80211_vif_use_channel(sdata, &chandef, IEEE80211_CHANCTX_SHARED); + + /* don't downgrade for 5 and 10 MHz channels, though. */ + if (chandef.width == NL80211_CHAN_WIDTH_5 || + chandef.width == NL80211_CHAN_WIDTH_10) + return ret; + while (ret && chandef.width != NL80211_CHAN_WIDTH_20_NOHT) { ifmgd->flags |= chandef_downgrade(&chandef); ret = ieee80211_vif_use_channel(sdata, &chandef, @@ -4122,8 +4051,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* try to authenticate/probe */ - mutex_lock(&ifmgd->mtx); - if ((ifmgd->auth_data && !ifmgd->auth_data->done) || ifmgd->assoc_data) { err = -EBUSY; @@ -4143,8 +4070,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - __cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + sizeof(frame_buf)); } sdata_info(sdata, "authenticate with %pM\n", req->bss->bssid); @@ -4161,8 +4088,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, /* hold our own reference */ cfg80211_ref_bss(local->hw.wiphy, auth_data->bss); - err = 0; - goto out_unlock; + return 0; err_clear: memset(ifmgd->bssid, 0, ETH_ALEN); @@ -4170,9 +4096,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, ifmgd->auth_data = NULL; err_free: kfree(auth_data); - out_unlock: - mutex_unlock(&ifmgd->mtx); - return err; } @@ -4203,8 +4126,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, assoc_data->ssid_len = ssidie[1]; rcu_read_unlock(); - mutex_lock(&ifmgd->mtx); - if (ifmgd->associated) { u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; @@ -4212,8 +4133,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, WLAN_REASON_UNSPECIFIED, false, frame_buf); - __cfg80211_send_deauth(sdata->dev, frame_buf, - sizeof(frame_buf)); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + sizeof(frame_buf)); } if (ifmgd->auth_data && !ifmgd->auth_data->done) { @@ -4360,6 +4281,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->assoc_data = assoc_data; ifmgd->dtim_period = 0; + ifmgd->have_beacon = false; err = ieee80211_prep_connection(sdata, req->bss, true); if (err) @@ -4391,7 +4313,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->dtim_period = tim->dtim_period; dtim_count = tim->dtim_count; } - assoc_data->have_beacon = true; + ifmgd->have_beacon = true; assoc_data->timeout = jiffies; assoc_data->timeout_started = true; @@ -4407,7 +4329,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } rcu_read_unlock(); - run_again(ifmgd, assoc_data->timeout); + run_again(sdata, assoc_data->timeout); if (bss->corrupt_data) { char *corrupt_type = "data"; @@ -4423,17 +4345,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, corrupt_type); } - err = 0; - goto out; + return 0; err_clear: memset(ifmgd->bssid, 0, ETH_ALEN); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); ifmgd->assoc_data = NULL; err_free: kfree(assoc_data); - out: - mutex_unlock(&ifmgd->mtx); - return err; } @@ -4445,8 +4363,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, bool tx = !req->local_state_change; bool report_frame = false; - mutex_lock(&ifmgd->mtx); - sdata_info(sdata, "deauthenticating from %pM by local choice (reason=%d)\n", req->bssid, req->reason_code); @@ -4458,7 +4374,6 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, tx, frame_buf); ieee80211_destroy_auth_data(sdata, false); - mutex_unlock(&ifmgd->mtx); report_frame = true; goto out; @@ -4470,12 +4385,11 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, req->reason_code, tx, frame_buf); report_frame = true; } - mutex_unlock(&ifmgd->mtx); out: if (report_frame) - __cfg80211_send_deauth(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } @@ -4487,18 +4401,14 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, u8 bssid[ETH_ALEN]; u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; - mutex_lock(&ifmgd->mtx); - /* * cfg80211 should catch this ... but it's racy since * we can receive a disassoc frame, process it, hand it * to cfg80211 while that's in a locked section already * trying to tell us that the user wants to disconnect. */ - if (ifmgd->associated != req->bss) { - mutex_unlock(&ifmgd->mtx); + if (ifmgd->associated != req->bss) return -ENOLINK; - } sdata_info(sdata, "disassociating from %pM by local choice (reason=%d)\n", @@ -4508,10 +4418,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DISASSOC, req->reason_code, !req->local_state_change, frame_buf); - mutex_unlock(&ifmgd->mtx); - __cfg80211_send_disassoc(sdata->dev, frame_buf, - IEEE80211_DEAUTH_FRAME_LEN); + cfg80211_tx_mlme_mgmt(sdata->dev, frame_buf, + IEEE80211_DEAUTH_FRAME_LEN); return 0; } @@ -4531,13 +4440,16 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata) cancel_work_sync(&ifmgd->csa_connection_drop_work); cancel_work_sync(&ifmgd->chswitch_work); - mutex_lock(&ifmgd->mtx); - if (ifmgd->assoc_data) + sdata_lock(sdata); + if (ifmgd->assoc_data) { + struct cfg80211_bss *bss = ifmgd->assoc_data->bss; ieee80211_destroy_assoc_data(sdata, false); + cfg80211_assoc_timeout(sdata->dev, bss); + } if (ifmgd->auth_data) ieee80211_destroy_auth_data(sdata, false); del_timer_sync(&ifmgd->timer); - mutex_unlock(&ifmgd->mtx); + sdata_unlock(sdata); } void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 7fc5d0d8149a..340126204343 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -99,10 +99,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } mutex_unlock(&local->sta_mtx); - /* remove all interfaces */ + /* remove all interfaces that were created in the driver */ list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) + if (!ieee80211_sdata_running(sdata) || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; + drv_remove_interface(local, sdata); } diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index a02bef35b134..30d58d2d13e2 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -397,8 +397,14 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, return; /* if HT BSS, and we handle a data frame, also try HT rates */ - if (chan_width == NL80211_CHAN_WIDTH_20_NOHT) + switch (chan_width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: return; + default: + break; + } alt_rate.idx = 0; /* keep protection flags */ diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index ac7ef5414bde..e6512e2ffd20 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -290,7 +290,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, struct minstrel_rate *msr, *mr; unsigned int ndx; bool mrr_capable; - bool prev_sample = mi->prev_sample; + bool prev_sample; int delta; int sampling_ratio; @@ -314,6 +314,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, (mi->sample_count + mi->sample_deferred / 2); /* delta < 0: no sampling required */ + prev_sample = mi->prev_sample; mi->prev_sample = false; if (delta < 0 || (!mrr_capable && prev_sample)) return; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5b2d3012b983..f5aed963b22e 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -804,10 +804,18 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + rate->count = 1; + + if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); + rate->idx = mp->cck_rates[idx]; + rate->flags = 0; + return; + } + rate->idx = sample_idx % MCS_GROUP_RATES + (sample_group->streams - 1) * MCS_GROUP_RATES; rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags; - rate->count = 1; } static void diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8e2952620256..2c5a79bd3777 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -258,6 +258,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos += 2; if (status->flag & RX_FLAG_HT) { + unsigned int stbc; + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); *pos++ = local->hw.radiotap_mcs_details; *pos = 0; @@ -267,6 +269,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos |= IEEE80211_RADIOTAP_MCS_BW_40; if (status->flag & RX_FLAG_HT_GF) *pos |= IEEE80211_RADIOTAP_MCS_FMT_GF; + stbc = (status->flag & RX_FLAG_STBC_MASK) >> RX_FLAG_STBC_SHIFT; + *pos |= stbc << IEEE80211_RADIOTAP_MCS_STBC_SHIFT; pos++; *pos++ = status->rate_idx; } @@ -932,8 +936,14 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ - if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { + /* + * Drop duplicate 802.11 retransmissions + * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery") + */ + if (rx->skb->len >= 24 && rx->sta && + !ieee80211_is_ctl(hdr->frame_control) && + !ieee80211_is_qos_nullfunc(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1)) { if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { @@ -1372,6 +1382,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + int i; if (!sta) return RX_CONTINUE; @@ -1422,6 +1433,19 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) ewma_add(&sta->avg_signal, -status->signal); } + if (status->chains) { + sta->chains = status->chains; + for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { + int signal = status->chain_signal[i]; + + if (!(status->chains & BIT(i))) + continue; + + sta->chain_signal_last[i] = signal; + ewma_add(&sta->chain_signal_avg[i], -signal); + } + } + /* * Change STA power saving mode only at the end of a frame * exchange sequence. @@ -1608,7 +1632,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) entry->ccmp = 1; memcpy(entry->last_pn, rx->key->u.ccmp.rx_pn[queue], - CCMP_PN_LEN); + IEEE80211_CCMP_PN_LEN); } return RX_QUEUED; } @@ -1627,21 +1651,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) * (IEEE 802.11i, 8.3.3.4.5) */ if (entry->ccmp) { int i; - u8 pn[CCMP_PN_LEN], *rpn; + u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; int queue; if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) return RX_DROP_UNUSABLE; - memcpy(pn, entry->last_pn, CCMP_PN_LEN); - for (i = CCMP_PN_LEN - 1; i >= 0; i--) { + memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); + for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { pn[i]++; if (pn[i]) break; } queue = rx->security_idx; rpn = rx->key->u.ccmp.rx_pn[queue]; - if (memcmp(pn, rpn, CCMP_PN_LEN)) + if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN)) return RX_DROP_UNUSABLE; - memcpy(entry->last_pn, pn, CCMP_PN_LEN); + memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); } skb_pull(rx->skb, ieee80211_hdrlen(fc)); @@ -1729,27 +1753,21 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) if (unlikely(!ieee80211_has_protected(fc) && ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && rx->key)) { - if (ieee80211_is_deauth(fc)) - cfg80211_send_unprot_deauth(rx->sdata->dev, - rx->skb->data, - rx->skb->len); - else if (ieee80211_is_disassoc(fc)) - cfg80211_send_unprot_disassoc(rx->sdata->dev, - rx->skb->data, - rx->skb->len); + if (ieee80211_is_deauth(fc) || + ieee80211_is_disassoc(fc)) + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + rx->skb->data, + rx->skb->len); return -EACCES; } /* BIP does not use Protected field, so need to check MMIE */ if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && ieee80211_get_mmie_keyidx(rx->skb) < 0)) { - if (ieee80211_is_deauth(fc)) - cfg80211_send_unprot_deauth(rx->sdata->dev, - rx->skb->data, - rx->skb->len); - else if (ieee80211_is_disassoc(fc)) - cfg80211_send_unprot_disassoc(rx->sdata->dev, - rx->skb->data, - rx->skb->len); + if (ieee80211_is_deauth(fc) || + ieee80211_is_disassoc(fc)) + cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev, + rx->skb->data, + rx->skb->len); return -EACCES; } /* diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 99b103921a4b..1b122a79b0d8 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -140,6 +140,15 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->valid_data |= IEEE80211_BSS_VALID_WMM; } + if (beacon) { + struct ieee80211_supported_band *sband = + local->hw.wiphy->bands[rx_status->band]; + if (!(rx_status->flag & RX_FLAG_HT) && + !(rx_status->flag & RX_FLAG_VHT)) + bss->beacon_rate = + &sband->bitrates[rx_status->rate_idx]; + } + return bss; } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 11216bc13b27..aeb967a0aeed 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -149,6 +149,7 @@ static void cleanup_single_sta(struct sta_info *sta) * directly by station destruction. */ for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + kfree(sta->ampdu_mlme.tid_start_tx[i]); tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]); if (!tid_tx) continue; @@ -346,6 +347,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (ieee80211_vif_is_mesh(&sdata->vif) && !sdata->u.mesh.user_mpm) init_timer(&sta->plink_timer); + sta->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; #endif memcpy(sta->sta.addr, addr, ETH_ALEN); @@ -358,6 +360,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, do_posix_clock_monotonic_gettime(&uptime); sta->last_connected = uptime.tv_sec; ewma_init(&sta->avg_signal, 1024, 8); + for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++) + ewma_init(&sta->chain_signal_avg[i], 1024, 8); if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); @@ -1130,6 +1134,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata, * ends the poll/service period. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_PS_RESPONSE | IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; @@ -1267,7 +1272,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, * STA may still remain is PS mode after this frame * exchange. */ - info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | + IEEE80211_TX_CTL_PS_RESPONSE; /* * Use MoreData flag to indicate whether there are diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index adc30045f99e..4208dbd5861f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -203,6 +203,7 @@ struct tid_ampdu_rx { * driver requested to close until the work for it runs * @mtx: mutex to protect all TX data (except non-NULL assignments * to tid_tx[idx], which are protected by the sta spinlock) + * tid_start_tx is also protected by sta->lock. */ struct sta_ampdu_mlme { struct mutex mtx; @@ -297,6 +298,9 @@ struct sta_ampdu_mlme { * @rcu_head: RCU head used for freeing this station struct * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, * taken from HT/VHT capabilities or VHT operating mode notification + * @chains: chains ever used for RX from this station + * @chain_signal_last: last signal (per chain) + * @chain_signal_avg: signal average (per chain) */ struct sta_info { /* General information, mostly static */ @@ -344,6 +348,11 @@ struct sta_info { int last_signal; struct ewma avg_signal; int last_ack_signal; + + u8 chains; + s8 chain_signal_last[IEEE80211_MAX_CHAINS]; + struct ewma chain_signal_avg[IEEE80211_MAX_CHAINS]; + /* Plus 1 for non-QoS frames */ __le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1]; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9972e07a2f96..4105d0ca963e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -398,13 +398,14 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (ieee80211_has_order(hdr->frame_control)) return TX_CONTINUE; + if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) + info->hw_queue = tx->sdata->vif.cab_queue; + /* no stations in PS mode */ if (!atomic_read(&ps->num_sta_ps)) return TX_CONTINUE; info->flags |= IEEE80211_TX_CTL_SEND_AFTER_DTIM; - if (tx->local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) - info->hw_queue = tx->sdata->vif.cab_queue; /* device releases frame after DTIM beacon */ if (!(tx->local->hw.flags & IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING)) @@ -1789,12 +1790,6 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb, break; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: - if (!sdata->u.mesh.mshcfg.dot11MeshTTL) { - /* Do not send frames with mesh_ttl == 0 */ - sdata->u.mesh.mshstats.dropped_frames_ttl++; - goto fail_rcu; - } - if (!is_multicast_ether_addr(skb->data)) { struct sta_info *next_hop; bool mpp_lookup = true; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 72e6292955bb..22654452a561 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -560,6 +560,9 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + continue; + break; case NL80211_IFTYPE_AP_VLAN: continue; default: @@ -598,6 +601,9 @@ void ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: + if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + continue; + break; case NL80211_IFTYPE_AP_VLAN: continue; default: @@ -1072,32 +1078,6 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, ieee80211_set_wmm_default(sdata, true); } -u32 ieee80211_mandatory_rates(struct ieee80211_local *local, - enum ieee80211_band band) -{ - struct ieee80211_supported_band *sband; - struct ieee80211_rate *bitrates; - u32 mandatory_rates; - enum ieee80211_rate_flags mandatory_flag; - int i; - - sband = local->hw.wiphy->bands[band]; - if (WARN_ON(!sband)) - return 1; - - if (band == IEEE80211_BAND_2GHZ) - mandatory_flag = IEEE80211_RATE_MANDATORY_B; - else - mandatory_flag = IEEE80211_RATE_MANDATORY_A; - - bitrates = sband->bitrates; - mandatory_rates = 0; - for (i = 0; i < sband->n_bitrates; i++) - if (bitrates[i].flags & mandatory_flag) - mandatory_rates |= BIT(i); - return mandatory_rates; -} - void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, const u8 *extra, size_t extra_len, const u8 *da, @@ -1604,12 +1584,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_ARP_FILTER | BSS_CHANGED_PS; - if (sdata->u.mgd.dtim_period) - changed |= BSS_CHANGED_DTIM_PERIOD; + /* Re-send beacon info report to the driver */ + if (sdata->u.mgd.have_beacon) + changed |= BSS_CHANGED_BEACON_INFO; - mutex_lock(&sdata->u.mgd.mtx); + sdata_lock(sdata); ieee80211_bss_info_change_notify(sdata, changed); - mutex_unlock(&sdata->u.mgd.mtx); + sdata_unlock(sdata); break; case NL80211_IFTYPE_ADHOC: changed |= BSS_CHANGED_IBSS; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 171344d4eb7c..97c289414e32 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -396,7 +396,7 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, new_bw = ieee80211_sta_cur_vht_bw(sta); if (new_bw != sta->sta.bandwidth) { sta->sta.bandwidth = new_bw; - changed |= IEEE80211_RC_NSS_CHANGED; + changed |= IEEE80211_RC_BW_CHANGED; } change: diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index c04d401dae92..6ee2b5863572 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -28,7 +28,7 @@ int ieee80211_wep_init(struct ieee80211_local *local) { /* start WEP IV from a random value */ - get_random_bytes(&local->wep_iv, WEP_IV_LEN); + get_random_bytes(&local->wep_iv, IEEE80211_WEP_IV_LEN); local->wep_tx_tfm = crypto_alloc_cipher("arc4", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(local->wep_tx_tfm)) { @@ -98,20 +98,21 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local, hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if (WARN_ON(skb_tailroom(skb) < WEP_ICV_LEN || - skb_headroom(skb) < WEP_IV_LEN)) + if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN || + skb_headroom(skb) < IEEE80211_WEP_IV_LEN)) return NULL; hdrlen = ieee80211_hdrlen(hdr->frame_control); - newhdr = skb_push(skb, WEP_IV_LEN); - memmove(newhdr, newhdr + WEP_IV_LEN, hdrlen); + newhdr = skb_push(skb, IEEE80211_WEP_IV_LEN); + memmove(newhdr, newhdr + IEEE80211_WEP_IV_LEN, hdrlen); /* the HW only needs room for the IV, but not the actual IV */ if (info->control.hw_key && (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) return newhdr + hdrlen; - skb_set_network_header(skb, skb_network_offset(skb) + WEP_IV_LEN); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_WEP_IV_LEN); ieee80211_wep_get_iv(local, keylen, keyidx, newhdr + hdrlen); return newhdr + hdrlen; } @@ -125,8 +126,8 @@ static void ieee80211_wep_remove_iv(struct ieee80211_local *local, unsigned int hdrlen; hdrlen = ieee80211_hdrlen(hdr->frame_control); - memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, WEP_IV_LEN); + memmove(skb->data + IEEE80211_WEP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_WEP_IV_LEN); } @@ -146,7 +147,7 @@ int ieee80211_wep_encrypt_data(struct crypto_cipher *tfm, u8 *rc4key, put_unaligned(icv, (__le32 *)(data + data_len)); crypto_cipher_setkey(tfm, rc4key, klen); - for (i = 0; i < data_len + WEP_ICV_LEN; i++) + for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++) crypto_cipher_encrypt_one(tfm, data + i, data + i); return 0; @@ -172,7 +173,7 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, if (!iv) return -1; - len = skb->len - (iv + WEP_IV_LEN - skb->data); + len = skb->len - (iv + IEEE80211_WEP_IV_LEN - skb->data); /* Prepend 24-bit IV to RC4 key */ memcpy(rc4key, iv, 3); @@ -181,10 +182,10 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, memcpy(rc4key + 3, key, keylen); /* Add room for ICV */ - skb_put(skb, WEP_ICV_LEN); + skb_put(skb, IEEE80211_WEP_ICV_LEN); return ieee80211_wep_encrypt_data(local->wep_tx_tfm, rc4key, keylen + 3, - iv + WEP_IV_LEN, len); + iv + IEEE80211_WEP_IV_LEN, len); } @@ -201,11 +202,11 @@ int ieee80211_wep_decrypt_data(struct crypto_cipher *tfm, u8 *rc4key, return -1; crypto_cipher_setkey(tfm, rc4key, klen); - for (i = 0; i < data_len + WEP_ICV_LEN; i++) + for (i = 0; i < data_len + IEEE80211_WEP_ICV_LEN; i++) crypto_cipher_decrypt_one(tfm, data + i, data + i); crc = cpu_to_le32(~crc32_le(~0, data, data_len)); - if (memcmp(&crc, data + data_len, WEP_ICV_LEN) != 0) + if (memcmp(&crc, data + data_len, IEEE80211_WEP_ICV_LEN) != 0) /* ICV mismatch */ return -1; @@ -237,10 +238,10 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, return -1; hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (skb->len < hdrlen + WEP_IV_LEN + WEP_ICV_LEN) + if (skb->len < hdrlen + IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN) return -1; - len = skb->len - hdrlen - WEP_IV_LEN - WEP_ICV_LEN; + len = skb->len - hdrlen - IEEE80211_WEP_IV_LEN - IEEE80211_WEP_ICV_LEN; keyidx = skb->data[hdrlen + 3] >> 6; @@ -256,16 +257,16 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, memcpy(rc4key + 3, key->conf.key, key->conf.keylen); if (ieee80211_wep_decrypt_data(local->wep_rx_tfm, rc4key, klen, - skb->data + hdrlen + WEP_IV_LEN, - len)) + skb->data + hdrlen + + IEEE80211_WEP_IV_LEN, len)) ret = -1; /* Trim ICV */ - skb_trim(skb, skb->len - WEP_ICV_LEN); + skb_trim(skb, skb->len - IEEE80211_WEP_ICV_LEN); /* Remove IV */ - memmove(skb->data + WEP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, WEP_IV_LEN); + memmove(skb->data + IEEE80211_WEP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_WEP_IV_LEN); return ret; } @@ -305,13 +306,14 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) if (ieee80211_wep_decrypt(rx->local, rx->skb, rx->key)) return RX_DROP_UNUSABLE; } else if (!(status->flag & RX_FLAG_IV_STRIPPED)) { - if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + WEP_IV_LEN)) + if (!pskb_may_pull(rx->skb, ieee80211_hdrlen(fc) + + IEEE80211_WEP_IV_LEN)) return RX_DROP_UNUSABLE; if (rx->sta && ieee80211_wep_is_weak_iv(rx->skb, rx->key)) rx->sta->wep_weak_iv_count++; ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ - if (pskb_trim(rx->skb, rx->skb->len - WEP_ICV_LEN)) + if (pskb_trim(rx->skb, rx->skb->len - IEEE80211_WEP_ICV_LEN)) return RX_DROP_UNUSABLE; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index c7c6d644486f..c9edfcb7a13b 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -62,10 +62,10 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) tail = MICHAEL_MIC_LEN; if (!info->control.hw_key) - tail += TKIP_ICV_LEN; + tail += IEEE80211_TKIP_ICV_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < TKIP_IV_LEN)) + skb_headroom(skb) < IEEE80211_TKIP_IV_LEN)) return TX_DROP; key = &tx->key->conf.key[NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY]; @@ -198,15 +198,16 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) tail = 0; else - tail = TKIP_ICV_LEN; + tail = IEEE80211_TKIP_ICV_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < TKIP_IV_LEN)) + skb_headroom(skb) < IEEE80211_TKIP_IV_LEN)) return -1; - pos = skb_push(skb, TKIP_IV_LEN); - memmove(pos, pos + TKIP_IV_LEN, hdrlen); - skb_set_network_header(skb, skb_network_offset(skb) + TKIP_IV_LEN); + pos = skb_push(skb, IEEE80211_TKIP_IV_LEN); + memmove(pos, pos + IEEE80211_TKIP_IV_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_TKIP_IV_LEN); pos += hdrlen; /* the HW only needs room for the IV, but not the actual IV */ @@ -227,7 +228,7 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) return 0; /* Add room for ICV */ - skb_put(skb, TKIP_ICV_LEN); + skb_put(skb, IEEE80211_TKIP_ICV_LEN); return ieee80211_tkip_encrypt_data(tx->local->wep_tx_tfm, key, skb, pos, len); @@ -290,11 +291,11 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* Trim ICV */ - skb_trim(skb, skb->len - TKIP_ICV_LEN); + skb_trim(skb, skb->len - IEEE80211_TKIP_ICV_LEN); /* Remove IV */ - memmove(skb->data + TKIP_IV_LEN, skb->data, hdrlen); - skb_pull(skb, TKIP_IV_LEN); + memmove(skb->data + IEEE80211_TKIP_IV_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_TKIP_IV_LEN); return RX_CONTINUE; } @@ -337,9 +338,9 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, else qos_tid = 0; - data_len = skb->len - hdrlen - CCMP_HDR_LEN; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN; if (encrypted) - data_len -= CCMP_MIC_LEN; + data_len -= IEEE80211_CCMP_MIC_LEN; /* First block, b_0 */ b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */ @@ -348,7 +349,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, */ b_0[1] = qos_tid | (mgmt << 4); memcpy(&b_0[2], hdr->addr2, ETH_ALEN); - memcpy(&b_0[8], pn, CCMP_PN_LEN); + memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN); /* l(m) */ put_unaligned_be16(data_len, &b_0[14]); @@ -424,15 +425,16 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) tail = 0; else - tail = CCMP_MIC_LEN; + tail = IEEE80211_CCMP_MIC_LEN; if (WARN_ON(skb_tailroom(skb) < tail || - skb_headroom(skb) < CCMP_HDR_LEN)) + skb_headroom(skb) < IEEE80211_CCMP_HDR_LEN)) return -1; - pos = skb_push(skb, CCMP_HDR_LEN); - memmove(pos, pos + CCMP_HDR_LEN, hdrlen); - skb_set_network_header(skb, skb_network_offset(skb) + CCMP_HDR_LEN); + pos = skb_push(skb, IEEE80211_CCMP_HDR_LEN); + memmove(pos, pos + IEEE80211_CCMP_HDR_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_CCMP_HDR_LEN); /* the HW only needs room for the IV, but not the actual IV */ if (info->control.hw_key && @@ -457,10 +459,10 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) if (info->control.hw_key) return 0; - pos += CCMP_HDR_LEN; + pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, scratch, 0); ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, scratch, pos, len, - pos, skb_put(skb, CCMP_MIC_LEN)); + pos, skb_put(skb, IEEE80211_CCMP_MIC_LEN)); return 0; } @@ -490,7 +492,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_key *key = rx->key; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - u8 pn[CCMP_PN_LEN]; + u8 pn[IEEE80211_CCMP_PN_LEN]; int data_len; int queue; @@ -500,12 +502,13 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) !ieee80211_is_robust_mgmt_frame(hdr)) return RX_CONTINUE; - data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - + IEEE80211_CCMP_MIC_LEN; if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; if (status->flag & RX_FLAG_DECRYPTED) { - if (!pskb_may_pull(rx->skb, hdrlen + CCMP_HDR_LEN)) + if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_CCMP_HDR_LEN)) return RX_DROP_UNUSABLE; } else { if (skb_linearize(rx->skb)) @@ -516,7 +519,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) queue = rx->security_idx; - if (memcmp(pn, key->u.ccmp.rx_pn[queue], CCMP_PN_LEN) <= 0) { + if (memcmp(pn, key->u.ccmp.rx_pn[queue], IEEE80211_CCMP_PN_LEN) <= 0) { key->u.ccmp.replays++; return RX_DROP_UNUSABLE; } @@ -528,19 +531,20 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) if (ieee80211_aes_ccm_decrypt( key->u.ccmp.tfm, scratch, - skb->data + hdrlen + CCMP_HDR_LEN, data_len, - skb->data + skb->len - CCMP_MIC_LEN, - skb->data + hdrlen + CCMP_HDR_LEN)) + skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, + data_len, + skb->data + skb->len - IEEE80211_CCMP_MIC_LEN, + skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN)) return RX_DROP_UNUSABLE; } - memcpy(key->u.ccmp.rx_pn[queue], pn, CCMP_PN_LEN); + memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); /* Remove CCMP header and MIC */ - if (pskb_trim(skb, skb->len - CCMP_MIC_LEN)) + if (pskb_trim(skb, skb->len - IEEE80211_CCMP_MIC_LEN)) return RX_DROP_UNUSABLE; - memmove(skb->data + CCMP_HDR_LEN, skb->data, hdrlen); - skb_pull(skb, CCMP_HDR_LEN); + memmove(skb->data + IEEE80211_CCMP_HDR_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_CCMP_HDR_LEN); return RX_CONTINUE; } diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig new file mode 100644 index 000000000000..37421db88965 --- /dev/null +++ b/net/mpls/Kconfig @@ -0,0 +1,9 @@ +# +# MPLS configuration +# +config NET_MPLS_GSO + tristate "MPLS: GSO support" + help + This is helper module to allow segmentation of non-MPLS GSO packets + that have had MPLS stack entries pushed onto them and thus + become MPLS GSO packets. diff --git a/net/mpls/Makefile b/net/mpls/Makefile new file mode 100644 index 000000000000..0a3c171be537 --- /dev/null +++ b/net/mpls/Makefile @@ -0,0 +1,4 @@ +# +# Makefile for MPLS. +# +obj-y += mpls_gso.o diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c new file mode 100644 index 000000000000..1bec1219ab81 --- /dev/null +++ b/net/mpls/mpls_gso.c @@ -0,0 +1,108 @@ +/* + * MPLS GSO Support + * + * Authors: Simon Horman (horms@verge.net.au) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on: GSO portions of net/ipv4/gre.c + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/netdev_features.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t mpls_features; + __be16 mpls_protocol; + + if (unlikely(skb_shinfo(skb)->gso_type & + ~(SKB_GSO_TCPV4 | + SKB_GSO_TCPV6 | + SKB_GSO_UDP | + SKB_GSO_DODGY | + SKB_GSO_TCP_ECN | + SKB_GSO_GRE | + SKB_GSO_MPLS))) + goto out; + + /* Setup inner SKB. */ + mpls_protocol = skb->protocol; + skb->protocol = skb->inner_protocol; + + /* Push back the mac header that skb_mac_gso_segment() has pulled. + * It will be re-pulled by the call to skb_mac_gso_segment() below + */ + __skb_push(skb, skb->mac_len); + + /* Segment inner packet. */ + mpls_features = skb->dev->mpls_features & netif_skb_features(skb); + segs = skb_mac_gso_segment(skb, mpls_features); + + + /* Restore outer protocol. */ + skb->protocol = mpls_protocol; + + /* Re-pull the mac header that the call to skb_mac_gso_segment() + * above pulled. It will be re-pushed after returning + * skb_mac_gso_segment(), an indirect caller of this function. + */ + __skb_push(skb, skb->data - skb_mac_header(skb)); + +out: + return segs; +} + +static int mpls_gso_send_check(struct sk_buff *skb) +{ + return 0; +} + +static struct packet_offload mpls_mc_offload = { + .type = cpu_to_be16(ETH_P_MPLS_MC), + .callbacks = { + .gso_send_check = mpls_gso_send_check, + .gso_segment = mpls_gso_segment, + }, +}; + +static struct packet_offload mpls_uc_offload = { + .type = cpu_to_be16(ETH_P_MPLS_UC), + .callbacks = { + .gso_send_check = mpls_gso_send_check, + .gso_segment = mpls_gso_segment, + }, +}; + +static int __init mpls_gso_init(void) +{ + pr_info("MPLS GSO support\n"); + + dev_add_offload(&mpls_uc_offload); + dev_add_offload(&mpls_mc_offload); + + return 0; +} + +static void __exit mpls_gso_exit(void) +{ + dev_remove_offload(&mpls_uc_offload); + dev_remove_offload(&mpls_mc_offload); +} + +module_init(mpls_gso_init); +module_exit(mpls_gso_exit); + +MODULE_DESCRIPTION("MPLS GSO support"); +MODULE_AUTHOR("Simon Horman (horms@verge.net.au)"); +MODULE_LICENSE("GPL"); diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 857ca9f35177..2217363ab422 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -304,17 +304,26 @@ static struct pernet_operations netfilter_net_ops = { .exit = netfilter_net_exit, }; -void __init netfilter_init(void) +int __init netfilter_init(void) { - int i, h; + int i, h, ret; + for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); } - if (register_pernet_subsys(&netfilter_net_ops) < 0) - panic("cannot create netfilter proc entry"); + ret = register_pernet_subsys(&netfilter_net_ops); + if (ret < 0) + goto err; + + ret = netfilter_log_init(); + if (ret < 0) + goto err_pernet; - if (netfilter_log_init() < 0) - panic("cannot initialize nf_log"); + return 0; +err_pernet: + unregister_pernet_subsys(&netfilter_net_ops); +err: + return ret; } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a083bda322b6..4c8e5c0aa1ab 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -975,8 +975,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) return cp; } } - rcu_read_unlock(); - rcu_read_lock(); + cond_resched_rcu(); } return NULL; @@ -1015,8 +1014,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) iter->l = &ip_vs_conn_tab[idx]; return cp; } - rcu_read_unlock(); - rcu_read_lock(); + cond_resched_rcu(); } iter->l = NULL; return NULL; @@ -1206,17 +1204,13 @@ void ip_vs_random_dropentry(struct net *net) int idx; struct ip_vs_conn *cp, *cp_c; + rcu_read_lock(); /* * Randomly scan 1/32 of the whole table every second */ for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { unsigned int hash = net_random() & ip_vs_conn_tab_mask; - /* - * Lock is actually needed in this loop. - */ - rcu_read_lock(); - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ @@ -1237,6 +1231,18 @@ void ip_vs_random_dropentry(struct net *net) default: continue; } + } else if (cp->protocol == IPPROTO_SCTP) { + switch (cp->state) { + case IP_VS_SCTP_S_INIT1: + case IP_VS_SCTP_S_INIT: + break; + case IP_VS_SCTP_S_ESTABLISHED: + if (todrop_entry(cp)) + break; + continue; + default: + continue; + } } else { if (!todrop_entry(cp)) continue; @@ -1252,8 +1258,9 @@ void ip_vs_random_dropentry(struct net *net) __ip_vs_conn_put(cp); } } - rcu_read_unlock(); + cond_resched_rcu(); } + rcu_read_unlock(); } @@ -1267,11 +1274,8 @@ static void ip_vs_conn_flush(struct net *net) struct netns_ipvs *ipvs = net_ipvs(net); flush_again: + rcu_read_lock(); for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - /* - * Lock is actually needed in this loop. - */ - rcu_read_lock(); hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { if (!ip_vs_conn_net_eq(cp, net)) @@ -1286,8 +1290,9 @@ flush_again: __ip_vs_conn_put(cp); } } - rcu_read_unlock(); + cond_resched_rcu(); } + rcu_read_unlock(); /* the counter may be not NULL, because maybe some conn entries are run by slow timer handler or unhashed but still referred */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 23b8eb53a569..4f69e83ff836 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -305,7 +305,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * return *ignored=0 i.e. ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb); + dest = sched->schedule(svc, skb, iph); if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -452,7 +452,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb); + dest = sched->schedule(svc, skb, iph); if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 9e6c2a075a4c..c8148e487386 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1487,9 +1487,9 @@ ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) * Currently only NETDEV_DOWN is handled to release refs to cached dsts */ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, - void *ptr) + void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_service *svc; @@ -1575,7 +1575,7 @@ static int zero; static int three = 3; static int -proc_do_defense_mode(ctl_table *table, int write, +proc_do_defense_mode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; @@ -1596,7 +1596,7 @@ proc_do_defense_mode(ctl_table *table, int write, } static int -proc_do_sync_threshold(ctl_table *table, int write, +proc_do_sync_threshold(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1616,7 +1616,7 @@ proc_do_sync_threshold(ctl_table *table, int write, } static int -proc_do_sync_mode(ctl_table *table, int write, +proc_do_sync_mode(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1634,7 +1634,7 @@ proc_do_sync_mode(ctl_table *table, int write, } static int -proc_do_sync_ports(ctl_table *table, int write, +proc_do_sync_ports(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; @@ -1715,12 +1715,18 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_do_sync_ports, }, { - .procname = "sync_qlen_max", + .procname = "sync_persist_mode", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { + .procname = "sync_qlen_max", + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { .procname = "sync_sock_size", .maxlen = sizeof(int), .mode = 0644, @@ -1739,6 +1745,18 @@ static struct ctl_table vs_vars[] = { .proc_handler = proc_dointvec, }, { + .procname = "sloppy_tcp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sloppy_sctp", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "expire_quiescent_template", .maxlen = sizeof(int), .mode = 0644, @@ -3717,12 +3735,15 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net) tbl[idx++].data = &ipvs->sysctl_sync_ver; ipvs->sysctl_sync_ports = 1; tbl[idx++].data = &ipvs->sysctl_sync_ports; + tbl[idx++].data = &ipvs->sysctl_sync_persist_mode; ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; ipvs->sysctl_sync_sock_size = 0; tbl[idx++].data = &ipvs->sysctl_sync_sock_size; tbl[idx++].data = &ipvs->sysctl_cache_bypass; tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; + tbl[idx++].data = &ipvs->sysctl_sloppy_tcp; + tbl[idx++].data = &ipvs->sysctl_sloppy_sctp; tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index ccab120df45e..c3b84546ea9e 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -214,18 +214,16 @@ static inline int is_overloaded(struct ip_vs_dest *dest) * Destination hashing scheduling */ static struct ip_vs_dest * -ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_dh_state *s; - struct ip_vs_iphdr iph; - - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); s = (struct ip_vs_dh_state *) svc->sched_data; - dest = ip_vs_dh_get(svc->af, s, &iph.daddr); + dest = ip_vs_dh_get(svc->af, s, &iph->daddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 @@ -235,7 +233,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 5ea26bd87743..1383b0eadc0e 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -118,7 +118,7 @@ struct ip_vs_lblc_table { * IPVS LBLC sysctl table */ #ifdef CONFIG_SYSCTL -static ctl_table vs_vars_table[] = { +static struct ctl_table vs_vars_table[] = { { .procname = "lblc_expiration", .data = NULL, @@ -487,19 +487,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) * Locality-Based (weighted) Least-Connection scheduling */ static struct ip_vs_dest * -ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_lblc_table *tbl = svc->sched_data; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest = NULL; struct ip_vs_lblc_entry *en; - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); - IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr); + en = ip_vs_lblc_get(svc->af, tbl, &iph->daddr); if (en) { /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -529,12 +527,12 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* If we fail to create a cache entry, we'll just use the valid dest */ spin_lock_bh(&svc->sched_lock); if (!tbl->dead) - ip_vs_lblc_new(tbl, &iph.daddr, dest); + ip_vs_lblc_new(tbl, &iph->daddr, dest); spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 50123c2ab484..3cd85b2fc67c 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -299,7 +299,7 @@ struct ip_vs_lblcr_table { * IPVS LBLCR sysctl table */ -static ctl_table vs_vars_table[] = { +static struct ctl_table vs_vars_table[] = { { .procname = "lblcr_expiration", .data = NULL, @@ -655,19 +655,17 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) * Locality-Based (weighted) Least-Connection scheduling */ static struct ip_vs_dest * -ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_lblcr_table *tbl = svc->sched_data; - struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_lblcr_entry *en; - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); - IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ - en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); + en = ip_vs_lblcr_get(svc->af, tbl, &iph->daddr); if (en) { en->lastuse = jiffies; @@ -718,12 +716,12 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* If we fail to create a cache entry, we'll just use the valid dest */ spin_lock_bh(&svc->sched_lock); if (!tbl->dead) - ip_vs_lblcr_new(tbl, &iph.daddr, dest); + ip_vs_lblcr_new(tbl, &iph->daddr, dest); spin_unlock_bh(&svc->sched_lock); out: IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.daddr), + IP_VS_DBG_ADDR(svc->af, &iph->daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 5128e338a749..2bdcb1cf2127 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -26,7 +26,8 @@ * Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; unsigned int loh = 0, doh; diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 646cfd4baa73..d8d9860934fe 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -55,7 +55,8 @@ ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least = NULL; unsigned int loh = 0, doh; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 86464881cd20..3c0da8728036 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -15,6 +15,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, { struct net *net; struct ip_vs_service *svc; + struct netns_ipvs *ipvs; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; @@ -27,13 +28,14 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (sch == NULL) return 0; net = skb_net(skb); + ipvs = net_ipvs(net); rcu_read_lock(); - if ((sch->type == SCTP_CID_INIT) && + if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) && (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, &iph->daddr, sh->dest))) { int ignored; - if (ip_vs_todrop(net_ipvs(net))) { + if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -183,710 +185,159 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) return 1; } -struct ipvs_sctp_nextstate { - int next_state; -}; enum ipvs_sctp_event_t { - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_SER, - IP_VS_SCTP_EVE_INIT_CLI, - IP_VS_SCTP_EVE_INIT_SER, - IP_VS_SCTP_EVE_INIT_ACK_CLI, - IP_VS_SCTP_EVE_INIT_ACK_SER, - IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, - IP_VS_SCTP_EVE_COOKIE_ECHO_SER, - IP_VS_SCTP_EVE_COOKIE_ACK_CLI, - IP_VS_SCTP_EVE_COOKIE_ACK_SER, - IP_VS_SCTP_EVE_ABORT_CLI, - IP_VS_SCTP_EVE__ABORT_SER, - IP_VS_SCTP_EVE_SHUT_CLI, - IP_VS_SCTP_EVE_SHUT_SER, - IP_VS_SCTP_EVE_SHUT_ACK_CLI, - IP_VS_SCTP_EVE_SHUT_ACK_SER, - IP_VS_SCTP_EVE_SHUT_COM_CLI, - IP_VS_SCTP_EVE_SHUT_COM_SER, - IP_VS_SCTP_EVE_LAST + IP_VS_SCTP_DATA = 0, /* DATA, SACK, HEARTBEATs */ + IP_VS_SCTP_INIT, + IP_VS_SCTP_INIT_ACK, + IP_VS_SCTP_COOKIE_ECHO, + IP_VS_SCTP_COOKIE_ACK, + IP_VS_SCTP_SHUTDOWN, + IP_VS_SCTP_SHUTDOWN_ACK, + IP_VS_SCTP_SHUTDOWN_COMPLETE, + IP_VS_SCTP_ERROR, + IP_VS_SCTP_ABORT, + IP_VS_SCTP_EVENT_LAST }; -static enum ipvs_sctp_event_t sctp_events[256] = { - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_INIT_CLI, - IP_VS_SCTP_EVE_INIT_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_ABORT_CLI, - IP_VS_SCTP_EVE_SHUT_CLI, - IP_VS_SCTP_EVE_SHUT_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_COOKIE_ECHO_CLI, - IP_VS_SCTP_EVE_COOKIE_ACK_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_DATA_CLI, - IP_VS_SCTP_EVE_SHUT_COM_CLI, +/* RFC 2960, 3.2 Chunk Field Descriptions */ +static __u8 sctp_events[] = { + [SCTP_CID_DATA] = IP_VS_SCTP_DATA, + [SCTP_CID_INIT] = IP_VS_SCTP_INIT, + [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK, + [SCTP_CID_SACK] = IP_VS_SCTP_DATA, + [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA, + [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA, + [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT, + [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN, + [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK, + [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR, + [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO, + [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK, + [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA, + [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA, + [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE, }; -static struct ipvs_sctp_nextstate - sctp_states_table[IP_VS_SCTP_S_LAST][IP_VS_SCTP_EVE_LAST] = { - /* - * STATE : IP_VS_SCTP_S_NONE - */ - /*next state *//*event */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ }, - }, - /* - * STATE : IP_VS_SCTP_S_INIT_CLI - * Cient sent INIT and is waiting for reply from server(In ECHO_WAIT) - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ECHO_CLI */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_SER - * Server sent INIT and waiting for INIT ACK from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_ACK_CLI - * Client sent INIT ACK and waiting for ECHO from the server - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK has been resent by the client, let us stay is in - * the same state - */ - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK sent by the server, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * ECHO by client, it should not happen, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO by server, this is what we are expecting, move to ECHO_SER - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, it should not happen, close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * Unexpected COOKIE ACK from server, staty in the same state - */ - {IP_VS_SCTP_S_INIT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_INIT_ACK_SER - * Server sent INIT ACK and waiting for ECHO from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * Unexpected INIT_ACK by the client, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK resent by the server, let us move to same state - */ - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client send the ECHO, this is what we are expecting, - * move to ECHO_CLI - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO received from the server, Not sure what to do, - * let us close it - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, let us stay in the same state - */ - {IP_VS_SCTP_S_INIT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, hmm... this should not happen, lets close - * the connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ECHO_CLI - * Cient sent ECHO and waiting COOKEI ACK from the Server - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK has been by the client, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client resent the ECHO, let us stay in the same state - */ - {IP_VS_SCTP_S_ECHO_CLI /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO received from the server, Not sure what to do, - * let us close it - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, this shoud not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, this is what we are awaiting,lets move to - * ESTABLISHED. - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ECHO_SER - * Server sent ECHO and waiting COOKEI ACK from the client - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - /* - * INIT_ACK has been by the server, let us close the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent the ECHO, not sure what to do, let's close the - * connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - /* - * ECHO resent by the server, stay in the same state - */ - {IP_VS_SCTP_S_ECHO_SER /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, this is what we are expecting, let's move - * to ESTABLISHED. - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - /* - * COOKIE ACK from server, this should not happen, lets close the - * connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_ESTABLISHED - * Association established - */ - {{IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_SHUT_CLI - * SHUTDOWN sent from the client, waitinf for SHUT ACK from the server - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN resent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this is what we are expecting, let's move - * to SHUDOWN_ACK_SER - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_SHUT_SER - * SHUTDOWN sent from the server, waitinf for SHUTDOWN ACK from client - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN resent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN resent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this is what we are expecting, let's - * move to SHUT_ACK_CLI - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - - /* - * State : IP_VS_SCTP_S_SHUT_ACK_CLI - * SHUTDOWN ACK from the client, awaiting for SHUTDOWN COM from server - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ - - {{IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN sent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN sent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client resent SHUDTDOWN_ACK, let's stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_CLI /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server sent SHUTDOWN ACK, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this should not happen, let's close the - * connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - /* - * SHUTDOWN COMPLETE from server this is what we are expecting. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - - /* - * State : IP_VS_SCTP_S_SHUT_ACK_SER - * SHUTDOWN ACK from the server, awaiting for SHUTDOWN COM from client - */ - /* - * We received the data chuck, keep the state unchanged. I assume - * that still data chuncks can be received by both the peers in - * SHUDOWN state - */ +/* SCTP States: + * See RFC 2960, 4. SCTP Association State Diagram + * + * New states (not in diagram): + * - INIT1 state: use shorter timeout for dropped INIT packets + * - REJECTED state: use shorter timeout if INIT is rejected with ABORT + * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging + * + * The states are as seen in real server. In the diagram, INIT1, INIT, + * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state. + * + * States as per packets from client (C) and server (S): + * + * Setup of client connection: + * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK + * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK + * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO + * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK + * + * Setup of server connection: + * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK + * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO + * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK + */ - {{IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_DATA_SER */ }, - /* - * We have got an INIT from client. From the spec.“Upon receipt of - * an INIT in the COOKIE-WAIT state, an endpoint MUST respond with - * an INIT ACK using the same parameters it sent in its original - * INIT chunk (including its Initiate Tag, unchanged”). - */ - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - /* - * INIT_ACK sent by the server, Unexpected INIT ACK, spec says, - * “If an INIT ACK is received by an endpoint in any state other - * than the COOKIE-WAIT state, the endpoint should discard the - * INIT ACK chunk”. Stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - /* - * Client sent ECHO, Spec(sec 5.2.4) says it may be handled by the - * peer and peer shall move to the ESTABISHED. if it doesn't handle - * it will send ERROR chunk. So, stay in the same state - */ - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_ESTABLISHED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - /* - * COOKIE ACK from client, not sure what to do stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - /* - * SHUTDOWN sent from the client, move to SHUDDOWN_CLI - */ - {IP_VS_SCTP_S_SHUT_CLI /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - /* - * SHUTDOWN sent from the server, move to SHUTDOWN_SER - */ - {IP_VS_SCTP_S_SHUT_SER /* IP_VS_SCTP_EVE_SHUT_SER */ }, - /* - * client sent SHUDTDOWN_ACK, this should not happen let's close - * the connection. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - /* - * Server resent SHUTDOWN ACK, stay in the same state - */ - {IP_VS_SCTP_S_SHUT_ACK_SER /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - /* - * SHUTDOWN COM from client, this what we are expecting, let's close - * the connection - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - /* - * SHUTDOWN COMPLETE from server this should not happen. - */ - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - }, - /* - * State : IP_VS_SCTP_S_CLOSED - */ - {{IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_DATA_SER */ }, - {IP_VS_SCTP_S_INIT_CLI /* IP_VS_SCTP_EVE_INIT_CLI */ }, - {IP_VS_SCTP_S_INIT_SER /* IP_VS_SCTP_EVE_INIT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_INIT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ECHO_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_COOKIE_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_ABORT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_ACK_SER */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_CLI */ }, - {IP_VS_SCTP_S_CLOSED /* IP_VS_SCTP_EVE_SHUT_COM_SER */ } - } +#define sNO IP_VS_SCTP_S_NONE +#define sI1 IP_VS_SCTP_S_INIT1 +#define sIN IP_VS_SCTP_S_INIT +#define sCS IP_VS_SCTP_S_COOKIE_SENT +#define sCR IP_VS_SCTP_S_COOKIE_REPLIED +#define sCW IP_VS_SCTP_S_COOKIE_WAIT +#define sCO IP_VS_SCTP_S_COOKIE +#define sCE IP_VS_SCTP_S_COOKIE_ECHOED +#define sES IP_VS_SCTP_S_ESTABLISHED +#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT +#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED +#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT +#define sRJ IP_VS_SCTP_S_REJECTED +#define sCL IP_VS_SCTP_S_CLOSED + +static const __u8 sctp_states + [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = { + { /* INPUT */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, +/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL}, +/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL}, +/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, + { /* OUTPUT */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW}, +/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL}, +/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, + { /* INPUT-ONLY */ +/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/ +/* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN}, +/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL}, +/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL}, +/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL}, +/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL}, +/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL}, +/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, + }, }; -/* - * Timeout table[state] - */ +#define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ) + +/* Timeout table[state] */ static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { - [IP_VS_SCTP_S_NONE] = 2 * HZ, - [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_ACK_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_INIT_ACK_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ECHO_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ECHO_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_ACK_CLI] = 1 * 60 * HZ, - [IP_VS_SCTP_S_SHUT_ACK_SER] = 1 * 60 * HZ, - [IP_VS_SCTP_S_CLOSED] = 10 * HZ, - [IP_VS_SCTP_S_LAST] = 2 * HZ, + [IP_VS_SCTP_S_NONE] = 2 * HZ, + [IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ, + [IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ, + [IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ, + [IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO, + [IP_VS_SCTP_S_LAST] = 2 * HZ, }; static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = { - [IP_VS_SCTP_S_NONE] = "NONE", - [IP_VS_SCTP_S_INIT_CLI] = "INIT_CLI", - [IP_VS_SCTP_S_INIT_SER] = "INIT_SER", - [IP_VS_SCTP_S_INIT_ACK_CLI] = "INIT_ACK_CLI", - [IP_VS_SCTP_S_INIT_ACK_SER] = "INIT_ACK_SER", - [IP_VS_SCTP_S_ECHO_CLI] = "COOKIE_ECHO_CLI", - [IP_VS_SCTP_S_ECHO_SER] = "COOKIE_ECHO_SER", - [IP_VS_SCTP_S_ESTABLISHED] = "ESTABISHED", - [IP_VS_SCTP_S_SHUT_CLI] = "SHUTDOWN_CLI", - [IP_VS_SCTP_S_SHUT_SER] = "SHUTDOWN_SER", - [IP_VS_SCTP_S_SHUT_ACK_CLI] = "SHUTDOWN_ACK_CLI", - [IP_VS_SCTP_S_SHUT_ACK_SER] = "SHUTDOWN_ACK_SER", - [IP_VS_SCTP_S_CLOSED] = "CLOSED", - [IP_VS_SCTP_S_LAST] = "BUG!" + [IP_VS_SCTP_S_NONE] = "NONE", + [IP_VS_SCTP_S_INIT1] = "INIT1", + [IP_VS_SCTP_S_INIT] = "INIT", + [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT", + [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED", + [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT", + [IP_VS_SCTP_S_COOKIE] = "COOKIE", + [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED", + [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED", + [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT", + [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED", + [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT", + [IP_VS_SCTP_S_REJECTED] = "REJECTED", + [IP_VS_SCTP_S_CLOSED] = "CLOSED", + [IP_VS_SCTP_S_LAST] = "BUG!", }; @@ -943,17 +394,20 @@ set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, } } - event = sctp_events[chunk_type]; + event = (chunk_type < sizeof(sctp_events)) ? + sctp_events[chunk_type] : IP_VS_SCTP_DATA; - /* - * If the direction is IP_VS_DIR_OUTPUT, this event is from server - */ - if (direction == IP_VS_DIR_OUTPUT) - event++; - /* - * get next state + /* Update direction to INPUT_ONLY if necessary + * or delete NO_OUTPUT flag if output packet detected */ - next_state = sctp_states_table[cp->state][event].next_state; + if (cp->flags & IP_VS_CONN_F_NOOUTPUT) { + if (direction == IP_VS_DIR_OUTPUT) + cp->flags &= ~IP_VS_CONN_F_NOOUTPUT; + else + direction = IP_VS_DIR_INPUT_ONLY; + } + + next_state = sctp_states[direction][event][cp->state]; if (next_state != cp->state) { struct ip_vs_dest *dest = cp->dest; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 50a15944c6c1..e3a697234a98 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -39,6 +39,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; + struct netns_ipvs *ipvs; th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); if (th == NULL) { @@ -46,14 +47,15 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, return 0; } net = skb_net(skb); + ipvs = net_ipvs(net); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ rcu_read_lock(); - if (th->syn && + if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst && (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, &iph->daddr, th->dest))) { int ignored; - if (ip_vs_todrop(net_ipvs(net))) { + if (ip_vs_todrop(ipvs)) { /* * It seems that we are very loaded. * We have to drop this packet :( @@ -401,7 +403,7 @@ static struct tcp_states_t tcp_states [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sSR }}, /* OUTPUT */ @@ -415,7 +417,7 @@ static struct tcp_states_t tcp_states [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSR }}, /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; @@ -424,7 +426,7 @@ static struct tcp_states_t tcp_states_dos [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSR, sES, sES, sSR, sSR, sSR, sSR, sSR, sSR, sSR, sSA }}, /*fin*/ {{sCL, sCW, sSS, sTW, sTW, sTW, sCL, sCW, sLA, sLI, sSA }}, -/*ack*/ {{sCL, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, +/*ack*/ {{sES, sES, sSS, sSR, sFW, sTW, sCL, sCW, sCL, sLI, sSA }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, /* OUTPUT */ @@ -438,7 +440,7 @@ static struct tcp_states_t tcp_states_dos [] = { /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ /*syn*/ {{sSA, sES, sES, sSR, sSA, sSA, sSA, sSA, sSA, sSA, sSA }}, /*fin*/ {{sCL, sFW, sSS, sTW, sFW, sTW, sCL, sCW, sLA, sLI, sTW }}, -/*ack*/ {{sCL, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, +/*ack*/ {{sES, sES, sSS, sES, sFW, sTW, sCL, sCW, sCL, sLI, sES }}, /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index c35986c793d9..176b87c35e34 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -55,7 +55,8 @@ static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest) * Round-Robin Scheduling */ static struct ip_vs_dest * -ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct list_head *p; struct ip_vs_dest *dest, *last; diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index f3205925359a..a5284cc3d882 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -59,7 +59,8 @@ ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; unsigned int loh, doh; diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index a65edfe4b16c..f16c027df15b 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -48,6 +48,10 @@ #include <net/ip_vs.h> +#include <net/tcp.h> +#include <linux/udp.h> +#include <linux/sctp.h> + /* * IPVS SH bucket @@ -71,10 +75,19 @@ struct ip_vs_sh_state { struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; }; +/* Helper function to determine if server is unavailable */ +static inline bool is_unavailable(struct ip_vs_dest *dest) +{ + return atomic_read(&dest->weight) <= 0 || + dest->flags & IP_VS_DEST_F_OVERLOAD; +} + /* * Returns hash value for IPVS SH entry */ -static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr) +static inline unsigned int +ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr, + __be16 port, unsigned int offset) { __be32 addr_fold = addr->ip; @@ -83,7 +96,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad addr_fold = addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif - return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK; + return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) & + IP_VS_SH_TAB_MASK; } @@ -91,12 +105,42 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr) +ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, + const union nf_inet_addr *addr, __be16 port) { - return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest); + unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0); + struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest); + + return (!dest || is_unavailable(dest)) ? NULL : dest; } +/* As ip_vs_sh_get, but with fallback if selected server is unavailable */ +static inline struct ip_vs_dest * +ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, + const union nf_inet_addr *addr, __be16 port) +{ + unsigned int offset; + unsigned int hash; + struct ip_vs_dest *dest; + + for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { + hash = ip_vs_sh_hashkey(svc->af, addr, port, offset); + dest = rcu_dereference(s->buckets[hash].dest); + if (!dest) + break; + if (is_unavailable(dest)) + IP_VS_DBG_BUF(6, "SH: selected unavailable server " + "%s:%d (offset %d)", + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), offset); + else + return dest; + } + + return NULL; +} + /* * Assign all the hash buckets of the specified table with the service. */ @@ -213,13 +257,33 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, } -/* - * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, - * consider that the server is overloaded here. - */ -static inline int is_overloaded(struct ip_vs_dest *dest) +/* Helper function to get port number */ +static inline __be16 +ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { - return dest->flags & IP_VS_DEST_F_OVERLOAD; + __be16 port; + struct tcphdr _tcph, *th; + struct udphdr _udph, *uh; + sctp_sctphdr_t _sctph, *sh; + + switch (iph->protocol) { + case IPPROTO_TCP: + th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph); + port = th->source; + break; + case IPPROTO_UDP: + uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph); + port = uh->source; + break; + case IPPROTO_SCTP: + sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); + port = sh->source; + break; + default: + port = 0; + } + + return port; } @@ -227,28 +291,32 @@ static inline int is_overloaded(struct ip_vs_dest *dest) * Source Hashing scheduling */ static struct ip_vs_dest * -ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_sh_state *s; - struct ip_vs_iphdr iph; - - ip_vs_fill_iph_addr_only(svc->af, skb, &iph); + __be16 port = 0; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); + if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT) + port = ip_vs_sh_get_port(skb, iph); + s = (struct ip_vs_sh_state *) svc->sched_data; - dest = ip_vs_sh_get(svc->af, s, &iph.saddr); - if (!dest - || !(dest->flags & IP_VS_DEST_F_AVAILABLE) - || atomic_read(&dest->weight) <= 0 - || is_overloaded(dest)) { + + if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) + dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port); + else + dest = ip_vs_sh_get(svc, s, &iph->saddr, port); + + if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); return NULL; } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", - IP_VS_DBG_ADDR(svc->af, &iph.saddr), + IP_VS_DBG_ADDR(svc->af, &iph->saddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f6046d9af8d3..f4484719f3e6 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -425,6 +425,16 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) return sb; } +/* Check if connection is controlled by persistence */ +static inline bool in_persistence(struct ip_vs_conn *cp) +{ + for (cp = cp->control; cp; cp = cp->control) { + if (cp->flags & IP_VS_CONN_F_TEMPLATE) + return true; + } + return false; +} + /* Check if conn should be synced. * pkts: conn packets, use sysctl_sync_threshold to avoid packet check * - (1) sync_refresh_period: reduce sync rate. Additionally, retry @@ -447,6 +457,8 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, /* Check if we sync in current state */ if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE)) force = 0; + else if (unlikely(sysctl_sync_persist_mode(ipvs) && in_persistence(cp))) + return 0; else if (likely(cp->protocol == IPPROTO_TCP)) { if (!((1 << cp->state) & ((1 << IP_VS_TCP_S_ESTABLISHED) | @@ -461,9 +473,10 @@ static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, } else if (unlikely(cp->protocol == IPPROTO_SCTP)) { if (!((1 << cp->state) & ((1 << IP_VS_SCTP_S_ESTABLISHED) | - (1 << IP_VS_SCTP_S_CLOSED) | - (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) | - (1 << IP_VS_SCTP_S_SHUT_ACK_SER)))) + (1 << IP_VS_SCTP_S_SHUTDOWN_SENT) | + (1 << IP_VS_SCTP_S_SHUTDOWN_RECEIVED) | + (1 << IP_VS_SCTP_S_SHUTDOWN_ACK_SENT) | + (1 << IP_VS_SCTP_S_CLOSED)))) return 0; force = cp->state != cp->old_state; if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED) diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index c60a81c4ce9a..6dc1fa128840 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -31,7 +31,8 @@ * Weighted Least Connection scheduling */ static struct ip_vs_dest * -ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *least; unsigned int loh, doh; diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 0e68555bceb9..0546cd572d6b 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -162,7 +162,8 @@ static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, * Weighted Round-Robin Scheduling */ static struct ip_vs_dest * -ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, + struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *last, *stop = NULL; struct ip_vs_wrr_mark *mark = svc->sched_data; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index c63b618cd619..4fd1ca94fd4a 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -293,6 +293,11 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, sizeof(exp->tuple.dst.u3) - len); exp->tuple.dst.u.all = *dst; + +#ifdef CONFIG_NF_NAT_NEEDED + memset(&exp->saved_addr, 0, sizeof(exp->saved_addr)); + memset(&exp->saved_proto, 0, sizeof(exp->saved_proto)); +#endif } EXPORT_SYMBOL_GPL(nf_ct_expect_init); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 6b217074237b..b8a0924064ef 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -55,10 +55,14 @@ unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, struct nf_conntrack_expect *exp); EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); -static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); -static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); +static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); +static int try_rfc1123(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); +static int try_eprt(const char *, size_t, struct nf_conntrack_man *, + char, unsigned int *); static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, - char); + char, unsigned int *); static struct ftp_search { const char *pattern; @@ -66,7 +70,7 @@ static struct ftp_search { char skip; char term; enum nf_ct_ftp_type ftptype; - int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); + int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char, unsigned int *); } search[IP_CT_DIR_MAX][2] = { [IP_CT_DIR_ORIGINAL] = { { @@ -90,10 +94,8 @@ static struct ftp_search { { .pattern = "227 ", .plen = sizeof("227 ") - 1, - .skip = '(', - .term = ')', .ftptype = NF_CT_FTP_PASV, - .getnum = try_rfc959, + .getnum = try_rfc1123, }, { .pattern = "229 ", @@ -132,8 +134,9 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[], i++; else { /* Unexpected character; true if it's the - terminator and we're finished. */ - if (*data == term && i == array_size - 1) + terminator (or we don't care about one) + and we're finished. */ + if ((*data == term || !term) && i == array_size - 1) return len; pr_debug("Char %u (got %u nums) `%u' unexpected\n", @@ -148,7 +151,8 @@ static int try_number(const char *data, size_t dlen, u_int32_t array[], /* Returns 0, or length of numbers: 192,168,1,1,5,6 */ static int try_rfc959(const char *data, size_t dlen, - struct nf_conntrack_man *cmd, char term) + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) { int length; u_int32_t array[6]; @@ -163,6 +167,33 @@ static int try_rfc959(const char *data, size_t dlen, return length; } +/* + * From RFC 1123: + * The format of the 227 reply to a PASV command is not + * well standardized. In particular, an FTP client cannot + * assume that the parentheses shown on page 40 of RFC-959 + * will be present (and in fact, Figure 3 on page 43 omits + * them). Therefore, a User-FTP program that interprets + * the PASV reply must scan the reply for the first digit + * of the host and port numbers. + */ +static int try_rfc1123(const char *data, size_t dlen, + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) +{ + int i; + for (i = 0; i < dlen; i++) + if (isdigit(data[i])) + break; + + if (i == dlen) + return 0; + + *offset += i; + + return try_rfc959(data + i, dlen - i, cmd, 0, offset); +} + /* Grab port: number up to delimiter */ static int get_port(const char *data, int start, size_t dlen, char delim, __be16 *port) @@ -191,7 +222,7 @@ static int get_port(const char *data, int start, size_t dlen, char delim, /* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, - char term) + char term, unsigned int *offset) { char delim; int length; @@ -239,7 +270,8 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, /* Returns 0, or length of numbers: |||6446| */ static int try_epsv_response(const char *data, size_t dlen, - struct nf_conntrack_man *cmd, char term) + struct nf_conntrack_man *cmd, char term, + unsigned int *offset) { char delim; @@ -261,9 +293,10 @@ static int find_pattern(const char *data, size_t dlen, unsigned int *numlen, struct nf_conntrack_man *cmd, int (*getnum)(const char *, size_t, - struct nf_conntrack_man *, char)) + struct nf_conntrack_man *, char, + unsigned int *)) { - size_t i; + size_t i = plen; pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); if (dlen == 0) @@ -293,16 +326,18 @@ static int find_pattern(const char *data, size_t dlen, pr_debug("Pattern matches!\n"); /* Now we've found the constant string, try to skip to the 'skip' character */ - for (i = plen; data[i] != skip; i++) - if (i == dlen - 1) return -1; + if (skip) { + for (i = plen; data[i] != skip; i++) + if (i == dlen - 1) return -1; - /* Skip over the last character */ - i++; + /* Skip over the last character */ + i++; + } pr_debug("Skipped up to `%c'!\n", skip); *numoff = i; - *numlen = getnum(data + i, dlen - i, cmd, term); + *numlen = getnum(data + i, dlen - i, cmd, term, numoff); if (!*numlen) return -1; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ecf065f94032..edc410e778f7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -828,7 +828,9 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) struct nf_conntrack_l3proto *l3proto; int ret = 0; - nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); + ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); + if (ret < 0) + return ret; rcu_read_lock(); l3proto = __nf_ct_l3proto_find(tuple->src.l3num); @@ -895,7 +897,9 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[], memset(tuple, 0, sizeof(*tuple)); - nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); + err = nla_parse_nested(tb, CTA_TUPLE_MAX, cda[type], tuple_nla_policy); + if (err < 0) + return err; if (!tb[CTA_TUPLE_IP]) return -EINVAL; @@ -946,9 +950,12 @@ static inline int ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, struct nlattr **helpinfo) { + int err; struct nlattr *tb[CTA_HELP_MAX+1]; - nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); + err = nla_parse_nested(tb, CTA_HELP_MAX, attr, help_nla_policy); + if (err < 0) + return err; if (!tb[CTA_HELP_NAME]) return -EINVAL; @@ -1431,7 +1438,9 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[] struct nf_conntrack_l4proto *l4proto; int err = 0; - nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); + err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy); + if (err < 0) + return err; rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); @@ -1452,9 +1461,12 @@ static const struct nla_policy nat_seq_policy[CTA_NAT_SEQ_MAX+1] = { static inline int change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) { + int err; struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; - nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); + err = nla_parse_nested(cda, CTA_NAT_SEQ_MAX, attr, nat_seq_policy); + if (err < 0) + return err; if (!cda[CTA_NAT_SEQ_CORRECTION_POS]) return -EINVAL; @@ -2116,7 +2128,9 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) struct nlattr *cda[CTA_MAX+1]; int ret; - nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); + ret = nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); + if (ret < 0) + return ret; spin_lock_bh(&nf_conntrack_lock); ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); @@ -2711,7 +2725,9 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr, struct nf_conntrack_tuple nat_tuple = {}; int err; - nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy); + err = nla_parse_nested(tb, CTA_EXPECT_NAT_MAX, attr, exp_nat_nla_policy); + if (err < 0) + return err; if (!tb[CTA_EXPECT_NAT_DIR] || !tb[CTA_EXPECT_NAT_TUPLE]) return -EINVAL; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 4d4d8f1d01fc..7dcc376eea5f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1043,6 +1043,12 @@ static int tcp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } + /* ESTABLISHED without SEEN_REPLY, i.e. mid-connection + * pickup with loose=1. Avoid large ESTABLISHED timeout. + */ + if (new_state == TCP_CONNTRACK_ESTABLISHED && + timeout > timeouts[TCP_CONNTRACK_UNACK]) + timeout = timeouts[TCP_CONNTRACK_UNACK]; } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) && (old_state == TCP_CONNTRACK_SYN_RECV || old_state == TCP_CONNTRACK_ESTABLISHED) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index bd700b4013c1..f641751dba9d 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -408,7 +408,7 @@ static int log_invalid_proto_max = 255; static struct ctl_table_header *nf_ct_netfilter_header; -static ctl_table nf_ct_sysctl_table[] = { +static struct ctl_table nf_ct_sysctl_table[] = { { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, @@ -458,7 +458,7 @@ static ctl_table nf_ct_sysctl_table[] = { #define NET_NF_CONNTRACK_MAX 2089 -static ctl_table nf_ct_netfilter_table[] = { +static struct ctl_table nf_ct_netfilter_table[] = { { .procname = "nf_conntrack_max", .data = &nf_conntrack_max, diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 3b18dd1be7d9..85296d4eac0e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -245,7 +245,7 @@ static const struct file_operations nflog_file_ops = { static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; -static int nf_log_proc_dostring(ctl_table *table, int write, +static int nf_log_proc_dostring(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; @@ -369,9 +369,7 @@ static int __net_init nf_log_net_init(struct net *net) out_sysctl: #ifdef CONFIG_PROC_FS - /* For init_net: errors will trigger panic, don't unroll on error. */ - if (!net_eq(net, &init_net)) - remove_proc_entry("nf_log", net->nf.proc_netfilter); + remove_proc_entry("nf_log", net->nf.proc_netfilter); #endif return ret; } diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 5fea563afe30..85e20a919081 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -104,7 +104,7 @@ static void mangle_contents(struct sk_buff *skb, /* move post-replacement */ memmove(data + match_offset + rep_len, data + match_offset + match_len, - skb->tail - (skb->network_header + dataoff + + skb_tail_pointer(skb) - (skb_network_header(skb) + dataoff + match_offset + match_len)); /* insert data from buffer */ diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a191b6db657e..9e287cb56a04 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -67,9 +67,12 @@ static int nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, const struct nlattr *attr) { + int err; struct nlattr *tb[NFCTH_TUPLE_MAX+1]; - nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + err = nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + if (err < 0) + return err; if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; @@ -121,9 +124,12 @@ static int nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, const struct nlattr *attr) { + int err; struct nlattr *tb[NFCTH_POLICY_MAX+1]; - nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + err = nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + if (err < 0) + return err; if (!tb[NFCTH_POLICY_NAME] || !tb[NFCTH_POLICY_EXPECT_MAX] || @@ -153,8 +159,10 @@ nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, struct nf_conntrack_expect_policy *expect_policy; struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; - nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, - nfnl_cthelper_expect_policy_set); + ret = nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + if (ret < 0) + return ret; if (!tb[NFCTH_POLICY_SET_NUM]) return -EINVAL; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 65074dfb9383..50580494148d 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -59,8 +59,10 @@ ctnl_timeout_parse_policy(struct ctnl_timeout *timeout, if (likely(l4proto->ctnl_timeout.nlattr_to_obj)) { struct nlattr *tb[l4proto->ctnl_timeout.nlattr_max+1]; - nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, - attr, l4proto->ctnl_timeout.nla_policy); + ret = nla_parse_nested(tb, l4proto->ctnl_timeout.nlattr_max, + attr, l4proto->ctnl_timeout.nla_policy); + if (ret < 0) + return ret; ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, &timeout->data); diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 5352b2d2d5bf..971ea145ab3e 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -41,6 +41,14 @@ #define NFQNL_QMAX_DEFAULT 1024 +/* We're using struct nlattr which has 16bit nla_len. Note that nla_len + * includes the header length. Thus, the maximum packet length that we + * support is 65531 bytes. We send truncated packets if the specified length + * is larger than that. Userspace can check for presence of NFQA_CAP_LEN + * attribute to detect truncation. + */ +#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN) + struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ struct rcu_head rcu; @@ -122,7 +130,7 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, inst->queue_num = queue_num; inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; - inst->copy_range = 0xffff; + inst->copy_range = NFQNL_MAX_COPY_RANGE; inst->copy_mode = NFQNL_COPY_NONE; spin_lock_init(&inst->lock); INIT_LIST_HEAD(&inst->queue_list); @@ -272,12 +280,17 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) skb_shinfo(to)->nr_frags = j; } -static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet) +static int +nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, + bool csum_verify) { __u32 flags = 0; if (packet->ip_summed == CHECKSUM_PARTIAL) flags = NFQA_SKB_CSUMNOTREADY; + else if (csum_verify) + flags = NFQA_SKB_CSUM_NOTVERIFIED; + if (skb_is_gso(packet)) flags |= NFQA_SKB_GSO; @@ -302,6 +315,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct net_device *outdev; struct nf_conn *ct = NULL; enum ip_conntrack_info uninitialized_var(ctinfo); + bool csum_verify; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) @@ -319,6 +333,12 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entskb->tstamp.tv64) size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp)); + if (entry->hook <= NF_INET_FORWARD || + (entry->hook == NF_INET_POST_ROUTING && entskb->sk == NULL)) + csum_verify = !skb_csum_unnecessary(entskb); + else + csum_verify = false; + outdev = entry->outdev; switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { @@ -333,10 +353,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, return NULL; data_len = ACCESS_ONCE(queue->copy_range); - if (data_len == 0 || data_len > entskb->len) + if (data_len > entskb->len) data_len = entskb->len; - if (!entskb->head_frag || skb_headlen(entskb) < L1_CACHE_BYTES || skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) @@ -465,10 +484,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) goto nla_put_failure; - if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) + if (cap_len > data_len && + nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) goto nla_put_failure; - if (nfqnl_put_packet_info(skb, entskb)) + if (nfqnl_put_packet_info(skb, entskb, csum_verify)) goto nla_put_failure; if (data_len) { @@ -509,10 +529,6 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, } spin_lock_bh(&queue->lock); - if (!queue->peer_portid) { - err = -EINVAL; - goto err_out_free_nskb; - } if (queue->queue_total >= queue->queue_maxlen) { if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { failopen = 1; @@ -731,13 +747,8 @@ nfqnl_set_mode(struct nfqnl_instance *queue, case NFQNL_COPY_PACKET: queue->copy_mode = mode; - /* We're using struct nlattr which has 16bit nla_len. Note that - * nla_len includes the header length. Thus, the maximum packet - * length that we support is 65531 bytes. We send truncated - * packets if the specified length is larger than that. - */ - if (range > 0xffff - NLA_HDRLEN) - queue->copy_range = 0xffff - NLA_HDRLEN; + if (range == 0 || range > NFQNL_MAX_COPY_RANGE) + queue->copy_range = NFQNL_MAX_COPY_RANGE; else queue->copy_range = range; break; @@ -800,7 +811,7 @@ static int nfqnl_rcv_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); /* Drop any packets associated with the downed device */ if (event == NETDEV_DOWN) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index a60261cb0e80..da35ac06a975 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -26,6 +26,9 @@ static inline int xt_ct_target(struct sk_buff *skb, struct nf_conn *ct) if (skb->nfct != NULL) return XT_CONTINUE; + /* special case the untracked ct : we want the percpu object */ + if (!ct) + ct = nf_ct_untracked_get(); atomic_inc(&ct->ct_general.use); skb->nfct = &ct->ct_general; skb->nfctinfo = IP_CT_NEW; @@ -186,8 +189,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, int ret = -EOPNOTSUPP; if (info->flags & XT_CT_NOTRACK) { - ct = nf_ct_untracked_get(); - atomic_inc(&ct->ct_general.use); + ct = NULL; goto out; } @@ -311,7 +313,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, struct nf_conn *ct = info->ct; struct nf_conn_help *help; - if (!nf_ct_is_untracked(ct)) { + if (ct && !nf_ct_is_untracked(ct)) { help = nfct_help(ct); if (help) module_put(help->helper->me); @@ -319,8 +321,8 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par, nf_ct_l3proto_module_put(par->family); xt_ct_destroy_timeout(ct); + nf_ct_put(info->ct); } - nf_ct_put(info->ct); } static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par) diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index bd93e51d30ac..292934d23482 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -200,7 +200,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) static int tee_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct xt_tee_priv *priv; priv = container_of(this, struct xt_tee_priv, notifier); diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index ed0db15ab00e..7720b036d76a 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -18,7 +18,7 @@ static bool xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; - struct gnet_stats_rate_est *r; + struct gnet_stats_rate_est64 *r; u_int32_t bps1, bps2, pps1, pps2; bool ret = true; diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 63b2bdb59e95..20b15916f403 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -107,7 +107,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; - struct sock *sk; + struct sock *sk = skb->sk; __be32 uninitialized_var(daddr), uninitialized_var(saddr); __be16 uninitialized_var(dport), uninitialized_var(sport); u8 uninitialized_var(protocol); @@ -155,25 +155,31 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, } #endif - sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, - saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); - if (sk != NULL) { + if (!sk) + sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, + saddr, daddr, sport, dport, + par->in, NFT_LOOKUP_ANY); + if (sk) { bool wildcard; bool transparent = true; - /* Ignore sockets listening on INADDR_ANY */ - wildcard = (sk->sk_state != TCP_TIME_WAIT && + /* Ignore sockets listening on INADDR_ANY, + * unless XT_SOCKET_NOWILDCARD is set + */ + wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && + sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->inet_rcv_saddr == 0); /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ - if (info && info->flags & XT_SOCKET_TRANSPARENT) + if (info->flags & XT_SOCKET_TRANSPARENT) transparent = ((sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->transparent) || (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - xt_socket_put_sk(sk); + if (sk != skb->sk) + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; @@ -190,11 +196,15 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, static bool socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) { - return socket_match(skb, par, NULL); + static struct xt_socket_mtinfo1 xt_info_v0 = { + .flags = 0, + }; + + return socket_match(skb, par, &xt_info_v0); } static bool -socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } @@ -256,11 +266,11 @@ extract_icmp6_fields(const struct sk_buff *skb, } static bool -socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) +socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { struct ipv6hdr *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; - struct sock *sk; + struct sock *sk = skb->sk; struct in6_addr *daddr = NULL, *saddr = NULL; __be16 uninitialized_var(dport), uninitialized_var(sport); int thoff = 0, uninitialized_var(tproto); @@ -291,25 +301,31 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) return false; } - sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, - saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); - if (sk != NULL) { + if (!sk) + sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto, + saddr, daddr, sport, dport, + par->in, NFT_LOOKUP_ANY); + if (sk) { bool wildcard; bool transparent = true; - /* Ignore sockets listening on INADDR_ANY */ - wildcard = (sk->sk_state != TCP_TIME_WAIT && + /* Ignore sockets listening on INADDR_ANY + * unless XT_SOCKET_NOWILDCARD is set + */ + wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && + sk->sk_state != TCP_TIME_WAIT && ipv6_addr_any(&inet6_sk(sk)->rcv_saddr)); /* Ignore non-transparent sockets, if XT_SOCKET_TRANSPARENT is used */ - if (info && info->flags & XT_SOCKET_TRANSPARENT) + if (info->flags & XT_SOCKET_TRANSPARENT) transparent = ((sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->transparent) || (sk->sk_state == TCP_TIME_WAIT && inet_twsk(sk)->tw_transparent)); - xt_socket_put_sk(sk); + if (sk != skb->sk) + xt_socket_put_sk(sk); if (wildcard || !transparent) sk = NULL; @@ -325,6 +341,28 @@ socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par) } #endif +static int socket_mt_v1_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V1) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); + return -EINVAL; + } + return 0; +} + +static int socket_mt_v2_check(const struct xt_mtchk_param *par) +{ + const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; + + if (info->flags & ~XT_SOCKET_FLAGS_V2) { + pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); + return -EINVAL; + } + return 0; +} + static struct xt_match socket_mt_reg[] __read_mostly = { { .name = "socket", @@ -339,7 +377,8 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV4, - .match = socket_mt4_v1, + .match = socket_mt4_v1_v2, + .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), @@ -350,7 +389,32 @@ static struct xt_match socket_mt_reg[] __read_mostly = { .name = "socket", .revision = 1, .family = NFPROTO_IPV6, - .match = socket_mt6_v1, + .match = socket_mt6_v1_v2, + .checkentry = socket_mt_v1_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#endif + { + .name = "socket", + .revision = 2, + .family = NFPROTO_IPV4, + .match = socket_mt4_v1_v2, + .checkentry = socket_mt_v2_check, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + }, +#ifdef XT_SOCKET_HAVE_IPV6 + { + .name = "socket", + .revision = 2, + .family = NFPROTO_IPV6, + .match = socket_mt6_v1_v2, + .checkentry = socket_mt_v2_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c15042f987bd..a1100640495d 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -691,8 +691,8 @@ static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg) { struct netlbl_domhsh_walk_arg *cb_arg = arg; - if (entry->type == NETLBL_NLTYPE_CIPSOV4 && - entry->type_def.cipsov4->doi == cb_arg->doi) + if (entry->def.type == NETLBL_NLTYPE_CIPSOV4 && + entry->def.cipso->doi == cb_arg->doi) return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info); return 0; diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 6bb1d42f0fac..85d842e6e431 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -84,15 +84,15 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry) #endif /* IPv6 */ ptr = container_of(entry, struct netlbl_dom_map, rcu); - if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) { + if (ptr->def.type == NETLBL_NLTYPE_ADDRSELECT) { netlbl_af4list_foreach_safe(iter4, tmp4, - &ptr->type_def.addrsel->list4) { + &ptr->def.addrsel->list4) { netlbl_af4list_remove_entry(iter4); kfree(netlbl_domhsh_addr4_entry(iter4)); } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, - &ptr->type_def.addrsel->list6) { + &ptr->def.addrsel->list6) { netlbl_af6list_remove_entry(iter6); kfree(netlbl_domhsh_addr6_entry(iter6)); } @@ -213,21 +213,21 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry, if (addr4 != NULL) { struct netlbl_domaddr4_map *map4; map4 = netlbl_domhsh_addr4_entry(addr4); - type = map4->type; - cipsov4 = map4->type_def.cipsov4; + type = map4->def.type; + cipsov4 = map4->def.cipso; netlbl_af4list_audit_addr(audit_buf, 0, NULL, addr4->addr, addr4->mask); #if IS_ENABLED(CONFIG_IPV6) } else if (addr6 != NULL) { struct netlbl_domaddr6_map *map6; map6 = netlbl_domhsh_addr6_entry(addr6); - type = map6->type; + type = map6->def.type; netlbl_af6list_audit_addr(audit_buf, 0, NULL, &addr6->addr, &addr6->mask); #endif /* IPv6 */ } else { - type = entry->type; - cipsov4 = entry->type_def.cipsov4; + type = entry->def.type; + cipsov4 = entry->def.cipso; } switch (type) { case NETLBL_NLTYPE_UNLABELED: @@ -265,26 +265,25 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) if (entry == NULL) return -EINVAL; - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_UNLABELED: - if (entry->type_def.cipsov4 != NULL || - entry->type_def.addrsel != NULL) + if (entry->def.cipso != NULL || entry->def.addrsel != NULL) return -EINVAL; break; case NETLBL_NLTYPE_CIPSOV4: - if (entry->type_def.cipsov4 == NULL) + if (entry->def.cipso == NULL) return -EINVAL; break; case NETLBL_NLTYPE_ADDRSELECT: - netlbl_af4list_foreach(iter4, &entry->type_def.addrsel->list4) { + netlbl_af4list_foreach(iter4, &entry->def.addrsel->list4) { map4 = netlbl_domhsh_addr4_entry(iter4); - switch (map4->type) { + switch (map4->def.type) { case NETLBL_NLTYPE_UNLABELED: - if (map4->type_def.cipsov4 != NULL) + if (map4->def.cipso != NULL) return -EINVAL; break; case NETLBL_NLTYPE_CIPSOV4: - if (map4->type_def.cipsov4 == NULL) + if (map4->def.cipso == NULL) return -EINVAL; break; default: @@ -292,9 +291,9 @@ static int netlbl_domhsh_validate(const struct netlbl_dom_map *entry) } } #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach(iter6, &entry->type_def.addrsel->list6) { + netlbl_af6list_foreach(iter6, &entry->def.addrsel->list6) { map6 = netlbl_domhsh_addr6_entry(iter6); - switch (map6->type) { + switch (map6->def.type) { case NETLBL_NLTYPE_UNLABELED: break; default: @@ -402,32 +401,31 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, rcu_assign_pointer(netlbl_domhsh_def, entry); } - if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { + if (entry->def.type == NETLBL_NLTYPE_ADDRSELECT) { netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) + &entry->def.addrsel->list4) netlbl_domhsh_audit_add(entry, iter4, NULL, ret_val, audit_info); #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) + &entry->def.addrsel->list6) netlbl_domhsh_audit_add(entry, NULL, iter6, ret_val, audit_info); #endif /* IPv6 */ } else netlbl_domhsh_audit_add(entry, NULL, NULL, ret_val, audit_info); - } else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT && - entry->type == NETLBL_NLTYPE_ADDRSELECT) { + } else if (entry_old->def.type == NETLBL_NLTYPE_ADDRSELECT && + entry->def.type == NETLBL_NLTYPE_ADDRSELECT) { struct list_head *old_list4; struct list_head *old_list6; - old_list4 = &entry_old->type_def.addrsel->list4; - old_list6 = &entry_old->type_def.addrsel->list6; + old_list4 = &entry_old->def.addrsel->list4; + old_list6 = &entry_old->def.addrsel->list6; /* we only allow the addition of address selectors if all of * the selectors do not exist in the existing domain map */ - netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) + netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) if (netlbl_af4list_search_exact(iter4->addr, iter4->mask, old_list4)) { @@ -435,8 +433,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, goto add_return; } #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) + netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) if (netlbl_af6list_search_exact(&iter6->addr, &iter6->mask, old_list6)) { @@ -446,7 +443,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, #endif /* IPv6 */ netlbl_af4list_foreach_safe(iter4, tmp4, - &entry->type_def.addrsel->list4) { + &entry->def.addrsel->list4) { netlbl_af4list_remove_entry(iter4); iter4->valid = 1; ret_val = netlbl_af4list_add(iter4, old_list4); @@ -457,7 +454,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_safe(iter6, tmp6, - &entry->type_def.addrsel->list6) { + &entry->def.addrsel->list6) { netlbl_af6list_remove_entry(iter6); iter6->valid = 1; ret_val = netlbl_af6list_add(iter6, old_list6); @@ -538,18 +535,18 @@ int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, struct netlbl_af4list *iter4; struct netlbl_domaddr4_map *map4; - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) { + &entry->def.addrsel->list4) { map4 = netlbl_domhsh_addr4_entry(iter4); - cipso_v4_doi_putdef(map4->type_def.cipsov4); + cipso_v4_doi_putdef(map4->def.cipso); } /* no need to check the IPv6 list since we currently * support only unlabeled protocols for IPv6 */ break; case NETLBL_NLTYPE_CIPSOV4: - cipso_v4_doi_putdef(entry->type_def.cipsov4); + cipso_v4_doi_putdef(entry->def.cipso); break; } call_rcu(&entry->rcu, netlbl_domhsh_free_entry); @@ -590,20 +587,21 @@ int netlbl_domhsh_remove_af4(const char *domain, entry_map = netlbl_domhsh_search(domain); else entry_map = netlbl_domhsh_search_def(domain); - if (entry_map == NULL || entry_map->type != NETLBL_NLTYPE_ADDRSELECT) + if (entry_map == NULL || + entry_map->def.type != NETLBL_NLTYPE_ADDRSELECT) goto remove_af4_failure; spin_lock(&netlbl_domhsh_lock); entry_addr = netlbl_af4list_remove(addr->s_addr, mask->s_addr, - &entry_map->type_def.addrsel->list4); + &entry_map->def.addrsel->list4); spin_unlock(&netlbl_domhsh_lock); if (entry_addr == NULL) goto remove_af4_failure; - netlbl_af4list_foreach_rcu(iter4, &entry_map->type_def.addrsel->list4) + netlbl_af4list_foreach_rcu(iter4, &entry_map->def.addrsel->list4) goto remove_af4_single_addr; #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, &entry_map->type_def.addrsel->list6) + netlbl_af6list_foreach_rcu(iter6, &entry_map->def.addrsel->list6) goto remove_af4_single_addr; #endif /* IPv6 */ /* the domain mapping is empty so remove it from the mapping table */ @@ -616,7 +614,7 @@ remove_af4_single_addr: * shouldn't be a problem */ synchronize_rcu(); entry = netlbl_domhsh_addr4_entry(entry_addr); - cipso_v4_doi_putdef(entry->type_def.cipsov4); + cipso_v4_doi_putdef(entry->def.cipso); kfree(entry); return 0; @@ -693,8 +691,8 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) * responsible for ensuring that rcu_read_[un]lock() is called. * */ -struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, - __be32 addr) +struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr) { struct netlbl_dom_map *dom_iter; struct netlbl_af4list *addr_iter; @@ -702,15 +700,13 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, dom_iter = netlbl_domhsh_search_def(domain); if (dom_iter == NULL) return NULL; - if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) - return NULL; - addr_iter = netlbl_af4list_search(addr, - &dom_iter->type_def.addrsel->list4); + if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT) + return &dom_iter->def; + addr_iter = netlbl_af4list_search(addr, &dom_iter->def.addrsel->list4); if (addr_iter == NULL) return NULL; - - return netlbl_domhsh_addr4_entry(addr_iter); + return &(netlbl_domhsh_addr4_entry(addr_iter)->def); } #if IS_ENABLED(CONFIG_IPV6) @@ -725,7 +721,7 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, * responsible for ensuring that rcu_read_[un]lock() is called. * */ -struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, +struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain, const struct in6_addr *addr) { struct netlbl_dom_map *dom_iter; @@ -734,15 +730,13 @@ struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, dom_iter = netlbl_domhsh_search_def(domain); if (dom_iter == NULL) return NULL; - if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) - return NULL; - addr_iter = netlbl_af6list_search(addr, - &dom_iter->type_def.addrsel->list6); + if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT) + return &dom_iter->def; + addr_iter = netlbl_af6list_search(addr, &dom_iter->def.addrsel->list6); if (addr_iter == NULL) return NULL; - - return netlbl_domhsh_addr6_entry(addr_iter); + return &(netlbl_domhsh_addr6_entry(addr_iter)->def); } #endif /* IPv6 */ diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 90872c4ca30f..b9be0eed8980 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -43,37 +43,35 @@ #define NETLBL_DOMHSH_BITSIZE 7 /* Domain mapping definition structures */ +struct netlbl_domaddr_map { + struct list_head list4; + struct list_head list6; +}; +struct netlbl_dommap_def { + u32 type; + union { + struct netlbl_domaddr_map *addrsel; + struct cipso_v4_doi *cipso; + }; +}; #define netlbl_domhsh_addr4_entry(iter) \ container_of(iter, struct netlbl_domaddr4_map, list) struct netlbl_domaddr4_map { - u32 type; - union { - struct cipso_v4_doi *cipsov4; - } type_def; + struct netlbl_dommap_def def; struct netlbl_af4list list; }; #define netlbl_domhsh_addr6_entry(iter) \ container_of(iter, struct netlbl_domaddr6_map, list) struct netlbl_domaddr6_map { - u32 type; - - /* NOTE: no 'type_def' union needed at present since we don't currently - * support any IPv6 labeling protocols */ + struct netlbl_dommap_def def; struct netlbl_af6list list; }; -struct netlbl_domaddr_map { - struct list_head list4; - struct list_head list6; -}; + struct netlbl_dom_map { char *domain; - u32 type; - union { - struct cipso_v4_doi *cipsov4; - struct netlbl_domaddr_map *addrsel; - } type_def; + struct netlbl_dommap_def def; u32 valid; struct list_head list; @@ -97,16 +95,16 @@ int netlbl_domhsh_remove_af4(const char *domain, int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); -struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, - __be32 addr); +struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr); +#if IS_ENABLED(CONFIG_IPV6) +struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain, + const struct in6_addr *addr); +#endif /* IPv6 */ + int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, int (*callback) (struct netlbl_dom_map *entry, void *arg), void *cb_arg); -#if IS_ENABLED(CONFIG_IPV6) -struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, - const struct in6_addr *addr); -#endif /* IPv6 */ - #endif diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 7c94aedd0912..96a458e12f60 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -122,7 +122,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, } if (addr == NULL && mask == NULL) - entry->type = NETLBL_NLTYPE_UNLABELED; + entry->def.type = NETLBL_NLTYPE_UNLABELED; else if (addr != NULL && mask != NULL) { addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); if (addrmap == NULL) @@ -137,7 +137,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, map4 = kzalloc(sizeof(*map4), GFP_ATOMIC); if (map4 == NULL) goto cfg_unlbl_map_add_failure; - map4->type = NETLBL_NLTYPE_UNLABELED; + map4->def.type = NETLBL_NLTYPE_UNLABELED; map4->list.addr = addr4->s_addr & mask4->s_addr; map4->list.mask = mask4->s_addr; map4->list.valid = 1; @@ -154,7 +154,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain, map6 = kzalloc(sizeof(*map6), GFP_ATOMIC); if (map6 == NULL) goto cfg_unlbl_map_add_failure; - map6->type = NETLBL_NLTYPE_UNLABELED; + map6->def.type = NETLBL_NLTYPE_UNLABELED; map6->list.addr = *addr6; map6->list.addr.s6_addr32[0] &= mask6->s6_addr32[0]; map6->list.addr.s6_addr32[1] &= mask6->s6_addr32[1]; @@ -174,8 +174,8 @@ int netlbl_cfg_unlbl_map_add(const char *domain, break; } - entry->type_def.addrsel = addrmap; - entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; } else { ret_val = -EINVAL; goto cfg_unlbl_map_add_failure; @@ -355,8 +355,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, } if (addr == NULL && mask == NULL) { - entry->type_def.cipsov4 = doi_def; - entry->type = NETLBL_NLTYPE_CIPSOV4; + entry->def.cipso = doi_def; + entry->def.type = NETLBL_NLTYPE_CIPSOV4; } else if (addr != NULL && mask != NULL) { addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC); if (addrmap == NULL) @@ -367,8 +367,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC); if (addrinfo == NULL) goto out_addrinfo; - addrinfo->type_def.cipsov4 = doi_def; - addrinfo->type = NETLBL_NLTYPE_CIPSOV4; + addrinfo->def.cipso = doi_def; + addrinfo->def.type = NETLBL_NLTYPE_CIPSOV4; addrinfo->list.addr = addr->s_addr & mask->s_addr; addrinfo->list.mask = mask->s_addr; addrinfo->list.valid = 1; @@ -376,8 +376,8 @@ int netlbl_cfg_cipsov4_map_add(u32 doi, if (ret_val != 0) goto cfg_cipsov4_map_add_failure; - entry->type_def.addrsel = addrmap; - entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; } else { ret_val = -EINVAL; goto out_addrmap; @@ -657,14 +657,14 @@ int netlbl_sock_setattr(struct sock *sk, } switch (family) { case AF_INET: - switch (dom_entry->type) { + switch (dom_entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: ret_val = -EDESTADDRREQ; break; case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, - dom_entry->type_def.cipsov4, - secattr); + dom_entry->def.cipso, + secattr); break; case NETLBL_NLTYPE_UNLABELED: ret_val = 0; @@ -754,23 +754,22 @@ int netlbl_conn_setattr(struct sock *sk, { int ret_val; struct sockaddr_in *addr4; - struct netlbl_domaddr4_map *af4_entry; + struct netlbl_dommap_def *entry; rcu_read_lock(); switch (addr->sa_family) { case AF_INET: addr4 = (struct sockaddr_in *)addr; - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - addr4->sin_addr.s_addr); - if (af4_entry == NULL) { + entry = netlbl_domhsh_getentry_af4(secattr->domain, + addr4->sin_addr.s_addr); + if (entry == NULL) { ret_val = -ENOENT; goto conn_setattr_return; } - switch (af4_entry->type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, - af4_entry->type_def.cipsov4, - secattr); + entry->cipso, secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now @@ -812,36 +811,21 @@ int netlbl_req_setattr(struct request_sock *req, const struct netlbl_lsm_secattr *secattr) { int ret_val; - struct netlbl_dom_map *dom_entry; - struct netlbl_domaddr4_map *af4_entry; - u32 proto_type; - struct cipso_v4_doi *proto_cv4; + struct netlbl_dommap_def *entry; rcu_read_lock(); - dom_entry = netlbl_domhsh_getentry(secattr->domain); - if (dom_entry == NULL) { - ret_val = -ENOENT; - goto req_setattr_return; - } switch (req->rsk_ops->family) { case AF_INET: - if (dom_entry->type == NETLBL_NLTYPE_ADDRSELECT) { - struct inet_request_sock *req_inet = inet_rsk(req); - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - req_inet->rmt_addr); - if (af4_entry == NULL) { - ret_val = -ENOENT; - goto req_setattr_return; - } - proto_type = af4_entry->type; - proto_cv4 = af4_entry->type_def.cipsov4; - } else { - proto_type = dom_entry->type; - proto_cv4 = dom_entry->type_def.cipsov4; + entry = netlbl_domhsh_getentry_af4(secattr->domain, + inet_rsk(req)->rmt_addr); + if (entry == NULL) { + ret_val = -ENOENT; + goto req_setattr_return; } - switch (proto_type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_req_setattr(req, proto_cv4, secattr); + ret_val = cipso_v4_req_setattr(req, + entry->cipso, secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now @@ -899,23 +883,21 @@ int netlbl_skbuff_setattr(struct sk_buff *skb, { int ret_val; struct iphdr *hdr4; - struct netlbl_domaddr4_map *af4_entry; + struct netlbl_dommap_def *entry; rcu_read_lock(); switch (family) { case AF_INET: hdr4 = ip_hdr(skb); - af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, - hdr4->daddr); - if (af4_entry == NULL) { + entry = netlbl_domhsh_getentry_af4(secattr->domain,hdr4->daddr); + if (entry == NULL) { ret_val = -ENOENT; goto skbuff_setattr_return; } - switch (af4_entry->type) { + switch (entry->type) { case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_skbuff_setattr(skb, - af4_entry->type_def.cipsov4, - secattr); + ret_val = cipso_v4_skbuff_setattr(skb, entry->cipso, + secattr); break; case NETLBL_NLTYPE_UNLABELED: /* just delete the protocols we support for right now diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index c5384ffc6146..dd1c37d7acbc 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -104,7 +104,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, ret_val = -ENOMEM; goto add_failure; } - entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); + entry->def.type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); if (info->attrs[NLBL_MGMT_A_DOMAIN]) { size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); entry->domain = kmalloc(tmp_size, GFP_KERNEL); @@ -116,12 +116,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); } - /* NOTE: internally we allow/use a entry->type value of + /* NOTE: internally we allow/use a entry->def.type value of * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users * to pass that as a protocol value because we need to know the * "real" protocol */ - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_UNLABELED: break; case NETLBL_NLTYPE_CIPSOV4: @@ -132,7 +132,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, cipsov4 = cipso_v4_doi_getdef(tmp_val); if (cipsov4 == NULL) goto add_failure; - entry->type_def.cipsov4 = cipsov4; + entry->def.cipso = cipsov4; break; default: goto add_failure; @@ -172,9 +172,9 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map->list.addr = addr->s_addr & mask->s_addr; map->list.mask = mask->s_addr; map->list.valid = 1; - map->type = entry->type; + map->def.type = entry->def.type; if (cipsov4) - map->type_def.cipsov4 = cipsov4; + map->def.cipso = cipsov4; ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); if (ret_val != 0) { @@ -182,8 +182,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info, goto add_failure; } - entry->type = NETLBL_NLTYPE_ADDRSELECT; - entry->type_def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; #if IS_ENABLED(CONFIG_IPV6) } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) { struct in6_addr *addr; @@ -223,7 +223,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info, map->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; map->list.mask = *mask; map->list.valid = 1; - map->type = entry->type; + map->def.type = entry->def.type; ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); if (ret_val != 0) { @@ -231,8 +231,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info, goto add_failure; } - entry->type = NETLBL_NLTYPE_ADDRSELECT; - entry->type_def.addrsel = addrmap; + entry->def.type = NETLBL_NLTYPE_ADDRSELECT; + entry->def.addrsel = addrmap; #endif /* IPv6 */ } @@ -281,14 +281,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; } - switch (entry->type) { + switch (entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST); if (nla_a == NULL) return -ENOMEM; - netlbl_af4list_foreach_rcu(iter4, - &entry->type_def.addrsel->list4) { + netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) { struct netlbl_domaddr4_map *map4; struct in_addr addr_struct; @@ -310,13 +309,13 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; map4 = netlbl_domhsh_addr4_entry(iter4); ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, - map4->type); + map4->def.type); if (ret_val != 0) return ret_val; - switch (map4->type) { + switch (map4->def.type) { case NETLBL_NLTYPE_CIPSOV4: ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, - map4->type_def.cipsov4->doi); + map4->def.cipso->doi); if (ret_val != 0) return ret_val; break; @@ -325,8 +324,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, nla_nest_end(skb, nla_b); } #if IS_ENABLED(CONFIG_IPV6) - netlbl_af6list_foreach_rcu(iter6, - &entry->type_def.addrsel->list6) { + netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) { struct netlbl_domaddr6_map *map6; nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); @@ -345,7 +343,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, return ret_val; map6 = netlbl_domhsh_addr6_entry(iter6); ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, - map6->type); + map6->def.type); if (ret_val != 0) return ret_val; @@ -356,14 +354,14 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb, nla_nest_end(skb, nla_a); break; case NETLBL_NLTYPE_UNLABELED: - ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type); break; case NETLBL_NLTYPE_CIPSOV4: - ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type); if (ret_val != 0) return ret_val; ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, - entry->type_def.cipsov4->doi); + entry->def.cipso->doi); break; } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 8a6c6ea466d8..8f0897407a2c 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -708,7 +708,7 @@ unlhsh_remove_return: * netlbl_unlhsh_netdev_handler - Network device notification handler * @this: notifier block * @event: the event - * @ptr: the network device (cast to void) + * @ptr: the netdevice notifier info (cast to void) * * Description: * Handle network device events, although at present all we care about is a @@ -717,10 +717,9 @@ unlhsh_remove_return: * */ static int netlbl_unlhsh_netdev_handler(struct notifier_block *this, - unsigned long event, - void *ptr) + unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netlbl_unlhsh_iface *iface = NULL; if (!net_eq(dev_net(dev), &init_net)) @@ -1542,7 +1541,7 @@ int __init netlbl_unlabel_defconf(void) entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return -ENOMEM; - entry->type = NETLBL_NLTYPE_UNLABELED; + entry->def.type = NETLBL_NLTYPE_UNLABELED; ret_val = netlbl_domhsh_add_default(entry, &audit_info); if (ret_val != 0) return ret_val; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 57ee84d21470..0c61b59175dc 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -57,6 +57,7 @@ #include <linux/audit.h> #include <linux/mutex.h> #include <linux/vmalloc.h> +#include <linux/if_arp.h> #include <asm/cacheflush.h> #include <net/net_namespace.h> @@ -101,6 +102,9 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); static ATOMIC_NOTIFIER_HEAD(netlink_chain); +static DEFINE_SPINLOCK(netlink_tap_lock); +static struct list_head netlink_tap_all __read_mostly; + static inline u32 netlink_group_mask(u32 group) { return group ? 1 << (group - 1) : 0; @@ -111,6 +115,100 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } +int netlink_add_tap(struct netlink_tap *nt) +{ + if (unlikely(nt->dev->type != ARPHRD_NETLINK)) + return -EINVAL; + + spin_lock(&netlink_tap_lock); + list_add_rcu(&nt->list, &netlink_tap_all); + spin_unlock(&netlink_tap_lock); + + if (nt->module) + __module_get(nt->module); + + return 0; +} +EXPORT_SYMBOL_GPL(netlink_add_tap); + +int __netlink_remove_tap(struct netlink_tap *nt) +{ + bool found = false; + struct netlink_tap *tmp; + + spin_lock(&netlink_tap_lock); + + list_for_each_entry(tmp, &netlink_tap_all, list) { + if (nt == tmp) { + list_del_rcu(&nt->list); + found = true; + goto out; + } + } + + pr_warn("__netlink_remove_tap: %p not found\n", nt); +out: + spin_unlock(&netlink_tap_lock); + + if (found && nt->module) + module_put(nt->module); + + return found ? 0 : -ENODEV; +} +EXPORT_SYMBOL_GPL(__netlink_remove_tap); + +int netlink_remove_tap(struct netlink_tap *nt) +{ + int ret; + + ret = __netlink_remove_tap(nt); + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL_GPL(netlink_remove_tap); + +static int __netlink_deliver_tap_skb(struct sk_buff *skb, + struct net_device *dev) +{ + struct sk_buff *nskb; + int ret = -ENOMEM; + + dev_hold(dev); + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb) { + nskb->dev = dev; + ret = dev_queue_xmit(nskb); + if (unlikely(ret > 0)) + ret = net_xmit_errno(ret); + } + + dev_put(dev); + return ret; +} + +static void __netlink_deliver_tap(struct sk_buff *skb) +{ + int ret; + struct netlink_tap *tmp; + + list_for_each_entry_rcu(tmp, &netlink_tap_all, list) { + ret = __netlink_deliver_tap_skb(skb, tmp->dev); + if (unlikely(ret)) + break; + } +} + +static void netlink_deliver_tap(struct sk_buff *skb) +{ + rcu_read_lock(); + + if (unlikely(!list_empty(&netlink_tap_all))) + __netlink_deliver_tap(skb); + + rcu_read_unlock(); +} + static void netlink_overrun(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -750,6 +848,13 @@ static void netlink_skb_destructor(struct sk_buff *skb) skb->head = NULL; } #endif + if (is_vmalloc_addr(skb->head)) { + if (!skb->cloned || + !atomic_dec_return(&(skb_shinfo(skb)->dataref))) + vfree(skb->head); + + skb->head = NULL; + } if (skb->sk != NULL) sock_rfree(skb); } @@ -854,16 +959,23 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } +static bool netlink_compare(struct net *net, struct sock *sk) +{ + return net_eq(sock_net(sk), net); +} + static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { - struct nl_portid_hash *hash = &nl_table[protocol].hash; + struct netlink_table *table = &nl_table[protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; struct sock *sk; read_lock(&nl_table_lock); head = nl_portid_hashfn(hash, portid); sk_for_each(sk, head) { - if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { + if (table->compare(net, sk) && + (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; } @@ -976,7 +1088,8 @@ netlink_update_listeners(struct sock *sk) static int netlink_insert(struct sock *sk, struct net *net, u32 portid) { - struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct netlink_table *table = &nl_table[sk->sk_protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; @@ -986,7 +1099,8 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) head = nl_portid_hashfn(hash, portid); len = 0; sk_for_each(osk, head) { - if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) + if (table->compare(net, osk) && + (nlk_sk(osk)->portid == portid)) break; len++; } @@ -1183,7 +1297,8 @@ static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct netlink_table *table = &nl_table[sk->sk_protocol]; + struct nl_portid_hash *hash = &table->hash; struct hlist_head *head; struct sock *osk; s32 portid = task_tgid_vnr(current); @@ -1195,7 +1310,7 @@ retry: netlink_table_grab(); head = nl_portid_hashfn(hash, portid); sk_for_each(osk, head) { - if (!net_eq(sock_net(osk), net)) + if (!table->compare(net, osk)) continue; if (nlk_sk(osk)->portid == portid) { /* Bind collision, search negative portid values. */ @@ -1420,6 +1535,33 @@ struct sock *netlink_getsockbyfilp(struct file *filp) return sock; } +static struct sk_buff *netlink_alloc_large_skb(unsigned int size, + int broadcast) +{ + struct sk_buff *skb; + void *data; + + if (size <= NLMSG_GOODSIZE || broadcast) + return alloc_skb(size, GFP_KERNEL); + + size = SKB_DATA_ALIGN(size) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + data = vmalloc(size); + if (data == NULL) + return NULL; + + skb = build_skb(data, size); + if (skb == NULL) + vfree(data); + else { + skb->head_frag = 0; + skb->destructor = netlink_skb_destructor; + } + + return skb; +} + /* * Attach a skb to a netlink socket. * The caller must hold a reference to the destination socket. On error, the @@ -1475,6 +1617,8 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb) { int len = skb->len; + netlink_deliver_tap(skb); + #ifdef CONFIG_NETLINK_MMAP if (netlink_skb_is_mmaped(skb)) netlink_queue_mmaped_skb(sk, skb); @@ -1510,7 +1654,7 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) return skb; delta = skb->end - skb->tail; - if (delta * 2 < skb->truesize) + if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) return skb; if (skb_shared(skb)) { @@ -1535,6 +1679,11 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, ret = -ECONNREFUSED; if (nlk->netlink_rcv != NULL) { + /* We could do a netlink_deliver_tap(skb) here as well + * but since this is intended for the kernel only, we + * should rather let it stay under the hood. + */ + ret = skb->len; netlink_skb_set_owner_r(skb, sk); NETLINK_CB(skb).sk = ssk; @@ -2096,7 +2245,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; - skb = alloc_skb(len, GFP_KERNEL); + skb = netlink_alloc_large_skb(len, dst_group); if (skb == NULL) goto out; @@ -2285,6 +2434,8 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (cfg) { nl_table[unit].bind = cfg->bind; nl_table[unit].flags = cfg->flags; + if (cfg->compare) + nl_table[unit].compare = cfg->compare; } nl_table[unit].registered = 1; } else { @@ -2707,6 +2858,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *s; struct nl_seq_iter *iter; + struct net *net; int i, j; ++*pos; @@ -2714,11 +2866,12 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return netlink_seq_socket_idx(seq, 0); + net = seq_file_net(seq); iter = seq->private; s = v; do { s = sk_next(s); - } while (s && sock_net(s) != seq_file_net(seq)); + } while (s && !nl_table[s->sk_protocol].compare(net, s)); if (s) return s; @@ -2730,7 +2883,8 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); - while (s && sock_net(s) != seq_file_net(seq)) + + while (s && !nl_table[s->sk_protocol].compare(net, s)) s = sk_next(s); if (s) { iter->link = i; @@ -2923,8 +3077,12 @@ static int __init netlink_proto_init(void) hash->shift = 0; hash->mask = 0; hash->rehash_time = jiffies; + + nl_table[i].compare = netlink_compare; } + INIT_LIST_HEAD(&netlink_tap_all); + netlink_add_usersock_entry(); sock_register(&netlink_family_ops); diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index ed8522265f4e..eaa88d187cdc 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -73,6 +73,7 @@ struct netlink_table { struct mutex *cb_mutex; struct module *module; void (*bind)(int group); + bool (*compare)(struct net *net, struct sock *sock); int registered; }; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2fd6dbea327a..512718adb0d5 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -571,7 +571,7 @@ static int genl_family_rcv_msg(struct genl_family *family, !capable(CAP_NET_ADMIN)) return -EPERM; - if (nlh->nlmsg_flags & NLM_F_DUMP) { + if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ops->dumpit, .done = ops->done, @@ -877,8 +877,10 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_MODULES if (res == NULL) { genl_unlock(); + up_read(&cb_lock); request_module("net-pf-%d-proto-%d-family-%s", PF_NETLINK, NETLINK_GENERIC, name); + down_read(&cb_lock); genl_lock(); res = genl_family_find_byname(name); } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ec0c80fde69f..698814bfa7ad 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -117,7 +117,7 @@ static void nr_kill_by_device(struct net_device *dev) */ static int nr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/netrom/sysctl_net_netrom.c b/net/netrom/sysctl_net_netrom.c index 42f630b9a698..ba1c368b3f18 100644 --- a/net/netrom/sysctl_net_netrom.c +++ b/net/netrom/sysctl_net_netrom.c @@ -34,7 +34,7 @@ static int min_reset[] = {0}, max_reset[] = {1}; static struct ctl_table_header *nr_table_header; -static ctl_table nr_table[] = { +static struct ctl_table nr_table[] = { { .procname = "default_path_quality", .data = &sysctl_netrom_default_path_quality, diff --git a/net/nfc/core.c b/net/nfc/core.c index 40d2527693da..1d074dd1650f 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -44,6 +44,47 @@ DEFINE_MUTEX(nfc_devlist_mutex); /* NFC device ID bitmap */ static DEFINE_IDA(nfc_index_ida); +int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name) +{ + int rc = 0; + + pr_debug("%s do firmware %s\n", dev_name(&dev->dev), firmware_name); + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (dev->dev_up) { + rc = -EBUSY; + goto error; + } + + if (!dev->ops->fw_download) { + rc = -EOPNOTSUPP; + goto error; + } + + dev->fw_download_in_progress = true; + rc = dev->ops->fw_download(dev, firmware_name); + if (rc) + dev->fw_download_in_progress = false; + +error: + device_unlock(&dev->dev); + return rc; +} + +int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name) +{ + dev->fw_download_in_progress = false; + + return nfc_genl_fw_download_done(dev, firmware_name); +} +EXPORT_SYMBOL(nfc_fw_download_done); + /** * nfc_dev_up - turn on the NFC device * @@ -69,6 +110,11 @@ int nfc_dev_up(struct nfc_dev *dev) goto error; } + if (dev->fw_download_in_progress) { + rc = -EBUSY; + goto error; + } + if (dev->dev_up) { rc = -EALREADY; goto error; @@ -80,6 +126,13 @@ int nfc_dev_up(struct nfc_dev *dev) if (!rc) dev->dev_up = true; + /* We have to enable the device before discovering SEs */ + if (dev->ops->discover_se) { + rc = dev->ops->discover_se(dev); + if (!rc) + pr_warn("SE discovery failed\n"); + } + error: device_unlock(&dev->dev); return rc; @@ -475,6 +528,108 @@ error: return rc; } +static struct nfc_se *find_se(struct nfc_dev *dev, u32 se_idx) +{ + struct nfc_se *se, *n; + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) + if (se->idx == se_idx) + return se; + + return NULL; +} + +int nfc_enable_se(struct nfc_dev *dev, u32 se_idx) +{ + + struct nfc_se *se; + int rc; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (!dev->dev_up) { + rc = -ENODEV; + goto error; + } + + if (dev->polling) { + rc = -EBUSY; + goto error; + } + + if (!dev->ops->enable_se || !dev->ops->disable_se) { + rc = -EOPNOTSUPP; + goto error; + } + + se = find_se(dev, se_idx); + if (!se) { + rc = -EINVAL; + goto error; + } + + if (se->type == NFC_SE_ENABLED) { + rc = -EALREADY; + goto error; + } + + rc = dev->ops->enable_se(dev, se_idx); + +error: + device_unlock(&dev->dev); + return rc; +} + +int nfc_disable_se(struct nfc_dev *dev, u32 se_idx) +{ + + struct nfc_se *se; + int rc; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + device_lock(&dev->dev); + + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; + goto error; + } + + if (!dev->dev_up) { + rc = -ENODEV; + goto error; + } + + if (!dev->ops->enable_se || !dev->ops->disable_se) { + rc = -EOPNOTSUPP; + goto error; + } + + se = find_se(dev, se_idx); + if (!se) { + rc = -EINVAL; + goto error; + } + + if (se->type == NFC_SE_DISABLED) { + rc = -EALREADY; + goto error; + } + + rc = dev->ops->disable_se(dev, se_idx); + +error: + device_unlock(&dev->dev); + return rc; +} + int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len) { pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len); @@ -707,14 +862,79 @@ inline void nfc_driver_failure(struct nfc_dev *dev, int err) } EXPORT_SYMBOL(nfc_driver_failure); +int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type) +{ + struct nfc_se *se; + int rc; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + se = find_se(dev, se_idx); + if (se) + return -EALREADY; + + se = kzalloc(sizeof(struct nfc_se), GFP_KERNEL); + if (!se) + return -ENOMEM; + + se->idx = se_idx; + se->type = type; + se->state = NFC_SE_DISABLED; + INIT_LIST_HEAD(&se->list); + + list_add(&se->list, &dev->secure_elements); + + rc = nfc_genl_se_added(dev, se_idx, type); + if (rc < 0) { + list_del(&se->list); + kfree(se); + + return rc; + } + + return 0; +} +EXPORT_SYMBOL(nfc_add_se); + +int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) +{ + struct nfc_se *se, *n; + int rc; + + pr_debug("%s se index %d\n", dev_name(&dev->dev), se_idx); + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) + if (se->idx == se_idx) { + rc = nfc_genl_se_removed(dev, se_idx); + if (rc < 0) + return rc; + + list_del(&se->list); + kfree(se); + + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL(nfc_remove_se); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); + struct nfc_se *se, *n; pr_debug("dev_name=%s\n", dev_name(&dev->dev)); nfc_genl_data_exit(&dev->genl_data); kfree(dev->targets); + + list_for_each_entry_safe(se, n, &dev->secure_elements, list) { + nfc_genl_se_removed(dev, se->idx); + list_del(&se->list); + kfree(se); + } + kfree(dev); } @@ -786,7 +1006,6 @@ struct nfc_dev *nfc_get_device(unsigned int idx) */ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, u32 supported_protocols, - u32 supported_se, int tx_headroom, int tx_tailroom) { struct nfc_dev *dev; @@ -804,10 +1023,9 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->ops = ops; dev->supported_protocols = supported_protocols; - dev->supported_se = supported_se; - dev->active_se = NFC_SE_NONE; dev->tx_headroom = tx_headroom; dev->tx_tailroom = tx_tailroom; + INIT_LIST_HEAD(&dev->secure_elements); nfc_genl_data_init(&dev->genl_data); diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 91020b210d87..fe66908401f5 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -570,21 +570,21 @@ static int hci_dep_link_up(struct nfc_dev *nfc_dev, struct nfc_target *target, { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->dep_link_up) - return hdev->ops->dep_link_up(hdev, target, comm_mode, - gb, gb_len); + if (!hdev->ops->dep_link_up) + return 0; - return 0; + return hdev->ops->dep_link_up(hdev, target, comm_mode, + gb, gb_len); } static int hci_dep_link_down(struct nfc_dev *nfc_dev) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->dep_link_down) - return hdev->ops->dep_link_down(hdev); + if (!hdev->ops->dep_link_down) + return 0; - return 0; + return hdev->ops->dep_link_down(hdev); } static int hci_activate_target(struct nfc_dev *nfc_dev, @@ -673,12 +673,12 @@ static int hci_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->tm_send) - return hdev->ops->tm_send(hdev, skb); - - kfree_skb(skb); + if (!hdev->ops->tm_send) { + kfree_skb(skb); + return -ENOTSUPP; + } - return -ENOTSUPP; + return hdev->ops->tm_send(hdev, skb); } static int hci_check_presence(struct nfc_dev *nfc_dev, @@ -686,8 +686,38 @@ static int hci_check_presence(struct nfc_dev *nfc_dev, { struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); - if (hdev->ops->check_presence) - return hdev->ops->check_presence(hdev, target); + if (!hdev->ops->check_presence) + return 0; + + return hdev->ops->check_presence(hdev, target); +} + +static int hci_discover_se(struct nfc_dev *nfc_dev) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (hdev->ops->discover_se) + return hdev->ops->discover_se(hdev); + + return 0; +} + +static int hci_enable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (hdev->ops->enable_se) + return hdev->ops->enable_se(hdev, se_idx); + + return 0; +} + +static int hci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (hdev->ops->disable_se) + return hdev->ops->enable_se(hdev, se_idx); return 0; } @@ -779,6 +809,16 @@ static void nfc_hci_recv_from_llc(struct nfc_hci_dev *hdev, struct sk_buff *skb) } } +static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) +{ + struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev); + + if (!hdev->ops->fw_download) + return -ENOTSUPP; + + return hdev->ops->fw_download(hdev, firmware_name); +} + static struct nfc_ops hci_nfc_ops = { .dev_up = hci_dev_up, .dev_down = hci_dev_down, @@ -791,13 +831,16 @@ static struct nfc_ops hci_nfc_ops = { .im_transceive = hci_transceive, .tm_send = hci_tm_send, .check_presence = hci_check_presence, + .fw_download = hci_fw_download, + .discover_se = hci_discover_se, + .enable_se = hci_enable_se, + .disable_se = hci_disable_se, }; struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, struct nfc_hci_init_data *init_data, unsigned long quirks, u32 protocols, - u32 supported_se, const char *llc_name, int tx_headroom, int tx_tailroom, @@ -823,7 +866,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, return NULL; } - hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, supported_se, + hdev->ndev = nfc_allocate_device(&hci_nfc_ops, protocols, tx_headroom + HCI_CMDS_HEADROOM, tx_tailroom); if (!hdev->ndev) { diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index ff8c434f7df8..f4d48b57ea11 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -19,6 +19,8 @@ enum llcp_state { LLCP_CONNECTED = 1, /* wait_for_packet() wants that */ + LLCP_CONNECTING, + LLCP_DISCONNECTING, LLCP_CLOSED, LLCP_BOUND, LLCP_LISTEN, @@ -246,7 +248,6 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head); void nfc_llcp_recv(void *data, struct sk_buff *skb, int err); -int nfc_llcp_disconnect(struct nfc_llcp_sock *sock); int nfc_llcp_send_symm(struct nfc_dev *dev); int nfc_llcp_send_connect(struct nfc_llcp_sock *sock); int nfc_llcp_send_cc(struct nfc_llcp_sock *sock); diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c index c1b23eef83ca..1017894807c0 100644 --- a/net/nfc/llcp_commands.c +++ b/net/nfc/llcp_commands.c @@ -339,7 +339,7 @@ static struct sk_buff *llcp_allocate_pdu(struct nfc_llcp_sock *sock, return skb; } -int nfc_llcp_disconnect(struct nfc_llcp_sock *sock) +int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock) { struct sk_buff *skb; struct nfc_dev *dev; @@ -630,26 +630,6 @@ int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason) return 0; } -int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock) -{ - struct sk_buff *skb; - struct nfc_llcp_local *local; - - pr_debug("Send DISC\n"); - - local = sock->local; - if (local == NULL) - return -ENODEV; - - skb = llcp_allocate_pdu(sock, LLCP_PDU_DISC, 0); - if (skb == NULL) - return -ENOMEM; - - skb_queue_head(&local->tx_queue, skb); - - return 0; -} - int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, struct msghdr *msg, size_t len) { diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 158bdbf668cc..81cd3416c7d4 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -537,6 +537,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) u8 *lto_tlv, lto_length; u8 *wks_tlv, wks_length; u8 *miux_tlv, miux_length; + __be16 wks = cpu_to_be16(local->local_wks); u8 gb_len = 0; int ret = 0; @@ -549,8 +550,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) gb_len += lto_length; pr_debug("Local wks 0x%lx\n", local->local_wks); - wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&local->local_wks, 2, - &wks_length); + wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&wks, 2, &wks_length); gb_len += wks_length; miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, @@ -719,6 +719,10 @@ static void nfc_llcp_tx_work(struct work_struct *work) llcp_sock = nfc_llcp_sock(sk); if (llcp_sock == NULL && nfc_llcp_ptype(skb) == LLCP_PDU_I) { + kfree_skb(skb); + nfc_llcp_send_symm(local->dev); + } else if (llcp_sock && !llcp_sock->remote_ready) { + skb_queue_head(&local->tx_queue, skb); nfc_llcp_send_symm(local->dev); } else { struct sk_buff *copy_skb = NULL; @@ -730,6 +734,13 @@ static void nfc_llcp_tx_work(struct work_struct *work) DUMP_PREFIX_OFFSET, 16, 1, skb->data, skb->len, true); + if (ptype == LLCP_PDU_DISC && sk != NULL && + sk->sk_state == LLCP_DISCONNECTING) { + nfc_llcp_sock_unlink(&local->sockets, sk); + sock_orphan(sk); + sock_put(sk); + } + if (ptype == LLCP_PDU_I) copy_skb = skb_copy(skb, GFP_ATOMIC); @@ -1579,6 +1590,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) local->lto = 150; /* 1500 ms */ local->rw = LLCP_MAX_RW; local->miux = cpu_to_be16(LLCP_MAX_MIUX); + local->local_wks = 0x1; /* LLC Link Management */ nfc_llcp_build_gb(local); diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 380253eccb74..d308402b67d8 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -571,7 +571,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; - if (sock_writeable(sk)) + if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -603,7 +603,7 @@ static int llcp_sock_release(struct socket *sock) /* Send a DISC */ if (sk->sk_state == LLCP_CONNECTED) - nfc_llcp_disconnect(llcp_sock); + nfc_llcp_send_disconnect(llcp_sock); if (sk->sk_state == LLCP_LISTEN) { struct nfc_llcp_sock *lsk, *n; @@ -614,7 +614,7 @@ static int llcp_sock_release(struct socket *sock) accept_sk = &lsk->sk; lock_sock(accept_sk); - nfc_llcp_disconnect(lsk); + nfc_llcp_send_disconnect(lsk); nfc_llcp_accept_unlink(accept_sk); release_sock(accept_sk); @@ -626,6 +626,13 @@ static int llcp_sock_release(struct socket *sock) release_sock(sk); + /* Keep this sock alive and therefore do not remove it from the sockets + * list until the DISC PDU has been actually sent. Otherwise we would + * reply with DM PDUs before sending the DISC one. + */ + if (sk->sk_state == LLCP_DISCONNECTING) + return err; + if (sock->type == SOCK_RAW) nfc_llcp_sock_unlink(&local->raw_sockets, sk); else @@ -722,14 +729,16 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, if (ret) goto sock_unlink; + sk->sk_state = LLCP_CONNECTING; + ret = sock_wait_state(sk, LLCP_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); - if (ret) + if (ret && ret != -EINPROGRESS) goto sock_unlink; release_sock(sk); - return 0; + return ret; sock_unlink: nfc_llcp_put_ssap(local, llcp_sock->ssap); diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig index 6d69b5f0f19b..a4f1e42e3481 100644 --- a/net/nfc/nci/Kconfig +++ b/net/nfc/nci/Kconfig @@ -8,3 +8,14 @@ config NFC_NCI Say Y here to compile NCI support into the kernel or say M to compile it as module (nci). + +config NFC_NCI_SPI + depends on NFC_NCI && SPI + select CRC_CCITT + bool "NCI over SPI protocol support" + default n + help + NCI (NFC Controller Interface) is a communication protocol between + an NFC Controller (NFCC) and a Device Host (DH). + + Say yes if you use an NCI driver that requires SPI link layer. diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index cdb3a2e44471..7aeedc43187d 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -4,4 +4,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o -nci-objs := core.o data.o lib.o ntf.o rsp.o
\ No newline at end of file +nci-objs := core.o data.o lib.o ntf.o rsp.o + +nci-$(CONFIG_NFC_NCI_SPI) += spi.o diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 48ada0ec749e..b943d46a1644 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -636,6 +636,21 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, return rc; } +static int nci_enable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + return 0; +} + +static int nci_disable_se(struct nfc_dev *nfc_dev, u32 se_idx) +{ + return 0; +} + +static int nci_discover_se(struct nfc_dev *nfc_dev) +{ + return 0; +} + static struct nfc_ops nci_nfc_ops = { .dev_up = nci_dev_up, .dev_down = nci_dev_down, @@ -646,6 +661,9 @@ static struct nfc_ops nci_nfc_ops = { .activate_target = nci_activate_target, .deactivate_target = nci_deactivate_target, .im_transceive = nci_transceive, + .enable_se = nci_enable_se, + .disable_se = nci_disable_se, + .discover_se = nci_discover_se, }; /* ---- Interface to NCI drivers ---- */ @@ -658,7 +676,6 @@ static struct nfc_ops nci_nfc_ops = { */ struct nci_dev *nci_allocate_device(struct nci_ops *ops, __u32 supported_protocols, - __u32 supported_se, int tx_headroom, int tx_tailroom) { struct nci_dev *ndev; @@ -681,7 +698,6 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, ndev->nfc_dev = nfc_allocate_device(&nci_nfc_ops, supported_protocols, - supported_se, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) @@ -797,12 +813,11 @@ EXPORT_SYMBOL(nci_unregister_device); /** * nci_recv_frame - receive frame from NCI drivers * + * @ndev: The nci device * @skb: The sk_buff to receive */ -int nci_recv_frame(struct sk_buff *skb) +int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb) { - struct nci_dev *ndev = (struct nci_dev *) skb->dev; - pr_debug("len %d\n", skb->len); if (!ndev || (!test_bit(NCI_UP, &ndev->flags) && @@ -819,10 +834,8 @@ int nci_recv_frame(struct sk_buff *skb) } EXPORT_SYMBOL(nci_recv_frame); -static int nci_send_frame(struct sk_buff *skb) +static int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb) { - struct nci_dev *ndev = (struct nci_dev *) skb->dev; - pr_debug("len %d\n", skb->len); if (!ndev) { @@ -833,7 +846,7 @@ static int nci_send_frame(struct sk_buff *skb) /* Get rid of skb owner, prior to sending to the driver. */ skb_orphan(skb); - return ndev->ops->send(skb); + return ndev->ops->send(ndev, skb); } /* Send NCI command */ @@ -861,8 +874,6 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) if (plen) memcpy(skb_put(skb, plen), payload, plen); - skb->dev = (void *) ndev; - skb_queue_tail(&ndev->cmd_q, skb); queue_work(ndev->cmd_wq, &ndev->cmd_work); @@ -894,7 +905,7 @@ static void nci_tx_work(struct work_struct *work) nci_conn_id(skb->data), nci_plen(skb->data)); - nci_send_frame(skb); + nci_send_frame(ndev, skb); mod_timer(&ndev->data_timer, jiffies + msecs_to_jiffies(NCI_DATA_TIMEOUT)); @@ -963,7 +974,7 @@ static void nci_cmd_work(struct work_struct *work) nci_opcode_oid(nci_opcode(skb->data)), nci_plen(skb->data)); - nci_send_frame(skb); + nci_send_frame(ndev, skb); mod_timer(&ndev->cmd_timer, jiffies + msecs_to_jiffies(NCI_CMD_TIMEOUT)); diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index 76c48c5324f8..2a9399dd6c68 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -80,8 +80,6 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, nci_mt_set((__u8 *)hdr, NCI_MT_DATA_PKT); nci_pbf_set((__u8 *)hdr, pbf); - - skb->dev = (void *) ndev; } static int nci_queue_tx_data_frags(struct nci_dev *ndev, diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c new file mode 100644 index 000000000000..c7cf37ba7298 --- /dev/null +++ b/net/nfc/nci/spi.c @@ -0,0 +1,378 @@ +/* + * Copyright (C) 2013 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#define pr_fmt(fmt) "nci_spi: %s: " fmt, __func__ + +#include <linux/export.h> +#include <linux/spi/spi.h> +#include <linux/crc-ccitt.h> +#include <linux/nfc.h> +#include <net/nfc/nci_core.h> + +#define NCI_SPI_HDR_LEN 4 +#define NCI_SPI_CRC_LEN 2 +#define NCI_SPI_ACK_SHIFT 6 +#define NCI_SPI_MSB_PAYLOAD_MASK 0x3F + +#define NCI_SPI_SEND_TIMEOUT (NCI_CMD_TIMEOUT > NCI_DATA_TIMEOUT ? \ + NCI_CMD_TIMEOUT : NCI_DATA_TIMEOUT) + +#define NCI_SPI_DIRECT_WRITE 0x01 +#define NCI_SPI_DIRECT_READ 0x02 + +#define ACKNOWLEDGE_NONE 0 +#define ACKNOWLEDGE_ACK 1 +#define ACKNOWLEDGE_NACK 2 + +#define CRC_INIT 0xFFFF + +static int nci_spi_open(struct nci_dev *nci_dev) +{ + struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev); + + return ndev->ops->open(ndev); +} + +static int nci_spi_close(struct nci_dev *nci_dev) +{ + struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev); + + return ndev->ops->close(ndev); +} + +static int __nci_spi_send(struct nci_spi_dev *ndev, struct sk_buff *skb) +{ + struct spi_message m; + struct spi_transfer t; + + t.tx_buf = skb->data; + t.len = skb->len; + t.cs_change = 0; + t.delay_usecs = ndev->xfer_udelay; + + spi_message_init(&m); + spi_message_add_tail(&t, &m); + + return spi_sync(ndev->spi, &m); +} + +static int nci_spi_send(struct nci_dev *nci_dev, struct sk_buff *skb) +{ + struct nci_spi_dev *ndev = nci_get_drvdata(nci_dev); + unsigned int payload_len = skb->len; + unsigned char *hdr; + int ret; + long completion_rc; + + ndev->ops->deassert_int(ndev); + + /* add the NCI SPI header to the start of the buffer */ + hdr = skb_push(skb, NCI_SPI_HDR_LEN); + hdr[0] = NCI_SPI_DIRECT_WRITE; + hdr[1] = ndev->acknowledge_mode; + hdr[2] = payload_len >> 8; + hdr[3] = payload_len & 0xFF; + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) { + u16 crc; + + crc = crc_ccitt(CRC_INIT, skb->data, skb->len); + *skb_put(skb, 1) = crc >> 8; + *skb_put(skb, 1) = crc & 0xFF; + } + + ret = __nci_spi_send(ndev, skb); + + kfree_skb(skb); + ndev->ops->assert_int(ndev); + + if (ret != 0 || ndev->acknowledge_mode == NCI_SPI_CRC_DISABLED) + goto done; + + init_completion(&ndev->req_completion); + completion_rc = + wait_for_completion_interruptible_timeout(&ndev->req_completion, + NCI_SPI_SEND_TIMEOUT); + + if (completion_rc <= 0 || ndev->req_result == ACKNOWLEDGE_NACK) + ret = -EIO; + +done: + return ret; +} + +static struct nci_ops nci_spi_ops = { + .open = nci_spi_open, + .close = nci_spi_close, + .send = nci_spi_send, +}; + +/* ---- Interface to NCI SPI drivers ---- */ + +/** + * nci_spi_allocate_device - allocate a new nci spi device + * + * @spi: SPI device + * @ops: device operations + * @supported_protocols: NFC protocols supported by the device + * @supported_se: NFC Secure Elements supported by the device + * @acknowledge_mode: Acknowledge mode used by the device + * @delay: delay between transactions in us + */ +struct nci_spi_dev *nci_spi_allocate_device(struct spi_device *spi, + struct nci_spi_ops *ops, + u32 supported_protocols, + u32 supported_se, + u8 acknowledge_mode, + unsigned int delay) +{ + struct nci_spi_dev *ndev; + int tailroom = 0; + + if (!ops->open || !ops->close || !ops->assert_int || !ops->deassert_int) + return NULL; + + if (!supported_protocols) + return NULL; + + ndev = devm_kzalloc(&spi->dev, sizeof(struct nci_dev), GFP_KERNEL); + if (!ndev) + return NULL; + + ndev->ops = ops; + ndev->acknowledge_mode = acknowledge_mode; + ndev->xfer_udelay = delay; + + if (acknowledge_mode == NCI_SPI_CRC_ENABLED) + tailroom += NCI_SPI_CRC_LEN; + + ndev->nci_dev = nci_allocate_device(&nci_spi_ops, supported_protocols, + NCI_SPI_HDR_LEN, tailroom); + if (!ndev->nci_dev) + return NULL; + + nci_set_drvdata(ndev->nci_dev, ndev); + + return ndev; +} +EXPORT_SYMBOL_GPL(nci_spi_allocate_device); + +/** + * nci_spi_free_device - deallocate nci spi device + * + * @ndev: The nci spi device to deallocate + */ +void nci_spi_free_device(struct nci_spi_dev *ndev) +{ + nci_free_device(ndev->nci_dev); +} +EXPORT_SYMBOL_GPL(nci_spi_free_device); + +/** + * nci_spi_register_device - register a nci spi device in the nfc subsystem + * + * @pdev: The nci spi device to register + */ +int nci_spi_register_device(struct nci_spi_dev *ndev) +{ + return nci_register_device(ndev->nci_dev); +} +EXPORT_SYMBOL_GPL(nci_spi_register_device); + +/** + * nci_spi_unregister_device - unregister a nci spi device in the nfc subsystem + * + * @dev: The nci spi device to unregister + */ +void nci_spi_unregister_device(struct nci_spi_dev *ndev) +{ + nci_unregister_device(ndev->nci_dev); +} +EXPORT_SYMBOL_GPL(nci_spi_unregister_device); + +static int send_acknowledge(struct nci_spi_dev *ndev, u8 acknowledge) +{ + struct sk_buff *skb; + unsigned char *hdr; + u16 crc; + int ret; + + skb = nci_skb_alloc(ndev->nci_dev, 0, GFP_KERNEL); + + /* add the NCI SPI header to the start of the buffer */ + hdr = skb_push(skb, NCI_SPI_HDR_LEN); + hdr[0] = NCI_SPI_DIRECT_WRITE; + hdr[1] = NCI_SPI_CRC_ENABLED; + hdr[2] = acknowledge << NCI_SPI_ACK_SHIFT; + hdr[3] = 0; + + crc = crc_ccitt(CRC_INIT, skb->data, skb->len); + *skb_put(skb, 1) = crc >> 8; + *skb_put(skb, 1) = crc & 0xFF; + + ret = __nci_spi_send(ndev, skb); + + kfree_skb(skb); + + return ret; +} + +static struct sk_buff *__nci_spi_recv_frame(struct nci_spi_dev *ndev) +{ + struct sk_buff *skb; + struct spi_message m; + unsigned char req[2], resp_hdr[2]; + struct spi_transfer tx, rx; + unsigned short rx_len = 0; + int ret; + + spi_message_init(&m); + req[0] = NCI_SPI_DIRECT_READ; + req[1] = ndev->acknowledge_mode; + tx.tx_buf = req; + tx.len = 2; + tx.cs_change = 0; + spi_message_add_tail(&tx, &m); + rx.rx_buf = resp_hdr; + rx.len = 2; + rx.cs_change = 1; + spi_message_add_tail(&rx, &m); + ret = spi_sync(ndev->spi, &m); + + if (ret) + return NULL; + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) + rx_len = ((resp_hdr[0] & NCI_SPI_MSB_PAYLOAD_MASK) << 8) + + resp_hdr[1] + NCI_SPI_CRC_LEN; + else + rx_len = (resp_hdr[0] << 8) | resp_hdr[1]; + + skb = nci_skb_alloc(ndev->nci_dev, rx_len, GFP_KERNEL); + if (!skb) + return NULL; + + spi_message_init(&m); + rx.rx_buf = skb_put(skb, rx_len); + rx.len = rx_len; + rx.cs_change = 0; + rx.delay_usecs = ndev->xfer_udelay; + spi_message_add_tail(&rx, &m); + ret = spi_sync(ndev->spi, &m); + + if (ret) + goto receive_error; + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) { + *skb_push(skb, 1) = resp_hdr[1]; + *skb_push(skb, 1) = resp_hdr[0]; + } + + return skb; + +receive_error: + kfree_skb(skb); + + return NULL; +} + +static int nci_spi_check_crc(struct sk_buff *skb) +{ + u16 crc_data = (skb->data[skb->len - 2] << 8) | + skb->data[skb->len - 1]; + int ret; + + ret = (crc_ccitt(CRC_INIT, skb->data, skb->len - NCI_SPI_CRC_LEN) + == crc_data); + + skb_trim(skb, skb->len - NCI_SPI_CRC_LEN); + + return ret; +} + +static u8 nci_spi_get_ack(struct sk_buff *skb) +{ + u8 ret; + + ret = skb->data[0] >> NCI_SPI_ACK_SHIFT; + + /* Remove NFCC part of the header: ACK, NACK and MSB payload len */ + skb_pull(skb, 2); + + return ret; +} + +/** + * nci_spi_recv_frame - receive frame from NCI SPI drivers + * + * @ndev: The nci spi device + * Context: can sleep + * + * This call may only be used from a context that may sleep. The sleep + * is non-interruptible, and has no timeout. + * + * It returns zero on success, else a negative error code. + */ +int nci_spi_recv_frame(struct nci_spi_dev *ndev) +{ + struct sk_buff *skb; + int ret = 0; + + ndev->ops->deassert_int(ndev); + + /* Retrieve frame from SPI */ + skb = __nci_spi_recv_frame(ndev); + if (!skb) { + ret = -EIO; + goto done; + } + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) { + if (!nci_spi_check_crc(skb)) { + send_acknowledge(ndev, ACKNOWLEDGE_NACK); + goto done; + } + + /* In case of acknowledged mode: if ACK or NACK received, + * unblock completion of latest frame sent. + */ + ndev->req_result = nci_spi_get_ack(skb); + if (ndev->req_result) + complete(&ndev->req_completion); + } + + /* If there is no payload (ACK/NACK only frame), + * free the socket buffer + */ + if (skb->len == 0) { + kfree_skb(skb); + goto done; + } + + if (ndev->acknowledge_mode == NCI_SPI_CRC_ENABLED) + send_acknowledge(ndev, ACKNOWLEDGE_ACK); + + /* Forward skb to NCI core layer */ + ret = nci_recv_frame(ndev->nci_dev, skb); + +done: + ndev->ops->assert_int(ndev); + + return ret; +} +EXPORT_SYMBOL_GPL(nci_spi_recv_frame); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index f0c4d61f37c0..f16fd59d4160 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -56,6 +56,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 }, [NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 }, [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, + [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, + .len = NFC_FIRMWARE_NAME_MAXSIZE }, }; static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { @@ -424,6 +426,69 @@ free_msg: return rc; } +int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_ADDED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || + nla_put_u8(msg, NFC_ATTR_SE_TYPE, type)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + +int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_REMOVED); + if (!hdr) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, @@ -442,7 +507,6 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, if (nla_put_string(msg, NFC_ATTR_DEVICE_NAME, nfc_device_name(dev)) || nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, dev->supported_protocols) || - nla_put_u32(msg, NFC_ATTR_SE, dev->supported_se) || nla_put_u8(msg, NFC_ATTR_DEVICE_POWERED, dev->dev_up) || nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; @@ -1025,6 +1089,108 @@ exit: return rc; } +static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx; + char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1]; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME], + sizeof(firmware_name)); + + rc = nfc_fw_download(dev, firmware_name); + + nfc_put_device(dev); + return rc; +} + +int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_CMD_FW_DOWNLOAD); + if (!hdr) + goto free_msg; + + if (nla_put_string(msg, NFC_ATTR_FIRMWARE_NAME, firmware_name) || + nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + nlmsg_free(msg); + return -EMSGSIZE; +} + +static int nfc_genl_enable_se(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx, se_idx; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_SE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + rc = nfc_enable_se(dev, se_idx); + + nfc_put_device(dev); + return rc; +} + +static int nfc_genl_disable_se(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + int rc; + u32 idx, se_idx; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_SE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + se_idx = nla_get_u32(info->attrs[NFC_ATTR_SE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + rc = nfc_disable_se(dev, se_idx); + + nfc_put_device(dev); + return rc; +} + static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -1084,6 +1250,21 @@ static struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_llc_sdreq, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_FW_DOWNLOAD, + .doit = nfc_genl_fw_download, + .policy = nfc_genl_policy, + }, + { + .cmd = NFC_CMD_ENABLE_SE, + .doit = nfc_genl_enable_se, + .policy = nfc_genl_policy, + }, + { + .cmd = NFC_CMD_DISABLE_SE, + .doit = nfc_genl_disable_se, + .policy = nfc_genl_policy, + }, }; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index afa1f84ba040..820a7850c36a 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -94,6 +94,9 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev); int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); +int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); +int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); + struct nfc_dev *nfc_get_device(unsigned int idx); static inline void nfc_put_device(struct nfc_dev *dev) @@ -120,6 +123,11 @@ static inline void nfc_device_iter_exit(struct class_dev_iter *iter) class_dev_iter_exit(iter); } +int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name); +int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name); + +int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name); + int nfc_dev_up(struct nfc_dev *dev); int nfc_dev_down(struct nfc_dev *dev); @@ -139,4 +147,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx); int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb, data_exchange_cb_t cb, void *cb_context); +int nfc_enable_se(struct nfc_dev *dev, u32 se_idx); +int nfc_disable_se(struct nfc_dev *dev, u32 se_idx); + #endif /* __LOCAL_NFC_H */ diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index d9ea33c361be..27ee56b688a3 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -26,3 +26,17 @@ config OPENVSWITCH called openvswitch. If unsure, say N. + +config OPENVSWITCH_GRE + bool "Open vSwitch GRE tunneling support" + depends on INET + depends on OPENVSWITCH + depends on NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m) + default y + ---help--- + If you say Y here, then the Open vSwitch will be able create GRE + vport. + + Say N to exclude this support and reduce the binary size. + + If unsure, say Y. diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 15e7384745c1..01bddb2991e3 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -10,5 +10,6 @@ openvswitch-y := \ dp_notify.o \ flow.o \ vport.o \ + vport-gre.o \ vport-internal_dev.o \ - vport-netdev.o \ + vport-netdev.o diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 894b6cbdd929..22c5f399f1cf 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -130,9 +130,13 @@ static int set_eth_addr(struct sk_buff *skb, if (unlikely(err)) return err; + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN); memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN); + ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); + return 0; } @@ -432,6 +436,10 @@ static int execute_set_action(struct sk_buff *skb, skb->mark = nla_get_u32(nested_attr); break; + case OVS_KEY_ATTR_IPV4_TUNNEL: + OVS_CB(skb)->tun_key = nla_data(nested_attr); + break; + case OVS_KEY_ATTR_ETHERNET: err = set_eth_addr(skb, nla_data(nested_attr)); break; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d12d6b8b5e8b..f7e3a0d84c40 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -362,6 +362,14 @@ static int queue_gso_packets(struct net *net, int dp_ifindex, static size_t key_attr_size(void) { return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ + + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ @@ -464,16 +472,89 @@ static int flush_flows(struct datapath *dp) return 0; } -static int validate_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth); +static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int attr_len) +{ + + struct sw_flow_actions *acts; + int new_acts_size; + int req_size = NLA_ALIGN(attr_len); + int next_offset = offsetof(struct sw_flow_actions, actions) + + (*sfa)->actions_len; + + if (req_size <= (ksize(*sfa) - next_offset)) + goto out; + + new_acts_size = ksize(*sfa) * 2; + + if (new_acts_size > MAX_ACTIONS_BUFSIZE) { + if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) + return ERR_PTR(-EMSGSIZE); + new_acts_size = MAX_ACTIONS_BUFSIZE; + } + + acts = ovs_flow_actions_alloc(new_acts_size); + if (IS_ERR(acts)) + return (void *)acts; + + memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len); + acts->actions_len = (*sfa)->actions_len; + kfree(*sfa); + *sfa = acts; + +out: + (*sfa)->actions_len += req_size; + return (struct nlattr *) ((unsigned char *)(*sfa) + next_offset); +} + +static int add_action(struct sw_flow_actions **sfa, int attrtype, void *data, int len) +{ + struct nlattr *a; + + a = reserve_sfa_size(sfa, nla_attr_size(len)); + if (IS_ERR(a)) + return PTR_ERR(a); + + a->nla_type = attrtype; + a->nla_len = nla_attr_size(len); + + if (data) + memcpy(nla_data(a), data, len); + memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len)); + + return 0; +} + +static inline int add_nested_action_start(struct sw_flow_actions **sfa, int attrtype) +{ + int used = (*sfa)->actions_len; + int err; + + err = add_action(sfa, attrtype, NULL, 0); + if (err) + return err; + + return used; +} -static int validate_sample(const struct nlattr *attr, - const struct sw_flow_key *key, int depth) +static inline void add_nested_action_end(struct sw_flow_actions *sfa, int st_offset) +{ + struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions + st_offset); + + a->nla_len = sfa->actions_len - st_offset; +} + +static int validate_and_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct sw_flow_actions **sfa); + +static int validate_and_copy_sample(const struct nlattr *attr, + const struct sw_flow_key *key, int depth, + struct sw_flow_actions **sfa) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; const struct nlattr *a; - int rem; + int rem, start, err, st_acts; memset(attrs, 0, sizeof(attrs)); nla_for_each_nested(a, attr, rem) { @@ -492,7 +573,26 @@ static int validate_sample(const struct nlattr *attr, actions = attrs[OVS_SAMPLE_ATTR_ACTIONS]; if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) return -EINVAL; - return validate_actions(actions, key, depth + 1); + + /* validation done, copy sample action. */ + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE); + if (start < 0) + return start; + err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY, nla_data(probability), sizeof(u32)); + if (err) + return err; + st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS); + if (st_acts < 0) + return st_acts; + + err = validate_and_copy_actions(actions, key, depth + 1, sfa); + if (err) + return err; + + add_nested_action_end(*sfa, st_acts); + add_nested_action_end(*sfa, start); + + return 0; } static int validate_tp_port(const struct sw_flow_key *flow_key) @@ -508,8 +608,30 @@ static int validate_tp_port(const struct sw_flow_key *flow_key) return -EINVAL; } +static int validate_and_copy_set_tun(const struct nlattr *attr, + struct sw_flow_actions **sfa) +{ + struct ovs_key_ipv4_tunnel tun_key; + int err, start; + + err = ovs_ipv4_tun_from_nlattr(nla_data(attr), &tun_key); + if (err) + return err; + + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET); + if (start < 0) + return start; + + err = add_action(sfa, OVS_KEY_ATTR_IPV4_TUNNEL, &tun_key, sizeof(tun_key)); + add_nested_action_end(*sfa, start); + + return err; +} + static int validate_set(const struct nlattr *a, - const struct sw_flow_key *flow_key) + const struct sw_flow_key *flow_key, + struct sw_flow_actions **sfa, + bool *set_tun) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -519,18 +641,27 @@ static int validate_set(const struct nlattr *a, return -EINVAL; if (key_type > OVS_KEY_ATTR_MAX || - nla_len(ovs_key) != ovs_key_lens[key_type]) + (ovs_key_lens[key_type] != nla_len(ovs_key) && + ovs_key_lens[key_type] != -1)) return -EINVAL; switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_ipv6 *ipv6_key; + int err; case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_ETHERNET: break; + case OVS_KEY_ATTR_TUNNEL: + *set_tun = true; + err = validate_and_copy_set_tun(a, sfa); + if (err) + return err; + break; + case OVS_KEY_ATTR_IPV4: if (flow_key->eth.type != htons(ETH_P_IP)) return -EINVAL; @@ -606,8 +737,24 @@ static int validate_userspace(const struct nlattr *attr) return 0; } -static int validate_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth) +static int copy_action(const struct nlattr *from, + struct sw_flow_actions **sfa) +{ + int totlen = NLA_ALIGN(from->nla_len); + struct nlattr *to; + + to = reserve_sfa_size(sfa, from->nla_len); + if (IS_ERR(to)) + return PTR_ERR(to); + + memcpy(to, from, totlen); + return 0; +} + +static int validate_and_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, + struct sw_flow_actions **sfa) { const struct nlattr *a; int rem, err; @@ -627,12 +774,14 @@ static int validate_actions(const struct nlattr *attr, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); + bool skip_copy; if (type > OVS_ACTION_ATTR_MAX || (action_lens[type] != nla_len(a) && action_lens[type] != (u32)-1)) return -EINVAL; + skip_copy = false; switch (type) { case OVS_ACTION_ATTR_UNSPEC: return -EINVAL; @@ -661,20 +810,26 @@ static int validate_actions(const struct nlattr *attr, break; case OVS_ACTION_ATTR_SET: - err = validate_set(a, key); + err = validate_set(a, key, sfa, &skip_copy); if (err) return err; break; case OVS_ACTION_ATTR_SAMPLE: - err = validate_sample(a, key, depth); + err = validate_and_copy_sample(a, key, depth, sfa); if (err) return err; + skip_copy = true; break; default: return -EINVAL; } + if (!skip_copy) { + err = copy_action(a, sfa); + if (err) + return err; + } } if (rem > 0) @@ -739,24 +894,18 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority, - &flow->key.phy.skb_mark, - &flow->key.phy.in_port, - a[OVS_PACKET_ATTR_KEY]); - if (err) - goto err_flow_free; - - err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0); + err = ovs_flow_metadata_from_nlattrs(flow, key_len, a[OVS_PACKET_ATTR_KEY]); if (err) goto err_flow_free; - - flow->hash = ovs_flow_hash(&flow->key, key_len); - - acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]); + acts = ovs_flow_actions_alloc(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); err = PTR_ERR(acts); if (IS_ERR(acts)) goto err_flow_free; + + err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0, &acts); rcu_assign_pointer(flow->sf_acts, acts); + if (err) + goto err_flow_free; OVS_CB(packet)->flow = flow; packet->priority = flow->key.phy.priority; @@ -846,6 +995,99 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP }; +static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb); +static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) +{ + const struct nlattr *a; + struct nlattr *start; + int err = 0, rem; + + start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE); + if (!start) + return -EMSGSIZE; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + struct nlattr *st_sample; + + switch (type) { + case OVS_SAMPLE_ATTR_PROBABILITY: + if (nla_put(skb, OVS_SAMPLE_ATTR_PROBABILITY, sizeof(u32), nla_data(a))) + return -EMSGSIZE; + break; + case OVS_SAMPLE_ATTR_ACTIONS: + st_sample = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS); + if (!st_sample) + return -EMSGSIZE; + err = actions_to_attr(nla_data(a), nla_len(a), skb); + if (err) + return err; + nla_nest_end(skb, st_sample); + break; + } + } + + nla_nest_end(skb, start); + return err; +} + +static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + int key_type = nla_type(ovs_key); + struct nlattr *start; + int err; + + switch (key_type) { + case OVS_KEY_ATTR_IPV4_TUNNEL: + start = nla_nest_start(skb, OVS_ACTION_ATTR_SET); + if (!start) + return -EMSGSIZE; + + err = ovs_ipv4_tun_to_nlattr(skb, nla_data(ovs_key)); + if (err) + return err; + nla_nest_end(skb, start); + break; + default: + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) + return -EMSGSIZE; + break; + } + + return 0; +} + +static int actions_to_attr(const struct nlattr *attr, int len, struct sk_buff *skb) +{ + const struct nlattr *a; + int rem, err; + + nla_for_each_attr(a, attr, len, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_ACTION_ATTR_SET: + err = set_action_to_attr(a, skb); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SAMPLE: + err = sample_action_to_attr(a, skb); + if (err) + return err; + break; + default: + if (nla_put(skb, type, nla_len(a), nla_data(a))) + return -EMSGSIZE; + break; + } + } + + return 0; +} + static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) @@ -863,6 +1105,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, { const int skb_orig_len = skb->len; const struct sw_flow_actions *sf_acts; + struct nlattr *start; struct ovs_flow_stats stats; struct ovs_header *ovs_header; struct nlattr *nla; @@ -916,10 +1159,19 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, * This can only fail for dump operations because the skb is always * properly sized for single flows. */ - err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len, - sf_acts->actions); - if (err < 0 && skb_orig_len) - goto error; + start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS); + if (start) { + err = actions_to_attr(sf_acts->actions, sf_acts->actions_len, skb); + if (!err) + nla_nest_end(skb, start); + else { + if (skb_orig_len) + goto error; + + nla_nest_cancel(skb, start); + } + } else if (skb_orig_len) + goto nla_put_failure; return genlmsg_end(skb, ovs_header); @@ -964,6 +1216,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct datapath *dp; struct flow_table *table; + struct sw_flow_actions *acts = NULL; int error; int key_len; @@ -977,9 +1230,14 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Validate actions. */ if (a[OVS_FLOW_ATTR_ACTIONS]) { - error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0); - if (error) + acts = ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); + error = PTR_ERR(acts); + if (IS_ERR(acts)) goto error; + + error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0, &acts); + if (error) + goto err_kfree; } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) { error = -EINVAL; goto error; @@ -994,8 +1252,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) table = ovsl_dereference(dp->table); flow = ovs_flow_tbl_lookup(table, &key, key_len); if (!flow) { - struct sw_flow_actions *acts; - /* Bail out if we're not allowed to create a new flow. */ error = -ENOENT; if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) @@ -1019,19 +1275,12 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) error = PTR_ERR(flow); goto err_unlock_ovs; } - flow->key = key; clear_stats(flow); - /* Obtain actions. */ - acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]); - error = PTR_ERR(acts); - if (IS_ERR(acts)) - goto error_free_flow; rcu_assign_pointer(flow->sf_acts, acts); /* Put flow in bucket. */ - flow->hash = ovs_flow_hash(&key, key_len); - ovs_flow_tbl_insert(table, flow); + ovs_flow_tbl_insert(table, flow, &key, key_len); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, @@ -1039,7 +1288,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } else { /* We found a matching flow. */ struct sw_flow_actions *old_acts; - struct nlattr *acts_attrs; /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL @@ -1054,21 +1302,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); - acts_attrs = a[OVS_FLOW_ATTR_ACTIONS]; - if (acts_attrs && - (old_acts->actions_len != nla_len(acts_attrs) || - memcmp(old_acts->actions, nla_data(acts_attrs), - old_acts->actions_len))) { - struct sw_flow_actions *new_acts; - - new_acts = ovs_flow_actions_alloc(acts_attrs); - error = PTR_ERR(new_acts); - if (IS_ERR(new_acts)) - goto err_unlock_ovs; - - rcu_assign_pointer(flow->sf_acts, new_acts); - ovs_flow_deferred_free_acts(old_acts); - } + rcu_assign_pointer(flow->sf_acts, acts); + ovs_flow_deferred_free_acts(old_acts); reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); @@ -1089,10 +1324,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); return 0; -error_free_flow: - ovs_flow_free(flow); err_unlock_ovs: ovs_unlock(); +err_kfree: + kfree(acts); error: return error; } @@ -1812,10 +2047,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - err = 0; if (a[OVS_VPORT_ATTR_TYPE] && - nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) + nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) { err = -EINVAL; + goto exit_unlock; + } reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!reply) { @@ -1823,10 +2059,11 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - if (!err && a[OVS_VPORT_ATTR_OPTIONS]) + if (a[OVS_VPORT_ATTR_OPTIONS]) { err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]); - if (err) - goto exit_free; + if (err) + goto exit_free; + } if (a[OVS_VPORT_ATTR_UPCALL_PID]) vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); @@ -1867,8 +2104,8 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_DEL); + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, + info->snd_seq, OVS_VPORT_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) goto exit_unlock; @@ -1897,8 +2134,8 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, - OVS_VPORT_CMD_NEW); + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, + info->snd_seq, OVS_VPORT_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) goto exit_unlock; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 16b840695216..a91486484916 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -88,9 +88,12 @@ struct datapath { /** * struct ovs_skb_cb - OVS data in skb CB * @flow: The flow associated with this packet. May be %NULL if no flow. + * @tun_key: Key for the tunnel that encapsulated this packet. NULL if the + * packet is not being tunneled. */ struct ovs_skb_cb { struct sw_flow *flow; + struct ovs_key_ipv4_tunnel *tun_key; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) @@ -119,6 +122,7 @@ struct dp_upcall_info { struct ovs_net { struct list_head dps; struct work_struct dp_notify_work; + struct vport_net vport_net; }; extern int ovs_net_id; diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index ef4feec6cd84..c3235675f359 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -78,7 +78,7 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct ovs_net *ovs_net; - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vport *vport = NULL; if (!ovs_is_internal_dev(dev)) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index b15321a2228c..5c519b121e1b 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -40,6 +40,7 @@ #include <linux/icmpv6.h> #include <linux/rculist.h> #include <net/ip.h> +#include <net/ip_tunnels.h> #include <net/ipv6.h> #include <net/ndisc.h> @@ -198,20 +199,18 @@ void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb) spin_unlock(&flow->lock); } -struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions) +struct sw_flow_actions *ovs_flow_actions_alloc(int size) { - int actions_len = nla_len(actions); struct sw_flow_actions *sfa; - if (actions_len > MAX_ACTIONS_BUFSIZE) + if (size > MAX_ACTIONS_BUFSIZE) return ERR_PTR(-EINVAL); - sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL); + sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); - sfa->actions_len = actions_len; - nla_memcpy(sfa->actions, actions, actions_len); + sfa->actions_len = 0; return sfa; } @@ -354,6 +353,14 @@ struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *la return NULL; } +static void __flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) +{ + struct hlist_head *head; + head = find_bucket(table, flow->hash); + hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); + table->count++; +} + static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new) { int old_ver; @@ -370,7 +377,7 @@ static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new head = flex_array_get(old->buckets, i); hlist_for_each_entry(flow, head, hash_node[old_ver]) - ovs_flow_tbl_insert(new, flow); + __flow_tbl_insert(new, flow); } old->keep_flows = true; } @@ -590,10 +597,10 @@ out: * - skb->network_header: just past the Ethernet header, or just past the * VLAN header, to the first byte of the Ethernet payload. * - * - skb->transport_header: If key->dl_type is ETH_P_IP or ETH_P_IPV6 + * - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6 * on output, then just past the IP header, if one is present and * of a correct length, otherwise the same as skb->network_header. - * For other key->dl_type values it is left untouched. + * For other key->eth.type values it is left untouched. */ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, int *key_lenp) @@ -605,6 +612,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memset(key, 0, sizeof(*key)); key->phy.priority = skb->priority; + if (OVS_CB(skb)->tun_key) + memcpy(&key->tun_key, OVS_CB(skb)->tun_key, sizeof(key->tun_key)); key->phy.in_port = in_port; key->phy.skb_mark = skb->mark; @@ -618,6 +627,9 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memcpy(key->eth.dst, eth->h_dest, ETH_ALEN); __skb_pull(skb, 2 * ETH_ALEN); + /* We are going to push all headers that we pull, so no need to + * update skb->csum here. + */ if (vlan_tx_tag_present(skb)) key->eth.tci = htons(skb->vlan_tci); @@ -759,9 +771,18 @@ out: return error; } -u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len) +static u32 ovs_flow_hash(const struct sw_flow_key *key, int key_start, int key_len) +{ + return jhash2((u32 *)((u8 *)key + key_start), + DIV_ROUND_UP(key_len - key_start, sizeof(u32)), 0); +} + +static int flow_key_start(struct sw_flow_key *key) { - return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), 0); + if (key->tun_key.ipv4_dst) + return 0; + else + return offsetof(struct sw_flow_key, phy); } struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, @@ -769,28 +790,31 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table, { struct sw_flow *flow; struct hlist_head *head; + u8 *_key; + int key_start; u32 hash; - hash = ovs_flow_hash(key, key_len); + key_start = flow_key_start(key); + hash = ovs_flow_hash(key, key_start, key_len); + _key = (u8 *) key + key_start; head = find_bucket(table, hash); hlist_for_each_entry_rcu(flow, head, hash_node[table->node_ver]) { if (flow->hash == hash && - !memcmp(&flow->key, key, key_len)) { + !memcmp((u8 *)&flow->key + key_start, _key, key_len - key_start)) { return flow; } } return NULL; } -void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow) +void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_key *key, int key_len) { - struct hlist_head *head; - - head = find_bucket(table, flow->hash); - hlist_add_head_rcu(&flow->hash_node[table->node_ver], head); - table->count++; + flow->hash = ovs_flow_hash(key, flow_key_start(key), key_len); + memcpy(&flow->key, key, sizeof(flow->key)); + __flow_tbl_insert(table, flow); } void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) @@ -817,6 +841,7 @@ const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), + [OVS_KEY_ATTR_TUNNEL] = -1, }; static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len, @@ -954,6 +979,105 @@ static int parse_flow_nlattrs(const struct nlattr *attr, return 0; } +int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, + struct ovs_key_ipv4_tunnel *tun_key) +{ + struct nlattr *a; + int rem; + bool ttl = false; + + memset(tun_key, 0, sizeof(*tun_key)); + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { + [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), + [OVS_TUNNEL_KEY_ATTR_TOS] = 1, + [OVS_TUNNEL_KEY_ATTR_TTL] = 1, + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, + [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, + }; + + if (type > OVS_TUNNEL_KEY_ATTR_MAX || + ovs_tunnel_key_lens[type] != nla_len(a)) + return -EINVAL; + + switch (type) { + case OVS_TUNNEL_KEY_ATTR_ID: + tun_key->tun_id = nla_get_be64(a); + tun_key->tun_flags |= TUNNEL_KEY; + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + tun_key->ipv4_src = nla_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + tun_key->ipv4_dst = nla_get_be32(a); + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + tun_key->ipv4_tos = nla_get_u8(a); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + tun_key->ipv4_ttl = nla_get_u8(a); + ttl = true; + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + tun_key->tun_flags |= TUNNEL_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + tun_key->tun_flags |= TUNNEL_CSUM; + break; + default: + return -EINVAL; + + } + } + if (rem > 0) + return -EINVAL; + + if (!tun_key->ipv4_dst) + return -EINVAL; + + if (!ttl) + return -EINVAL; + + return 0; +} + +int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *tun_key) +{ + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL); + if (!nla) + return -EMSGSIZE; + + if (tun_key->tun_flags & TUNNEL_KEY && + nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, tun_key->tun_id)) + return -EMSGSIZE; + if (tun_key->ipv4_src && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, tun_key->ipv4_src)) + return -EMSGSIZE; + if (nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST, tun_key->ipv4_dst)) + return -EMSGSIZE; + if (tun_key->ipv4_tos && + nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, tun_key->ipv4_tos)) + return -EMSGSIZE; + if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, tun_key->ipv4_ttl)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT)) + return -EMSGSIZE; + if ((tun_key->tun_flags & TUNNEL_CSUM) && + nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM)) + return -EMSGSIZE; + + nla_nest_end(skb, nla); + return 0; +} + /** * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key. * @swkey: receives the extracted flow key. @@ -996,6 +1120,14 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); } + if (attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { + err = ovs_ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], &swkey->tun_key); + if (err) + return err; + + attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); + } + /* Data attributes. */ if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET))) return -EINVAL; @@ -1122,10 +1254,9 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, /** * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. - * @priority: receives the skb priority - * @mark: receives the skb mark - * @in_port: receives the extracted input port. - * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute + * @flow: Receives extracted in_port, priority, tun_key and skb_mark. + * @key_len: Length of key in @flow. Used for calculating flow hash. + * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. * * This parses a series of Netlink attributes that form a flow key, which must @@ -1133,42 +1264,56 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. */ -int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, - const struct nlattr *attr) +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, + const struct nlattr *attr) { + struct ovs_key_ipv4_tunnel *tun_key = &flow->key.tun_key; const struct nlattr *nla; int rem; - *in_port = DP_MAX_PORTS; - *priority = 0; - *mark = 0; + flow->key.phy.in_port = DP_MAX_PORTS; + flow->key.phy.priority = 0; + flow->key.phy.skb_mark = 0; + memset(tun_key, 0, sizeof(flow->key.tun_key)); nla_for_each_nested(nla, attr, rem) { int type = nla_type(nla); if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) { + int err; + if (nla_len(nla) != ovs_key_lens[type]) return -EINVAL; switch (type) { case OVS_KEY_ATTR_PRIORITY: - *priority = nla_get_u32(nla); + flow->key.phy.priority = nla_get_u32(nla); + break; + + case OVS_KEY_ATTR_TUNNEL: + err = ovs_ipv4_tun_from_nlattr(nla, tun_key); + if (err) + return err; break; case OVS_KEY_ATTR_IN_PORT: if (nla_get_u32(nla) >= DP_MAX_PORTS) return -EINVAL; - *in_port = nla_get_u32(nla); + flow->key.phy.in_port = nla_get_u32(nla); break; case OVS_KEY_ATTR_SKB_MARK: - *mark = nla_get_u32(nla); + flow->key.phy.skb_mark = nla_get_u32(nla); break; } } } if (rem) return -EINVAL; + + flow->hash = ovs_flow_hash(&flow->key, + flow_key_start(&flow->key), key_len); + return 0; } @@ -1181,6 +1326,10 @@ int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority)) goto nla_put_failure; + if (swkey->tun_key.ipv4_dst && + ovs_ipv4_tun_to_nlattr(skb, &swkey->tun_key)) + goto nla_put_failure; + if (swkey->phy.in_port != DP_MAX_PORTS && nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port)) goto nla_put_failure; diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 0875fde65b9c..66ef7220293e 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -40,7 +40,38 @@ struct sw_flow_actions { struct nlattr actions[]; }; +/* Used to memset ovs_key_ipv4_tunnel padding. */ +#define OVS_TUNNEL_KEY_SIZE \ + (offsetof(struct ovs_key_ipv4_tunnel, ipv4_ttl) + \ + FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, ipv4_ttl)) + +struct ovs_key_ipv4_tunnel { + __be64 tun_id; + __be32 ipv4_src; + __be32 ipv4_dst; + __be16 tun_flags; + u8 ipv4_tos; + u8 ipv4_ttl; +}; + +static inline void ovs_flow_tun_key_init(struct ovs_key_ipv4_tunnel *tun_key, + const struct iphdr *iph, __be64 tun_id, + __be16 tun_flags) +{ + tun_key->tun_id = tun_id; + tun_key->ipv4_src = iph->saddr; + tun_key->ipv4_dst = iph->daddr; + tun_key->ipv4_tos = iph->tos; + tun_key->ipv4_ttl = iph->ttl; + tun_key->tun_flags = tun_flags; + + /* clear struct padding. */ + memset((unsigned char *) tun_key + OVS_TUNNEL_KEY_SIZE, 0, + sizeof(*tun_key) - OVS_TUNNEL_KEY_SIZE); +} + struct sw_flow_key { + struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */ struct { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ @@ -130,7 +161,7 @@ struct sw_flow *ovs_flow_alloc(void); void ovs_flow_deferred_free(struct sw_flow *); void ovs_flow_free(struct sw_flow *flow); -struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *); +struct sw_flow_actions *ovs_flow_actions_alloc(int actions_len); void ovs_flow_deferred_free_acts(struct sw_flow_actions *); int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, @@ -141,10 +172,10 @@ u64 ovs_flow_used_time(unsigned long flow_jiffies); int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, const struct nlattr *); -int ovs_flow_metadata_from_nlattrs(u32 *priority, u32 *mark, u16 *in_port, - const struct nlattr *); +int ovs_flow_metadata_from_nlattrs(struct sw_flow *flow, int key_len, + const struct nlattr *attr); -#define MAX_ACTIONS_BUFSIZE (16 * 1024) +#define MAX_ACTIONS_BUFSIZE (32 * 1024) #define TBL_MIN_BUCKETS 1024 struct flow_table { @@ -173,11 +204,15 @@ void ovs_flow_tbl_deferred_destroy(struct flow_table *table); struct flow_table *ovs_flow_tbl_alloc(int new_size); struct flow_table *ovs_flow_tbl_expand(struct flow_table *table); struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table); -void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow); +void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, + struct sw_flow_key *key, int key_len); void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow); -u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len); struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx); extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1]; +int ovs_ipv4_tun_from_nlattr(const struct nlattr *attr, + struct ovs_key_ipv4_tunnel *tun_key); +int ovs_ipv4_tun_to_nlattr(struct sk_buff *skb, + const struct ovs_key_ipv4_tunnel *tun_key); #endif /* flow.h */ diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c new file mode 100644 index 000000000000..493e9775dcda --- /dev/null +++ b/net/openvswitch/vport-gre.c @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2007-2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifdef CONFIG_OPENVSWITCH_GRE +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/if.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/if_tunnel.h> +#include <linux/if_vlan.h> +#include <linux/in.h> +#include <linux/if_vlan.h> +#include <linux/in.h> +#include <linux/in_route.h> +#include <linux/inetdevice.h> +#include <linux/jhash.h> +#include <linux/list.h> +#include <linux/kernel.h> +#include <linux/workqueue.h> +#include <linux/rculist.h> +#include <net/route.h> +#include <net/xfrm.h> + +#include <net/icmp.h> +#include <net/ip.h> +#include <net/ip_tunnels.h> +#include <net/gre.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/protocol.h> + +#include "datapath.h" +#include "vport.h" + +/* Returns the least-significant 32 bits of a __be64. */ +static __be32 be64_get_low32(__be64 x) +{ +#ifdef __BIG_ENDIAN + return (__force __be32)x; +#else + return (__force __be32)((__force u64)x >> 32); +#endif +} + +static __be16 filter_tnl_flags(__be16 flags) +{ + return flags & (TUNNEL_CSUM | TUNNEL_KEY); +} + +static struct sk_buff *__build_header(struct sk_buff *skb, + int tunnel_hlen) +{ + const struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(skb)->tun_key; + struct tnl_ptk_info tpi; + + skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM)); + if (IS_ERR(skb)) + return NULL; + + tpi.flags = filter_tnl_flags(tun_key->tun_flags); + tpi.proto = htons(ETH_P_TEB); + tpi.key = be64_get_low32(tun_key->tun_id); + tpi.seq = 0; + gre_build_header(skb, &tpi, tunnel_hlen); + + return skb; +} + +static __be64 key_to_tunnel_id(__be32 key, __be32 seq) +{ +#ifdef __BIG_ENDIAN + return (__force __be64)((__force u64)seq << 32 | (__force u32)key); +#else + return (__force __be64)((__force u64)key << 32 | (__force u32)seq); +#endif +} + +/* Called with rcu_read_lock and BH disabled. */ +static int gre_rcv(struct sk_buff *skb, + const struct tnl_ptk_info *tpi) +{ + struct ovs_key_ipv4_tunnel tun_key; + struct ovs_net *ovs_net; + struct vport *vport; + __be64 key; + + ovs_net = net_generic(dev_net(skb->dev), ovs_net_id); + vport = rcu_dereference(ovs_net->vport_net.gre_vport); + if (unlikely(!vport)) + return PACKET_REJECT; + + key = key_to_tunnel_id(tpi->key, tpi->seq); + ovs_flow_tun_key_init(&tun_key, ip_hdr(skb), key, + filter_tnl_flags(tpi->flags)); + + ovs_vport_receive(vport, skb, &tun_key); + return PACKET_RCVD; +} + +static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct flowi4 fl; + struct rtable *rt; + int min_headroom; + int tunnel_hlen; + __be16 df; + int err; + + if (unlikely(!OVS_CB(skb)->tun_key)) { + err = -EINVAL; + goto error; + } + + /* Route lookup */ + memset(&fl, 0, sizeof(fl)); + fl.daddr = OVS_CB(skb)->tun_key->ipv4_dst; + fl.saddr = OVS_CB(skb)->tun_key->ipv4_src; + fl.flowi4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos); + fl.flowi4_mark = skb->mark; + fl.flowi4_proto = IPPROTO_GRE; + + rt = ip_route_output_key(net, &fl); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + tunnel_hlen = ip_gre_calc_hlen(OVS_CB(skb)->tun_key->tun_flags); + + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + + tunnel_hlen + sizeof(struct iphdr) + + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { + int head_delta = SKB_DATA_ALIGN(min_headroom - + skb_headroom(skb) + + 16); + err = pskb_expand_head(skb, max_t(int, head_delta, 0), + 0, GFP_ATOMIC); + if (unlikely(err)) + goto err_free_rt; + } + + if (vlan_tx_tag_present(skb)) { + if (unlikely(!__vlan_put_tag(skb, + skb->vlan_proto, + vlan_tx_tag_get(skb)))) { + err = -ENOMEM; + goto err_free_rt; + } + skb->vlan_tci = 0; + } + + /* Push Tunnel header. */ + skb = __build_header(skb, tunnel_hlen); + if (unlikely(!skb)) { + err = 0; + goto err_free_rt; + } + + df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? + htons(IP_DF) : 0; + + skb->local_df = 1; + + return iptunnel_xmit(net, rt, skb, fl.saddr, + OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE, + OVS_CB(skb)->tun_key->ipv4_tos, + OVS_CB(skb)->tun_key->ipv4_ttl, df); +err_free_rt: + ip_rt_put(rt); +error: + return err; +} + +static struct gre_cisco_protocol gre_protocol = { + .handler = gre_rcv, + .priority = 1, +}; + +static int gre_ports; +static int gre_init(void) +{ + int err; + + gre_ports++; + if (gre_ports > 1) + return 0; + + err = gre_cisco_register(&gre_protocol); + if (err) + pr_warn("cannot register gre protocol handler\n"); + + return err; +} + +static void gre_exit(void) +{ + gre_ports--; + if (gre_ports > 0) + return; + + gre_cisco_unregister(&gre_protocol); +} + +static const char *gre_get_name(const struct vport *vport) +{ + return vport_priv(vport); +} + +static struct vport *gre_create(const struct vport_parms *parms) +{ + struct net *net = ovs_dp_get_net(parms->dp); + struct ovs_net *ovs_net; + struct vport *vport; + int err; + + err = gre_init(); + if (err) + return ERR_PTR(err); + + ovs_net = net_generic(net, ovs_net_id); + if (ovsl_dereference(ovs_net->vport_net.gre_vport)) { + vport = ERR_PTR(-EEXIST); + goto error; + } + + vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms); + if (IS_ERR(vport)) + goto error; + + strncpy(vport_priv(vport), parms->name, IFNAMSIZ); + rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport); + return vport; + +error: + gre_exit(); + return vport; +} + +static void gre_tnl_destroy(struct vport *vport) +{ + struct net *net = ovs_dp_get_net(vport->dp); + struct ovs_net *ovs_net; + + ovs_net = net_generic(net, ovs_net_id); + + rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL); + ovs_vport_deferred_free(vport); + gre_exit(); +} + +const struct vport_ops ovs_gre_vport_ops = { + .type = OVS_VPORT_TYPE_GRE, + .create = gre_create, + .destroy = gre_tnl_destroy, + .get_name = gre_get_name, + .send = gre_tnl_send, +}; + +#endif /* OPENVSWITCH_GRE */ diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 84e0a0379186..98d3edbbc235 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -67,7 +67,7 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { rcu_read_lock(); - ovs_vport_receive(internal_dev_priv(netdev)->vport, skb); + ovs_vport_receive(internal_dev_priv(netdev)->vport, skb, NULL); rcu_read_unlock(); return 0; } @@ -221,6 +221,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) skb->dev = netdev; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, netdev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); netif_rx(skb); diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 4f01c6d2ffa4..5982f3f62835 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -49,7 +49,9 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) return; skb_push(skb, ETH_HLEN); - ovs_vport_receive(vport, skb); + ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN); + + ovs_vport_receive(vport, skb, NULL); return; error: @@ -170,7 +172,7 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n", netdev_vport->dev->name, packet_length(skb), mtu); - goto error; + goto drop; } skb->dev = netdev_vport->dev; @@ -179,9 +181,8 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) return len; -error: +drop: kfree_skb(skb); - ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); return 0; } diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index a3cb3a32cd77..dd298b5c5cdb 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -39,6 +39,5 @@ netdev_vport_priv(const struct vport *vport) } const char *ovs_netdev_get_name(const struct vport *); -const char *ovs_netdev_get_config(const struct vport *); #endif /* vport_netdev.h */ diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 720623190eaa..d4c7fa04ce08 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -38,6 +38,10 @@ static const struct vport_ops *vport_ops_list[] = { &ovs_netdev_vport_ops, &ovs_internal_vport_ops, + +#ifdef CONFIG_OPENVSWITCH_GRE + &ovs_gre_vport_ops, +#endif }; /* Protected by RCU read lock for reading, ovs_mutex for writing. */ @@ -325,7 +329,8 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb) * Must be called with rcu_read_lock. The packet cannot be shared and * skb->data should point to the Ethernet header. */ -void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) +void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, + struct ovs_key_ipv4_tunnel *tun_key) { struct pcpu_tstats *stats; @@ -335,6 +340,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb) stats->rx_bytes += skb->len; u64_stats_update_end(&stats->syncp); + OVS_CB(skb)->tun_key = tun_key; ovs_dp_process_received_packet(vport, skb); } @@ -351,7 +357,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) { int sent = vport->ops->send(vport, skb); - if (likely(sent)) { + if (likely(sent > 0)) { struct pcpu_tstats *stats; stats = this_cpu_ptr(vport->percpu_stats); @@ -360,7 +366,12 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) stats->tx_packets++; stats->tx_bytes += sent; u64_stats_update_end(&stats->syncp); - } + } else if (sent < 0) { + ovs_vport_record_error(vport, VPORT_E_TX_ERROR); + kfree_skb(skb); + } else + ovs_vport_record_error(vport, VPORT_E_TX_DROPPED); + return sent; } @@ -371,7 +382,7 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb) * @err_type: one of enum vport_err_type types to indicate the error type * * If using the vport generic stats layer indicate that an error of the given - * type has occured. + * type has occurred. */ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) { @@ -397,3 +408,18 @@ void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type) spin_unlock(&vport->stats_lock); } + +static void free_vport_rcu(struct rcu_head *rcu) +{ + struct vport *vport = container_of(rcu, struct vport, rcu); + + ovs_vport_free(vport); +} + +void ovs_vport_deferred_free(struct vport *vport) +{ + if (!vport) + return; + + call_rcu(&vport->rcu, free_vport_rcu); +} diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 68a377bc0841..376045c42f8b 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -34,6 +34,11 @@ struct vport_parms; /* The following definitions are for users of the vport subsytem: */ +/* The following definitions are for users of the vport subsytem: */ +struct vport_net { + struct vport __rcu *gre_vport; +}; + int ovs_vport_init(void); void ovs_vport_exit(void); @@ -123,9 +128,8 @@ struct vport_parms { * existing vport to a &struct sk_buff. May be %NULL for a vport that does not * have any configuration. * @get_name: Get the device's name. - * @get_config: Get the device's configuration. - * May be null if the device does not have an ifindex. - * @send: Send a packet on the device. Returns the length of the packet sent. + * @send: Send a packet on the device. Returns the length of the packet sent, + * zero for dropped packets or negative for error. */ struct vport_ops { enum ovs_vport_type type; @@ -139,7 +143,6 @@ struct vport_ops { /* Called with rcu_read_lock or ovs_mutex. */ const char *(*get_name)(const struct vport *); - void (*get_config)(const struct vport *, void *); int (*send)(struct vport *, struct sk_buff *); }; @@ -154,6 +157,7 @@ enum vport_err_type { struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, const struct vport_parms *); void ovs_vport_free(struct vport *); +void ovs_vport_deferred_free(struct vport *vport); #define VPORT_ALIGN 8 @@ -186,12 +190,21 @@ static inline struct vport *vport_from_priv(const void *priv) return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN)); } -void ovs_vport_receive(struct vport *, struct sk_buff *); +void ovs_vport_receive(struct vport *, struct sk_buff *, + struct ovs_key_ipv4_tunnel *); void ovs_vport_record_error(struct vport *, enum vport_err_type err_type); /* List of statically compiled vport implementations. Don't forget to also * add yours to the list at the top of vport.c. */ extern const struct vport_ops ovs_netdev_vport_ops; extern const struct vport_ops ovs_internal_vport_ops; +extern const struct vport_ops ovs_gre_vport_ops; + +static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb, + const void *start, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); +} #endif /* vport.h */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 20a1bd0e6549..4b66c752eae5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3330,10 +3330,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, } -static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data) +static int packet_notifier(struct notifier_block *this, + unsigned long msg, void *ptr) { struct sock *sk; - struct net_device *dev = data; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); rcu_read_lock(); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 45a7df6575de..56a6146ac94b 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -292,9 +292,9 @@ static void phonet_route_autodel(struct net_device *dev) /* notify Phonet of device events */ static int phonet_device_notify(struct notifier_block *me, unsigned long what, - void *arg) + void *ptr) { - struct net_device *dev = arg; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (what) { case NETDEV_REGISTER: diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index d6bbbbd0af18..c02a8c4bc11f 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -61,13 +61,13 @@ void phonet_get_local_port_range(int *min, int *max) } while (read_seqretry(&local_port_range_lock, seq)); } -static int proc_local_port_range(ctl_table *table, int write, +static int proc_local_port_range(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; int range[2] = {local_port_range[0], local_port_range[1]}; - ctl_table tmp = { + struct ctl_table tmp = { .data = &range, .maxlen = sizeof(range), .mode = table->mode, diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c index 7e643bafb4af..e4e41b3afce7 100644 --- a/net/rds/ib_sysctl.c +++ b/net/rds/ib_sysctl.c @@ -61,7 +61,7 @@ static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; */ unsigned int rds_ib_sysctl_flow_control = 0; -static ctl_table rds_ib_sysctl_table[] = { +static struct ctl_table rds_ib_sysctl_table[] = { { .procname = "max_send_wr", .data = &rds_ib_sysctl_max_send_wr, diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c index 5d5ebd576f3f..89c91515ed0c 100644 --- a/net/rds/iw_sysctl.c +++ b/net/rds/iw_sysctl.c @@ -55,7 +55,7 @@ static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL; unsigned int rds_iw_sysctl_flow_control = 1; -static ctl_table rds_iw_sysctl_table[] = { +static struct ctl_table rds_iw_sysctl_table[] = { { .procname = "max_send_wr", .data = &rds_iw_sysctl_max_send_wr, diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index 907214b4c4d0..b5cb2aa08f33 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -49,7 +49,7 @@ unsigned int rds_sysctl_max_unacked_bytes = (16 << 20); unsigned int rds_sysctl_ping_enable = 1; -static ctl_table rds_sysctl_rds_table[] = { +static struct ctl_table rds_sysctl_rds_table[] = { { .procname = "reconnect_min_delay_ms", .data = &rds_sysctl_reconnect_min_jiffies, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9c8347451597..e98fcfbe6007 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -202,10 +202,10 @@ static void rose_kill_by_device(struct net_device *dev) /* * Handle device status changes. */ -static int rose_device_event(struct notifier_block *this, unsigned long event, - void *ptr) +static int rose_device_event(struct notifier_block *this, + unsigned long event, void *ptr) { - struct net_device *dev = (struct net_device *)ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; diff --git a/net/rose/sysctl_net_rose.c b/net/rose/sysctl_net_rose.c index 94ca9c2ccd69..89a9278795a9 100644 --- a/net/rose/sysctl_net_rose.c +++ b/net/rose/sysctl_net_rose.c @@ -24,7 +24,7 @@ static int min_window[] = {1}, max_window[] = {7}; static struct ctl_table_header *rose_table_header; -static ctl_table rose_table[] = { +static struct ctl_table rose_table[] = { { .procname = "restart_request_timeout", .data = &sysctl_rose_restart_request_timeout, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 5d676edc22a6..977c10e0631b 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -243,7 +243,7 @@ nla_put_failure: static int mirred_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct tcf_mirred *m; if (event == NETDEV_UNREGISTER) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ca8e0a57d945..1f9c31411f19 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -605,6 +605,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, struct sockaddr_atmpvc pvc; int state; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; pvc.sap_addr.vpi = flow->vcc->vpi; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 1bc210ffcba2..7a42c81a19eb 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -130,7 +130,7 @@ struct cbq_class { psched_time_t penalized; struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct tc_cbq_xstats xstats; struct tcf_proto *filter_list; @@ -1465,6 +1465,7 @@ static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_wrropt opt; + memset(&opt, 0, sizeof(opt)); opt.flags = 0; opt.allot = cl->allot; opt.priority = cl->priority + 1; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 759b308d1a8d..8302717ea303 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -25,7 +25,7 @@ struct drr_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct list_head alist; struct Qdisc *qdisc; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 20224086cc28..4626cef4b76e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -901,37 +901,33 @@ void dev_shutdown(struct net_device *dev) void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf) { - u64 factor; - u64 mult; - int shift; - memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; - r->rate_bps = (u64)conf->rate << 3; + r->rate_bytes_ps = conf->rate; r->mult = 1; /* - * Calibrate mult, shift so that token counting is accurate - * for smallest packet size (64 bytes). Token (time in ns) is - * computed as (bytes * 8) * NSEC_PER_SEC / rate_bps. It will - * work as long as the smallest packet transfer time can be - * accurately represented in nanosec. + * The deal here is to replace a divide by a reciprocal one + * in fast path (a reciprocal divide is a multiply and a shift) + * + * Normal formula would be : + * time_in_ns = (NSEC_PER_SEC * len) / rate_bps + * + * We compute mult/shift to use instead : + * time_in_ns = (len * mult) >> shift; + * + * We try to get the highest possible mult value for accuracy, + * but have to make sure no overflows will ever happen. */ - if (r->rate_bps > 0) { - /* - * Higher shift gives better accuracy. Find the largest - * shift such that mult fits in 32 bits. - */ - for (shift = 0; shift < 16; shift++) { - r->shift = shift; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - mult = div64_u64(factor, r->rate_bps); - if (mult > UINT_MAX) + if (r->rate_bytes_ps > 0) { + u64 factor = NSEC_PER_SEC; + + for (;;) { + r->mult = div64_u64(factor, r->rate_bytes_ps); + if (r->mult & (1U << 31) || factor & (1ULL << 63)) break; + factor <<= 1; + r->shift++; } - - r->shift = shift - 1; - factor = 8LLU * NSEC_PER_SEC * (1 << r->shift); - r->mult = div64_u64(factor, r->rate_bps); } } EXPORT_SYMBOL(psched_ratecfg_precompute); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 9facea03faeb..c4075610502c 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -114,7 +114,7 @@ struct hfsc_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; unsigned int level; /* class level in hierarchy */ struct tcf_proto *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index adaedd79389c..45e751527dfc 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -65,6 +65,10 @@ static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis f module_param (htb_hysteresis, int, 0640); MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate"); +static int htb_rate_est = 0; /* htb classes have a default rate estimator */ +module_param(htb_rate_est, int, 0640); +MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes"); + /* used internaly to keep status of single class */ enum htb_cmode { HTB_CANT_SEND, /* class can't send and can't borrow */ @@ -72,95 +76,105 @@ enum htb_cmode { HTB_CAN_SEND /* class can send */ }; -/* interior & leaf nodes; props specific to leaves are marked L: */ +struct htb_prio { + union { + struct rb_root row; + struct rb_root feed; + }; + struct rb_node *ptr; + /* When class changes from state 1->2 and disconnects from + * parent's feed then we lost ptr value and start from the + * first child again. Here we store classid of the + * last valid ptr (used when ptr is NULL). + */ + u32 last_ptr_id; +}; + +/* interior & leaf nodes; props specific to leaves are marked L: + * To reduce false sharing, place mostly read fields at beginning, + * and mostly written ones at the end. + */ struct htb_class { struct Qdisc_class_common common; - /* general class parameters */ - struct gnet_stats_basic_packed bstats; - struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; - struct tc_htb_xstats xstats; /* our special stats */ - int refcnt; /* usage count of this class */ + struct psched_ratecfg rate; + struct psched_ratecfg ceil; + s64 buffer, cbuffer;/* token bucket depth/rate */ + s64 mbuffer; /* max wait time */ + u32 prio; /* these two are used only by leaves... */ + int quantum; /* but stored for parent-to-leaf return */ - /* topology */ - int level; /* our level (see above) */ - unsigned int children; - struct htb_class *parent; /* parent class */ + struct tcf_proto *filter_list; /* class attached filters */ + int filter_cnt; + int refcnt; /* usage count of this class */ - int prio; /* these two are used only by leaves... */ - int quantum; /* but stored for parent-to-leaf return */ + int level; /* our level (see above) */ + unsigned int children; + struct htb_class *parent; /* parent class */ + + struct gnet_stats_rate_est64 rate_est; + + /* + * Written often fields + */ + struct gnet_stats_basic_packed bstats; + struct gnet_stats_queue qstats; + struct tc_htb_xstats xstats; /* our special stats */ + + /* token bucket parameters */ + s64 tokens, ctokens;/* current number of tokens */ + s64 t_c; /* checkpoint time */ union { struct htb_class_leaf { - struct Qdisc *q; - int deficit[TC_HTB_MAXDEPTH]; struct list_head drop_list; + int deficit[TC_HTB_MAXDEPTH]; + struct Qdisc *q; } leaf; struct htb_class_inner { - struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ - struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ - /* When class changes from state 1->2 and disconnects from - * parent's feed then we lost ptr value and start from the - * first child again. Here we store classid of the - * last valid ptr (used when ptr is NULL). - */ - u32 last_ptr_id[TC_HTB_NUMPRIO]; + struct htb_prio clprio[TC_HTB_NUMPRIO]; } inner; } un; - struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ - struct rb_node pq_node; /* node for event queue */ - s64 pq_key; + s64 pq_key; - int prio_activity; /* for which prios are we active */ - enum htb_cmode cmode; /* current mode of the class */ - - /* class attached filters */ - struct tcf_proto *filter_list; - int filter_cnt; + int prio_activity; /* for which prios are we active */ + enum htb_cmode cmode; /* current mode of the class */ + struct rb_node pq_node; /* node for event queue */ + struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ +}; - /* token bucket parameters */ - struct psched_ratecfg rate; - struct psched_ratecfg ceil; - s64 buffer, cbuffer; /* token bucket depth/rate */ - s64 mbuffer; /* max wait time */ - s64 tokens, ctokens; /* current number of tokens */ - s64 t_c; /* checkpoint time */ +struct htb_level { + struct rb_root wait_pq; + struct htb_prio hprio[TC_HTB_NUMPRIO]; }; struct htb_sched { struct Qdisc_class_hash clhash; - struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ - - /* self list - roots of self generating tree */ - struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - int row_mask[TC_HTB_MAXDEPTH]; - struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + int defcls; /* class where unclassified flows go to */ + int rate2quantum; /* quant = rate / rate2quantum */ - /* self wait list - roots of wait PQs per row */ - struct rb_root wait_pq[TC_HTB_MAXDEPTH]; + /* filters for qdisc itself */ + struct tcf_proto *filter_list; - /* time of nearest event per level (row) */ - s64 near_ev_cache[TC_HTB_MAXDEPTH]; +#define HTB_WARN_TOOMANYEVENTS 0x1 + unsigned int warned; /* only one warning */ + int direct_qlen; + struct work_struct work; - int defcls; /* class where unclassified flows go to */ + /* non shaped skbs; let them go directly thru */ + struct sk_buff_head direct_queue; + long direct_pkts; - /* filters for qdisc itself */ - struct tcf_proto *filter_list; + struct qdisc_watchdog watchdog; - int rate2quantum; /* quant = rate / rate2quantum */ - s64 now; /* cached dequeue time */ - struct qdisc_watchdog watchdog; + s64 now; /* cached dequeue time */ + struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ - /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; - int direct_qlen; /* max qlen of above */ + /* time of nearest event per level (row) */ + s64 near_ev_cache[TC_HTB_MAXDEPTH]; - long direct_pkts; + int row_mask[TC_HTB_MAXDEPTH]; -#define HTB_WARN_TOOMANYEVENTS 0x1 - unsigned int warned; /* only one warning */ - struct work_struct work; + struct htb_level hlevel[TC_HTB_MAXDEPTH]; }; /* find class in global hash table using given handle */ @@ -276,7 +290,7 @@ static void htb_add_to_id_tree(struct rb_root *root, static void htb_add_to_wait_tree(struct htb_sched *q, struct htb_class *cl, s64 delay) { - struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; + struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL; cl->pq_key = q->now + delay; if (cl->pq_key == q->now) @@ -296,7 +310,7 @@ static void htb_add_to_wait_tree(struct htb_sched *q, p = &parent->rb_left; } rb_link_node(&cl->pq_node, parent, p); - rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]); + rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq); } /** @@ -323,7 +337,7 @@ static inline void htb_add_class_to_row(struct htb_sched *q, while (mask) { int prio = ffz(~mask); mask &= ~(1 << prio); - htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio); + htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio); } } @@ -349,16 +363,18 @@ static inline void htb_remove_class_from_row(struct htb_sched *q, struct htb_class *cl, int mask) { int m = 0; + struct htb_level *hlevel = &q->hlevel[cl->level]; while (mask) { int prio = ffz(~mask); + struct htb_prio *hprio = &hlevel->hprio[prio]; mask &= ~(1 << prio); - if (q->ptr[cl->level][prio] == cl->node + prio) - htb_next_rb_node(q->ptr[cl->level] + prio); + if (hprio->ptr == cl->node + prio) + htb_next_rb_node(&hprio->ptr); - htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio); - if (!q->row[cl->level][prio].rb_node) + htb_safe_rb_erase(cl->node + prio, &hprio->row); + if (!hprio->row.rb_node) m |= 1 << prio; } q->row_mask[cl->level] &= ~m; @@ -382,13 +398,13 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) int prio = ffz(~m); m &= ~(1 << prio); - if (p->un.inner.feed[prio].rb_node) + if (p->un.inner.clprio[prio].feed.rb_node) /* parent already has its feed in use so that * reset bit in mask as parent is already ok */ mask &= ~(1 << prio); - htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); + htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio); } p->prio_activity |= mask; cl = p; @@ -418,18 +434,19 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) int prio = ffz(~m); m &= ~(1 << prio); - if (p->un.inner.ptr[prio] == cl->node + prio) { + if (p->un.inner.clprio[prio].ptr == cl->node + prio) { /* we are removing child which is pointed to from * parent feed - forget the pointer but remember * classid */ - p->un.inner.last_ptr_id[prio] = cl->common.classid; - p->un.inner.ptr[prio] = NULL; + p->un.inner.clprio[prio].last_ptr_id = cl->common.classid; + p->un.inner.clprio[prio].ptr = NULL; } - htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + prio); + htb_safe_rb_erase(cl->node + prio, + &p->un.inner.clprio[prio].feed); - if (!p->un.inner.feed[prio].rb_node) + if (!p->un.inner.clprio[prio].feed.rb_node) mask |= 1 << prio; } @@ -644,7 +661,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, htb_change_class_mode(q, cl, &diff); if (old_mode != cl->cmode) { if (old_mode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); + htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq); if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } @@ -664,7 +681,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, * next pending event (0 for no event in pq, q->now for too many events). * Note: Applied are events whose have cl->pq_key <= q->now. */ -static s64 htb_do_events(struct htb_sched *q, int level, +static s64 htb_do_events(struct htb_sched *q, const int level, unsigned long start) { /* don't run for longer than 2 jiffies; 2 is used instead of @@ -672,10 +689,12 @@ static s64 htb_do_events(struct htb_sched *q, int level, * too soon */ unsigned long stop_at = start + 2; + struct rb_root *wait_pq = &q->hlevel[level].wait_pq; + while (time_before(jiffies, stop_at)) { struct htb_class *cl; s64 diff; - struct rb_node *p = rb_first(&q->wait_pq[level]); + struct rb_node *p = rb_first(wait_pq); if (!p) return 0; @@ -684,7 +703,7 @@ static s64 htb_do_events(struct htb_sched *q, int level, if (cl->pq_key > q->now) return cl->pq_key; - htb_safe_rb_erase(p, q->wait_pq + level); + htb_safe_rb_erase(p, wait_pq); diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); htb_change_class_mode(q, cl, &diff); if (cl->cmode != HTB_CAN_SEND) @@ -728,8 +747,7 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, * * Find leaf where current feed pointers points to. */ -static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, - struct rb_node **pptr, u32 * pid) +static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio) { int i; struct { @@ -738,10 +756,10 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, u32 *pid; } stk[TC_HTB_MAXDEPTH], *sp = stk; - BUG_ON(!tree->rb_node); - sp->root = tree->rb_node; - sp->pptr = pptr; - sp->pid = pid; + BUG_ON(!hprio->row.rb_node); + sp->root = hprio->row.rb_node; + sp->pptr = &hprio->ptr; + sp->pid = &hprio->last_ptr_id; for (i = 0; i < 65535; i++) { if (!*sp->pptr && *sp->pid) { @@ -768,12 +786,15 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, } } else { struct htb_class *cl; + struct htb_prio *clp; + cl = rb_entry(*sp->pptr, struct htb_class, node[prio]); if (!cl->level) return cl; - (++sp)->root = cl->un.inner.feed[prio].rb_node; - sp->pptr = cl->un.inner.ptr + prio; - sp->pid = cl->un.inner.last_ptr_id + prio; + clp = &cl->un.inner.clprio[prio]; + (++sp)->root = clp->feed.rb_node; + sp->pptr = &clp->ptr; + sp->pid = &clp->last_ptr_id; } } WARN_ON(1); @@ -783,15 +804,16 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, /* dequeues packet at given priority and level; call only if * you are sure that there is active class at prio/level */ -static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, - int level) +static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio, + const int level) { struct sk_buff *skb = NULL; struct htb_class *cl, *start; + struct htb_level *hlevel = &q->hlevel[level]; + struct htb_prio *hprio = &hlevel->hprio[prio]; + /* look initial class up in the row */ - start = cl = htb_lookup_leaf(q->row[level] + prio, prio, - q->ptr[level] + prio, - q->last_ptr_id[level] + prio); + start = cl = htb_lookup_leaf(hprio, prio); do { next: @@ -811,9 +833,7 @@ next: if ((q->row_mask[level] & (1 << prio)) == 0) return NULL; - next = htb_lookup_leaf(q->row[level] + prio, - prio, q->ptr[level] + prio, - q->last_ptr_id[level] + prio); + next = htb_lookup_leaf(hprio, prio); if (cl == start) /* fix start if we just deleted it */ start = next; @@ -826,11 +846,9 @@ next: break; qdisc_warn_nonwc("htb", cl->un.leaf.q); - htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> - ptr[0]) + prio); - cl = htb_lookup_leaf(q->row[level] + prio, prio, - q->ptr[level] + prio, - q->last_ptr_id[level] + prio); + htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr: + &q->hlevel[0].hprio[prio].ptr); + cl = htb_lookup_leaf(hprio, prio); } while (cl != start); @@ -839,8 +857,8 @@ next: cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); if (cl->un.leaf.deficit[level] < 0) { cl->un.leaf.deficit[level] += cl->quantum; - htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> - ptr[0]) + prio); + htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr : + &q->hlevel[0].hprio[prio].ptr); } /* this used to be after charge_class but this constelation * gives us slightly better performance @@ -880,15 +898,14 @@ ok: for (level = 0; level < TC_HTB_MAXDEPTH; level++) { /* common case optimization - skip event handler quickly */ int m; - s64 event; + s64 event = q->near_ev_cache[level]; - if (q->now >= q->near_ev_cache[level]) { + if (q->now >= event) { event = htb_do_events(q, level, start_at); if (!event) event = q->now + NSEC_PER_SEC; q->near_ev_cache[level] = event; - } else - event = q->near_ev_cache[level]; + } if (next_event > event) next_event = event; @@ -968,10 +985,8 @@ static void htb_reset(struct Qdisc *sch) qdisc_watchdog_cancel(&q->watchdog); __skb_queue_purge(&q->direct_queue); sch->q.qlen = 0; - memset(q->row, 0, sizeof(q->row)); + memset(q->hlevel, 0, sizeof(q->hlevel)); memset(q->row_mask, 0, sizeof(q->row_mask)); - memset(q->wait_pq, 0, sizeof(q->wait_pq)); - memset(q->ptr, 0, sizeof(q->ptr)); for (i = 0; i < TC_HTB_NUMPRIO; i++) INIT_LIST_HEAD(q->drops + i); } @@ -1192,7 +1207,8 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity); if (parent->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level); + htb_safe_rb_erase(&parent->pq_node, + &q->hlevel[parent->level].wait_pq); parent->level = 0; memset(&parent->un.inner, 0, sizeof(parent->un.inner)); @@ -1281,7 +1297,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) htb_deactivate(q, cl); if (cl->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); + htb_safe_rb_erase(&cl->pq_node, + &q->hlevel[cl->level].wait_pq); if (last_child) htb_parent_to_leaf(q, cl, new_q); @@ -1366,12 +1383,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; - err = gen_new_estimator(&cl->bstats, &cl->rate_est, - qdisc_root_sleeping_lock(sch), - tca[TCA_RATE] ? : &est.nla); - if (err) { - kfree(cl); - goto failure; + if (htb_rate_est || tca[TCA_RATE]) { + err = gen_new_estimator(&cl->bstats, &cl->rate_est, + qdisc_root_sleeping_lock(sch), + tca[TCA_RATE] ? : &est.nla); + if (err) { + kfree(cl); + goto failure; + } } cl->refcnt = 1; @@ -1401,7 +1420,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { - htb_safe_rb_erase(&parent->pq_node, q->wait_pq); + htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq); parent->cmode = HTB_CAN_SEND; } parent->level = (parent->parent ? parent->parent->level diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 3d2acc7a9c80..82f6016d89ab 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -23,6 +23,7 @@ #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <linux/reciprocal_div.h> +#include <linux/rbtree.h> #include <net/netlink.h> #include <net/pkt_sched.h> @@ -68,7 +69,8 @@ */ struct netem_sched_data { - /* internal t(ime)fifo qdisc uses sch->q and sch->limit */ + /* internal t(ime)fifo qdisc uses t_root and sch->limit */ + struct rb_root t_root; /* optional qdisc for classful handling (NULL at netem init) */ struct Qdisc *qdisc; @@ -128,10 +130,35 @@ struct netem_sched_data { */ struct netem_skb_cb { psched_time_t time_to_send; + ktime_t tstamp_save; }; +/* Because space in skb->cb[] is tight, netem overloads skb->next/prev/tstamp + * to hold a rb_node structure. + * + * If struct sk_buff layout is changed, the following checks will complain. + */ +static struct rb_node *netem_rb_node(struct sk_buff *skb) +{ + BUILD_BUG_ON(offsetof(struct sk_buff, next) != 0); + BUILD_BUG_ON(offsetof(struct sk_buff, prev) != + offsetof(struct sk_buff, next) + sizeof(skb->next)); + BUILD_BUG_ON(offsetof(struct sk_buff, tstamp) != + offsetof(struct sk_buff, prev) + sizeof(skb->prev)); + BUILD_BUG_ON(sizeof(struct rb_node) > sizeof(skb->next) + + sizeof(skb->prev) + + sizeof(skb->tstamp)); + return (struct rb_node *)&skb->next; +} + +static struct sk_buff *netem_rb_to_skb(struct rb_node *rb) +{ + return (struct sk_buff *)rb; +} + static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { + /* we assume we can use skb next/prev/tstamp as storage for rb_node */ qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb)); return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; } @@ -333,20 +360,23 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { - struct sk_buff_head *list = &sch->q; + struct netem_sched_data *q = qdisc_priv(sch); psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; - struct sk_buff *skb = skb_peek_tail(list); + struct rb_node **p = &q->t_root.rb_node, *parent = NULL; - /* Optimize for add at tail */ - if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send)) - return __skb_queue_tail(list, nskb); + while (*p) { + struct sk_buff *skb; - skb_queue_reverse_walk(list, skb) { + parent = *p; + skb = netem_rb_to_skb(parent); if (tnext >= netem_skb_cb(skb)->time_to_send) - break; + p = &parent->rb_right; + else + p = &parent->rb_left; } - - __skb_queue_after(list, skb, nskb); + rb_link_node(netem_rb_node(nskb), parent, p); + rb_insert_color(netem_rb_node(nskb), &q->t_root); + sch->q.qlen++; } /* @@ -436,23 +466,28 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) now = psched_get_time(); if (q->rate) { - struct sk_buff_head *list = &sch->q; + struct sk_buff *last; - if (!skb_queue_empty(list)) { + if (!skb_queue_empty(&sch->q)) + last = skb_peek_tail(&sch->q); + else + last = netem_rb_to_skb(rb_last(&q->t_root)); + if (last) { /* * Last packet in queue is reference point (now), * calculate this time bonus and subtract * from delay. */ - delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now; + delay -= netem_skb_cb(last)->time_to_send - now; delay = max_t(psched_tdiff_t, 0, delay); - now = netem_skb_cb(skb_peek_tail(list))->time_to_send; + now = netem_skb_cb(last)->time_to_send; } delay += packet_len_2_sched_time(skb->len, q); } cb->time_to_send = now + delay; + cb->tstamp_save = skb->tstamp; ++q->counter; tfifo_enqueue(skb, sch); } else { @@ -476,6 +511,21 @@ static unsigned int netem_drop(struct Qdisc *sch) unsigned int len; len = qdisc_queue_drop(sch); + + if (!len) { + struct rb_node *p = rb_first(&q->t_root); + + if (p) { + struct sk_buff *skb = netem_rb_to_skb(p); + + rb_erase(p, &q->t_root); + sch->q.qlen--; + skb->next = NULL; + skb->prev = NULL; + len = qdisc_pkt_len(skb); + kfree_skb(skb); + } + } if (!len && q->qdisc && q->qdisc->ops->drop) len = q->qdisc->ops->drop(q->qdisc); if (len) @@ -488,19 +538,35 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; + struct rb_node *p; if (qdisc_is_throttled(sch)) return NULL; tfifo_dequeue: - skb = qdisc_peek_head(sch); + skb = __skb_dequeue(&sch->q); if (skb) { - const struct netem_skb_cb *cb = netem_skb_cb(skb); +deliver: + sch->qstats.backlog -= qdisc_pkt_len(skb); + qdisc_unthrottled(sch); + qdisc_bstats_update(sch, skb); + return skb; + } + p = rb_first(&q->t_root); + if (p) { + psched_time_t time_to_send; + + skb = netem_rb_to_skb(p); /* if more time remaining? */ - if (cb->time_to_send <= psched_get_time()) { - __skb_unlink(skb, &sch->q); - sch->qstats.backlog -= qdisc_pkt_len(skb); + time_to_send = netem_skb_cb(skb)->time_to_send; + if (time_to_send <= psched_get_time()) { + rb_erase(p, &q->t_root); + + sch->q.qlen--; + skb->next = NULL; + skb->prev = NULL; + skb->tstamp = netem_skb_cb(skb)->tstamp_save; #ifdef CONFIG_NET_CLS_ACT /* @@ -522,10 +588,7 @@ tfifo_dequeue: } goto tfifo_dequeue; } -deliver: - qdisc_unthrottled(sch); - qdisc_bstats_update(sch, skb); - return skb; + goto deliver; } if (q->qdisc) { @@ -533,7 +596,7 @@ deliver: if (skb) goto deliver; } - qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); + qdisc_watchdog_schedule(&q->watchdog, time_to_send); } if (q->qdisc) { diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d51852bba01c..8056fb4e618a 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -113,7 +113,6 @@ #define FRAC_BITS 30 /* fixed point arithmetic */ #define ONE_FP (1UL << FRAC_BITS) -#define IWSUM (ONE_FP/QFQ_MAX_WSUM) #define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ #define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ @@ -138,7 +137,7 @@ struct qfq_class { struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; + struct gnet_stats_rate_est64 rate_est; struct Qdisc *qdisc; struct list_head alist; /* Link for active-classes list. */ struct qfq_aggregate *agg; /* Parent aggregate. */ @@ -189,6 +188,7 @@ struct qfq_sched { struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */ u32 num_active_agg; /* Num. of active aggregates */ u32 wsum; /* weight sum */ + u32 iwsum; /* inverse weight sum */ unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ @@ -314,6 +314,7 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg, q->wsum += (int) agg->class_weight * (new_num_classes - agg->num_classes); + q->iwsum = ONE_FP / q->wsum; agg->num_classes = new_num_classes; } @@ -340,6 +341,10 @@ static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg) { if (!hlist_unhashed(&agg->nonfull_next)) hlist_del_init(&agg->nonfull_next); + q->wsum -= agg->class_weight; + if (q->wsum != 0) + q->iwsum = ONE_FP / q->wsum; + if (q->in_serv_agg == agg) q->in_serv_agg = qfq_choose_next_agg(q); kfree(agg); @@ -821,44 +826,73 @@ static void qfq_make_eligible(struct qfq_sched *q) unsigned long old_vslot = q->oldV >> q->min_slot_shift; if (vslot != old_vslot) { - unsigned long mask = (1ULL << fls(vslot ^ old_vslot)) - 1; + unsigned long mask; + int last_flip_pos = fls(vslot ^ old_vslot); + + if (last_flip_pos > 31) /* higher than the number of groups */ + mask = ~0UL; /* make all groups eligible */ + else + mask = (1UL << last_flip_pos) - 1; + qfq_move_groups(q, mask, IR, ER); qfq_move_groups(q, mask, IB, EB); } } - /* - * The index of the slot in which the aggregate is to be inserted must - * not be higher than QFQ_MAX_SLOTS-2. There is a '-2' and not a '-1' - * because the start time of the group may be moved backward by one - * slot after the aggregate has been inserted, and this would cause - * non-empty slots to be right-shifted by one position. + * The index of the slot in which the input aggregate agg is to be + * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2' + * and not a '-1' because the start time of the group may be moved + * backward by one slot after the aggregate has been inserted, and + * this would cause non-empty slots to be right-shifted by one + * position. * - * If the weight and lmax (max_pkt_size) of the classes do not change, - * then QFQ+ does meet the above contraint according to the current - * values of its parameters. In fact, if the weight and lmax of the - * classes do not change, then, from the theory, QFQ+ guarantees that - * the slot index is never higher than - * 2 + QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * - * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM) = 2 + 8 * 128 * (1 / 64) = 18 + * QFQ+ fully satisfies this bound to the slot index if the parameters + * of the classes are not changed dynamically, and if QFQ+ never + * happens to postpone the service of agg unjustly, i.e., it never + * happens that the aggregate becomes backlogged and eligible, or just + * eligible, while an aggregate with a higher approximated finish time + * is being served. In particular, in this case QFQ+ guarantees that + * the timestamps of agg are low enough that the slot index is never + * higher than 2. Unfortunately, QFQ+ cannot provide the same + * guarantee if it happens to unjustly postpone the service of agg, or + * if the parameters of some class are changed. * - * When the weight of a class is increased or the lmax of the class is - * decreased, a new aggregate with smaller slot size than the original - * parent aggregate of the class may happen to be activated. The - * activation of this aggregate should be properly delayed to when the - * service of the class has finished in the ideal system tracked by - * QFQ+. If the activation of the aggregate is not delayed to this - * reference time instant, then this aggregate may be unjustly served - * before other aggregates waiting for service. This may cause the - * above bound to the slot index to be violated for some of these - * unlucky aggregates. + * As for the first event, i.e., an out-of-order service, the + * upper bound to the slot index guaranteed by QFQ+ grows to + * 2 + + * QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * + * (current_max_weight/current_wsum) <= 2 + 8 * 128 * 1. + * + * The following function deals with this problem by backward-shifting + * the timestamps of agg, if needed, so as to guarantee that the slot + * index is never higher than QFQ_MAX_SLOTS-2. This backward-shift may + * cause the service of other aggregates to be postponed, yet the + * worst-case guarantees of these aggregates are not violated. In + * fact, in case of no out-of-order service, the timestamps of agg + * would have been even lower than they are after the backward shift, + * because QFQ+ would have guaranteed a maximum value equal to 2 for + * the slot index, and 2 < QFQ_MAX_SLOTS-2. Hence the aggregates whose + * service is postponed because of the backward-shift would have + * however waited for the service of agg before being served. + * + * The other event that may cause the slot index to be higher than 2 + * for agg is a recent change of the parameters of some class. If the + * weight of a class is increased or the lmax (max_pkt_size) of the + * class is decreased, then a new aggregate with smaller slot size + * than the original parent aggregate of the class may happen to be + * activated. The activation of this aggregate should be properly + * delayed to when the service of the class has finished in the ideal + * system tracked by QFQ+. If the activation of the aggregate is not + * delayed to this reference time instant, then this aggregate may be + * unjustly served before other aggregates waiting for service. This + * may cause the above bound to the slot index to be violated for some + * of these unlucky aggregates. * * Instead of delaying the activation of the new aggregate, which is - * quite complex, the following inaccurate but simple solution is used: - * if the slot index is higher than QFQ_MAX_SLOTS-2, then the - * timestamps of the aggregate are shifted backward so as to let the - * slot index become equal to QFQ_MAX_SLOTS-2. + * quite complex, the above-discussed capping of the slot index is + * used to handle also the consequences of a change of the parameters + * of a class. */ static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg, u64 roundedS) @@ -1003,9 +1037,61 @@ static inline void charge_actual_service(struct qfq_aggregate *agg) agg->F = agg->S + (u64)service_received * agg->inv_w; } -static inline void qfq_update_agg_ts(struct qfq_sched *q, - struct qfq_aggregate *agg, - enum update_reason reason); +/* Assign a reasonable start time for a new aggregate in group i. + * Admissible values for \hat(F) are multiples of \sigma_i + * no greater than V+\sigma_i . Larger values mean that + * we had a wraparound so we consider the timestamp to be stale. + * + * If F is not stale and F >= V then we set S = F. + * Otherwise we should assign S = V, but this may violate + * the ordering in EB (see [2]). So, if we have groups in ER, + * set S to the F_j of the first group j which would be blocking us. + * We are guaranteed not to move S backward because + * otherwise our group i would still be blocked. + */ +static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg) +{ + unsigned long mask; + u64 limit, roundedF; + int slot_shift = agg->grp->slot_shift; + + roundedF = qfq_round_down(agg->F, slot_shift); + limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift); + + if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) { + /* timestamp was stale */ + mask = mask_from(q->bitmaps[ER], agg->grp->index); + if (mask) { + struct qfq_group *next = qfq_ffs(q, mask); + if (qfq_gt(roundedF, next->F)) { + if (qfq_gt(limit, next->F)) + agg->S = next->F; + else /* preserve timestamp correctness */ + agg->S = limit; + return; + } + } + agg->S = q->V; + } else /* timestamp is not stale */ + agg->S = agg->F; +} + +/* Update the timestamps of agg before scheduling/rescheduling it for + * service. In particular, assign to agg->F its maximum possible + * value, i.e., the virtual finish time with which the aggregate + * should be labeled if it used all its budget once in service. + */ +static inline void +qfq_update_agg_ts(struct qfq_sched *q, + struct qfq_aggregate *agg, enum update_reason reason) +{ + if (reason != requeue) + qfq_update_start(q, agg); + else /* just charge agg for the service received */ + agg->S = agg->F; + + agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w; +} static void qfq_schedule_agg(struct qfq_sched *q, struct qfq_aggregate *agg); @@ -1077,7 +1163,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) else in_serv_agg->budget -= len; - q->V += (u64)len * IWSUM; + q->V += (u64)len * q->iwsum; pr_debug("qfq dequeue: len %u F %lld now %lld\n", len, (unsigned long long) in_serv_agg->F, (unsigned long long) q->V); @@ -1128,66 +1214,6 @@ static struct qfq_aggregate *qfq_choose_next_agg(struct qfq_sched *q) return agg; } -/* - * Assign a reasonable start time for a new aggregate in group i. - * Admissible values for \hat(F) are multiples of \sigma_i - * no greater than V+\sigma_i . Larger values mean that - * we had a wraparound so we consider the timestamp to be stale. - * - * If F is not stale and F >= V then we set S = F. - * Otherwise we should assign S = V, but this may violate - * the ordering in EB (see [2]). So, if we have groups in ER, - * set S to the F_j of the first group j which would be blocking us. - * We are guaranteed not to move S backward because - * otherwise our group i would still be blocked. - */ -static void qfq_update_start(struct qfq_sched *q, struct qfq_aggregate *agg) -{ - unsigned long mask; - u64 limit, roundedF; - int slot_shift = agg->grp->slot_shift; - - roundedF = qfq_round_down(agg->F, slot_shift); - limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift); - - if (!qfq_gt(agg->F, q->V) || qfq_gt(roundedF, limit)) { - /* timestamp was stale */ - mask = mask_from(q->bitmaps[ER], agg->grp->index); - if (mask) { - struct qfq_group *next = qfq_ffs(q, mask); - if (qfq_gt(roundedF, next->F)) { - if (qfq_gt(limit, next->F)) - agg->S = next->F; - else /* preserve timestamp correctness */ - agg->S = limit; - return; - } - } - agg->S = q->V; - } else /* timestamp is not stale */ - agg->S = agg->F; -} - -/* - * Update the timestamps of agg before scheduling/rescheduling it for - * service. In particular, assign to agg->F its maximum possible - * value, i.e., the virtual finish time with which the aggregate - * should be labeled if it used all its budget once in service. - */ -static inline void -qfq_update_agg_ts(struct qfq_sched *q, - struct qfq_aggregate *agg, enum update_reason reason) -{ - if (reason != requeue) - qfq_update_start(q, agg); - else /* just charge agg for the service received */ - agg->S = agg->F; - - agg->F = agg->S + (u64)agg->budgetmax * agg->inv_w; -} - -static void qfq_schedule_agg(struct qfq_sched *, struct qfq_aggregate *); - static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct qfq_sched *q = qdisc_priv(sch); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index e478d316602b..1aaf1b6e51a2 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -116,14 +116,57 @@ struct tbf_sched_data { struct qdisc_watchdog watchdog; /* Watchdog timer */ }; + +/* GSO packet is too big, segment it so that tbf can transmit + * each segment in time + */ +static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) +{ + struct tbf_sched_data *q = qdisc_priv(sch); + struct sk_buff *segs, *nskb; + netdev_features_t features = netif_skb_features(skb); + int ret, nb; + + segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); + + if (IS_ERR_OR_NULL(segs)) + return qdisc_reshape_fail(skb, sch); + + nb = 0; + while (segs) { + nskb = segs->next; + segs->next = NULL; + if (likely(segs->len <= q->max_size)) { + qdisc_skb_cb(segs)->pkt_len = segs->len; + ret = qdisc_enqueue(segs, q->qdisc); + } else { + ret = qdisc_reshape_fail(skb, sch); + } + if (ret != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; + } else { + nb++; + } + segs = nskb; + } + sch->q.qlen += nb; + if (nb > 1) + qdisc_tree_decrease_qlen(sch, 1 - nb); + consume_skb(skb); + return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; +} + static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); int ret; - if (qdisc_pkt_len(skb) > q->max_size) + if (qdisc_pkt_len(skb) > q->max_size) { + if (skb_is_gso(skb)) + return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); - + } ret = qdisc_enqueue(skb, q->qdisc); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index cf4852814e0c..71c1a598d9bc 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -30,7 +30,8 @@ menuconfig IP_SCTP homing at either or both ends of an association." To compile this protocol support as a module, choose M here: the - module will be called sctp. + module will be called sctp. Debug messages are handeled by the + kernel's dynamic debugging framework. If in doubt, say N. @@ -48,14 +49,6 @@ config NET_SCTPPROBE To compile this code as a module, choose M here: the module will be called sctp_probe. -config SCTP_DBG_MSG - bool "SCTP: Debug messages" - help - If you say Y, this will enable verbose debugging messages. - - If unsure, say N. However, if you are running into problems, use - this option to gather detailed trace information - config SCTP_DBG_OBJCNT bool "SCTP: Debug object counts" depends on PROC_FS diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 91cfd8f94a19..bce5b79662a6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -86,10 +86,9 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Discarding const is appropriate here. */ asoc->ep = (struct sctp_endpoint *)ep; - sctp_endpoint_hold(asoc->ep); - - /* Hold the sock. */ asoc->base.sk = (struct sock *)sk; + + sctp_endpoint_hold(asoc->ep); sock_hold(asoc->base.sk); /* Initialize the common base substructure. */ @@ -103,13 +102,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); asoc->state = SCTP_STATE_CLOSED; - - /* Set these values from the socket values, a conversion between - * millsecons to seconds/microseconds must also be done. - */ - asoc->cookie_life.tv_sec = sp->assocparams.sasoc_cookie_life / 1000; - asoc->cookie_life.tv_usec = (sp->assocparams.sasoc_cookie_life % 1000) - * 1000; + asoc->cookie_life = ms_to_ktime(sp->assocparams.sasoc_cookie_life); asoc->frag_point = 0; asoc->user_frag = sp->user_frag; @@ -343,8 +336,8 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a return asoc; fail_init: - sctp_endpoint_put(asoc->ep); sock_put(asoc->base.sk); + sctp_endpoint_put(asoc->ep); return NULL; } @@ -356,7 +349,7 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, { struct sctp_association *asoc; - asoc = t_new(struct sctp_association, gfp); + asoc = kzalloc(sizeof(*asoc), gfp); if (!asoc) goto fail; @@ -364,7 +357,8 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, goto fail_init; SCTP_DBG_OBJCNT_INC(assoc); - SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); + + pr_debug("Created asoc %p\n", asoc); return asoc; @@ -462,7 +456,10 @@ void sctp_association_free(struct sctp_association *asoc) /* Cleanup and free up an association. */ static void sctp_association_destroy(struct sctp_association *asoc) { - SCTP_ASSERT(asoc->base.dead, "Assoc is not dead", return); + if (unlikely(!asoc->base.dead)) { + WARN(1, "Attempt to destroy undead association %p!\n", asoc); + return; + } sctp_endpoint_put(asoc->ep); sock_put(asoc->base.sk); @@ -543,11 +540,8 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc, struct list_head *pos; struct sctp_transport *transport; - SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_rm_peer:association %p addr: ", - " port: %d\n", - asoc, - (&peer->ipaddr), - ntohs(peer->ipaddr.v4.sin_port)); + pr_debug("%s: association:%p addr:%pISpc\n", + __func__, asoc, &peer->ipaddr.sa); /* If we are to remove the current retran_path, update it * to the next peer before removing this peer from the list. @@ -643,12 +637,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* AF_INET and AF_INET6 share common port field. */ port = ntohs(addr->v4.sin_port); - SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ", - " port: %d state:%d\n", - asoc, - addr, - port, - peer_state); + pr_debug("%s: association:%p addr:%pISpc state:%d\n", __func__, + asoc, &addr->sa, peer_state); /* Set the port if it has not been set yet. */ if (0 == asoc->peer.port) @@ -715,8 +705,9 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, else asoc->pathmtu = peer->pathmtu; - SCTP_DEBUG_PRINTK("sctp_assoc_add_peer:association %p PMTU set to " - "%d\n", asoc, asoc->pathmtu); + pr_debug("%s: association:%p PMTU set to %d\n", __func__, asoc, + asoc->pathmtu); + peer->pmtu_pending = 0; asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); @@ -1356,12 +1347,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) else t = asoc->peer.retran_path; - SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association" - " %p addr: ", - " port: %d\n", - asoc, - (&t->ipaddr), - ntohs(t->ipaddr.v4.sin_port)); + pr_debug("%s: association:%p addr:%pISpc\n", __func__, asoc, + &t->ipaddr.sa); } /* Choose the transport for sending retransmit packet. */ @@ -1408,8 +1395,8 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) asoc->frag_point = sctp_frag_point(asoc, pmtu); } - SCTP_DEBUG_PRINTK("%s: asoc:%p, pmtu:%d, frag_point:%d\n", - __func__, asoc, asoc->pathmtu, asoc->frag_point); + pr_debug("%s: asoc:%p, pmtu:%d, frag_point:%d\n", __func__, asoc, + asoc->pathmtu, asoc->frag_point); } /* Should we send a SACK to update our peer? */ @@ -1461,9 +1448,9 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) asoc->rwnd_press -= change; } - SCTP_DEBUG_PRINTK("%s: asoc %p rwnd increased by %d to (%u, %u) " - "- %u\n", __func__, asoc, len, asoc->rwnd, - asoc->rwnd_over, asoc->a_rwnd); + pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->a_rwnd); /* Send a window update SACK if the rwnd has increased by at least the * minimum of the association's PMTU and half of the receive buffer. @@ -1472,9 +1459,11 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len) */ if (sctp_peer_needs_update(asoc)) { asoc->a_rwnd = asoc->rwnd; - SCTP_DEBUG_PRINTK("%s: Sending window update SACK- asoc: %p " - "rwnd: %u a_rwnd: %u\n", __func__, - asoc, asoc->rwnd, asoc->a_rwnd); + + pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u " + "a_rwnd:%u\n", __func__, asoc, asoc->rwnd, + asoc->a_rwnd); + sack = sctp_make_sack(asoc); if (!sack) return; @@ -1496,8 +1485,10 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) int rx_count; int over = 0; - SCTP_ASSERT(asoc->rwnd, "rwnd zero", return); - SCTP_ASSERT(!asoc->rwnd_over, "rwnd_over not zero", return); + if (unlikely(!asoc->rwnd || asoc->rwnd_over)) + pr_debug("%s: association:%p has asoc->rwnd:%u, " + "asoc->rwnd_over:%u!\n", __func__, asoc, + asoc->rwnd, asoc->rwnd_over); if (asoc->ep->rcvbuf_policy) rx_count = atomic_read(&asoc->rmem_alloc); @@ -1522,9 +1513,10 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) asoc->rwnd_over = len - asoc->rwnd; asoc->rwnd = 0; } - SCTP_DEBUG_PRINTK("%s: asoc %p rwnd decreased by %d to (%u, %u, %u)\n", - __func__, asoc, len, asoc->rwnd, - asoc->rwnd_over, asoc->rwnd_press); + + pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n", + __func__, asoc, len, asoc->rwnd, asoc->rwnd_over, + asoc->rwnd_press); } /* Build the bind address list for the association based on info from the diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 41145fe31813..64977ea0f9c5 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -162,7 +162,7 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, struct sctp_sockaddr_entry *addr; /* Add the address to the bind address list. */ - addr = t_new(struct sctp_sockaddr_entry, gfp); + addr = kzalloc(sizeof(*addr), gfp); if (!addr) return -ENOMEM; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 69ce21e3716f..5780565f5b7d 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -66,7 +66,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg) } /* Allocate and initialize datamsg. */ -SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) +static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) { struct sctp_datamsg *msg; msg = kmalloc(sizeof(struct sctp_datamsg), gfp); @@ -193,8 +193,9 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, msg->expires_at = jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive); msg->can_abandon = 1; - SCTP_DEBUG_PRINTK("%s: msg:%p expires_at: %ld jiffies:%ld\n", - __func__, msg, msg->expires_at, jiffies); + + pr_debug("%s: msg:%p expires_at:%ld jiffies:%ld\n", __func__, + msg, msg->expires_at, jiffies); } /* This is the biggest possible DATA chunk that can fit into diff --git a/net/sctp/debug.c b/net/sctp/debug.c index ec997cfe0a7e..f4998780d6df 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -47,10 +47,6 @@ #include <net/sctp/sctp.h> -#if SCTP_DEBUG -int sctp_debug_flag = 1; /* Initially enable DEBUG */ -#endif /* SCTP_DEBUG */ - /* These are printable forms of Chunk ID's from section 3.1. */ static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = { "DATA", diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 5fbd7bc6bb11..9e3d257de0e0 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -192,9 +192,10 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp) struct sctp_endpoint *ep; /* Build a local endpoint. */ - ep = t_new(struct sctp_endpoint, gfp); + ep = kzalloc(sizeof(*ep), gfp); if (!ep) goto fail; + if (!sctp_endpoint_init(ep, sk, gfp)) goto fail_init; @@ -246,10 +247,12 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) /* Final destructor for endpoint. */ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { - SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); + struct sock *sk; - /* Free up the HMAC transform. */ - crypto_free_hash(sctp_sk(ep->base.sk)->hmac); + if (unlikely(!ep->base.dead)) { + WARN(1, "Attempt to destroy undead endpoint %p!\n", ep); + return; + } /* Free the digest buffer */ kfree(ep->digest); @@ -270,13 +273,15 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) memset(ep->secret_key, 0, sizeof(ep->secret_key)); - /* Remove and free the port */ - if (sctp_sk(ep->base.sk)->bind_hash) - sctp_put_port(ep->base.sk); - /* Give up our hold on the sock. */ - if (ep->base.sk) - sock_put(ep->base.sk); + sk = ep->base.sk; + if (sk != NULL) { + /* Remove and free the port */ + if (sctp_sk(sk)->bind_hash) + sctp_put_port(sk); + + sock_put(sk); + } kfree(ep); SCTP_DBG_OBJCNT_DEC(ep); diff --git a/net/sctp/input.c b/net/sctp/input.c index 4b2c83146aa7..3fa4d858c35a 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -454,8 +454,6 @@ void sctp_icmp_proto_unreachable(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t) { - SCTP_DEBUG_PRINTK("%s\n", __func__); - if (sock_owned_by_user(sk)) { if (timer_pending(&t->proto_unreach_timer)) return; @@ -464,10 +462,12 @@ void sctp_icmp_proto_unreachable(struct sock *sk, jiffies + (HZ/20))) sctp_association_hold(asoc); } - } else { struct net *net = sock_net(sk); + pr_debug("%s: unrecognized next header type " + "encountered!\n", __func__); + if (del_timer(&t->proto_unreach_timer)) sctp_association_put(asoc); @@ -589,7 +589,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) struct sctp_association *asoc = NULL; struct sctp_transport *transport; struct inet_sock *inet; - sk_buff_data_t saveip, savesctp; + __u16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); @@ -903,11 +903,11 @@ hit: } /* Look up an association. BH-safe. */ -SCTP_STATIC +static struct sctp_association *sctp_lookup_association(struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, - struct sctp_transport **transportp) + struct sctp_transport **transportp) { struct sctp_association *asoc; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 3221d073448c..cb25f040fed0 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -219,10 +219,10 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) chunk->end_of_packet = 1; } - SCTP_DEBUG_PRINTK("+++sctp_inq_pop+++ chunk %p[%s]," - " length %d, skb->len %d\n",chunk, - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), - ntohs(chunk->chunk_hdr->length), chunk->skb->len); + pr_debug("+++sctp_inq_pop+++ chunk:%p[%s], length:%d, skb->len:%d\n", + chunk, sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), + ntohs(chunk->chunk_hdr->length), chunk->skb->len); + return chunk; } @@ -238,4 +238,3 @@ void sctp_inq_set_th_handler(struct sctp_inq *q, work_func_t callback) { INIT_WORK(&q->immediate, callback); } - diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 391a245d5203..09ffcc912d23 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -145,15 +145,15 @@ static struct notifier_block sctp_inet6addr_notifier = { }; /* ICMP error handler. */ -SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) +static void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { struct inet6_dev *idev; struct sock *sk; struct sctp_association *asoc; struct sctp_transport *transport; struct ipv6_pinfo *np; - sk_buff_data_t saveip, savesctp; + __u16 saveip, savesctp; int err; struct net *net = dev_net(skb->dev); @@ -239,9 +239,8 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) fl6.daddr = *rt0->addr; } - SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", - __func__, skb, skb->len, - &fl6.saddr, &fl6.daddr); + pr_debug("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, + skb->len, &fl6.saddr, &fl6.daddr); SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); @@ -276,7 +275,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) fl6->flowi6_oif = daddr->v6.sin6_scope_id; - SCTP_DEBUG_PRINTK("%s: DST=%pI6 ", __func__, &fl6->daddr); + pr_debug("%s: dst=%pI6 ", __func__, &fl6->daddr); if (asoc) fl6->fl6_sport = htons(asoc->base.bind_addr.port); @@ -284,7 +283,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, if (saddr) { fl6->saddr = saddr->v6.sin6_addr; fl6->fl6_sport = saddr->v6.sin6_port; - SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr); + + pr_debug("src=%pI6 - ", &fl6->saddr); } dst = ip6_dst_lookup_flow(sk, fl6, NULL, false); @@ -348,13 +348,16 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, out: if (!IS_ERR_OR_NULL(dst)) { struct rt6_info *rt; + rt = (struct rt6_info *)dst; t->dst = dst; - SCTP_DEBUG_PRINTK("rt6_dst:%pI6 rt6_src:%pI6\n", - &rt->rt6i_dst.addr, &fl6->saddr); + + pr_debug("rt6_dst:%pI6 rt6_src:%pI6\n", &rt->rt6i_dst.addr, + &fl6->saddr); } else { t->dst = NULL; - SCTP_DEBUG_PRINTK("NO ROUTE\n"); + + pr_debug("no route\n"); } } @@ -377,7 +380,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, struct flowi6 *fl6 = &fl->u.ip6; union sctp_addr *saddr = &t->saddr; - SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p\n", __func__, t->asoc, t->dst); + pr_debug("%s: asoc:%p dst:%p\n", __func__, t->asoc, t->dst); if (t->dst) { saddr->v6.sin6_family = AF_INET6; @@ -402,7 +405,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, read_lock_bh(&in6_dev->lock); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { /* Add the address to the local list. */ - addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_port = 0; diff --git a/net/sctp/output.c b/net/sctp/output.c index bbef4a7a9b56..a46d1eb41762 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -93,8 +93,7 @@ struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, { struct sctp_chunk *chunk = NULL; - SCTP_DEBUG_PRINTK("%s: packet:%p vtag:0x%x\n", __func__, - packet, vtag); + pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); packet->vtag = vtag; @@ -119,8 +118,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, struct sctp_association *asoc = transport->asoc; size_t overhead; - SCTP_DEBUG_PRINTK("%s: packet:%p transport:%p\n", __func__, - packet, transport); + pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport); packet->transport = transport; packet->source_port = sport; @@ -145,7 +143,7 @@ void sctp_packet_free(struct sctp_packet *packet) { struct sctp_chunk *chunk, *tmp; - SCTP_DEBUG_PRINTK("%s: packet:%p\n", __func__, packet); + pr_debug("%s: packet:%p\n", __func__, packet); list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); @@ -167,8 +165,7 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, sctp_xmit_t retval; int error = 0; - SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __func__, - packet, chunk); + pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { case SCTP_XMIT_PMTU_FULL: @@ -334,8 +331,7 @@ sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, { sctp_xmit_t retval = SCTP_XMIT_OK; - SCTP_DEBUG_PRINTK("%s: packet:%p chunk:%p\n", __func__, packet, - chunk); + pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); /* Data chunks are special. Before seeing what else we can * bundle into this packet, check to see if we are allowed to @@ -402,7 +398,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) unsigned char *auth = NULL; /* pointer to auth in skb data */ __u32 cksum_buf_len = sizeof(struct sctphdr); - SCTP_DEBUG_PRINTK("%s: packet:%p\n", __func__, packet); + pr_debug("%s: packet:%p\n", __func__, packet); /* Do NOT generate a chunkless packet. */ if (list_empty(&packet->chunk_list)) @@ -472,7 +468,9 @@ int sctp_packet_transmit(struct sctp_packet *packet) * * [This whole comment explains WORD_ROUND() below.] */ - SCTP_DEBUG_PRINTK("***sctp_transmit_packet***\n"); + + pr_debug("***sctp_transmit_packet***\n"); + list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (sctp_chunk_is_data(chunk)) { @@ -505,16 +503,13 @@ int sctp_packet_transmit(struct sctp_packet *packet) memcpy(skb_put(nskb, chunk->skb->len), chunk->skb->data, chunk->skb->len); - SCTP_DEBUG_PRINTK("%s %p[%s] %s 0x%x, %s %d, %s %d, %s %d\n", - "*** Chunk", chunk, - sctp_cname(SCTP_ST_CHUNK( - chunk->chunk_hdr->type)), - chunk->has_tsn ? "TSN" : "No TSN", - chunk->has_tsn ? - ntohl(chunk->subh.data_hdr->tsn) : 0, - "length", ntohs(chunk->chunk_hdr->length), - "chunk->skb->len", chunk->skb->len, - "rtt_in_progress", chunk->rtt_in_progress); + pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, " + "rtt_in_progress:%d\n", chunk, + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), + chunk->has_tsn ? "TSN" : "No TSN", + chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, + ntohs(chunk->chunk_hdr->length), chunk->skb->len, + chunk->rtt_in_progress); /* * If this is a control chunk, this is our last @@ -606,8 +601,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) } } - SCTP_DEBUG_PRINTK("***sctp_transmit_packet*** skb len %d\n", - nskb->len); + pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); nskb->local_df = packet->ipfragok; (*tp->af_specific->sctp_xmit)(nskb, tp); diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index be35e2dbcc9a..ef9e2bbc0f2f 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -299,10 +299,10 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) struct net *net = sock_net(q->asoc->base.sk); int error = 0; - SCTP_DEBUG_PRINTK("sctp_outq_tail(%p, %p[%s])\n", - q, chunk, chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) - : "Illegal Chunk"); + pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk, + chunk && chunk->chunk_hdr ? + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk"); /* If it is data, queue it up, otherwise, send it * immediately. @@ -328,10 +328,10 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk) break; default: - SCTP_DEBUG_PRINTK("outqueueing (%p, %p[%s])\n", - q, chunk, chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) - : "Illegal Chunk"); + pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n", + __func__, q, chunk, chunk && chunk->chunk_hdr ? + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk"); sctp_outq_tail_data(q, chunk); if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) @@ -460,14 +460,10 @@ void sctp_retransmit_mark(struct sctp_outq *q, } } - SCTP_DEBUG_PRINTK("%s: transport: %p, reason: %d, " - "cwnd: %d, ssthresh: %d, flight_size: %d, " - "pba: %d\n", __func__, - transport, reason, - transport->cwnd, transport->ssthresh, - transport->flight_size, - transport->partial_bytes_acked); - + pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d, " + "flight_size:%d, pba:%d\n", __func__, transport, reason, + transport->cwnd, transport->ssthresh, transport->flight_size, + transport->partial_bytes_acked); } /* Mark all the eligible packets on a transport for retransmission and force @@ -1014,19 +1010,13 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) sctp_transport_burst_limited(transport); } - SCTP_DEBUG_PRINTK("sctp_outq_flush(%p, %p[%s]), ", - q, chunk, - chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK( - chunk->chunk_hdr->type)) - : "Illegal Chunk"); - - SCTP_DEBUG_PRINTK("TX TSN 0x%x skb->head " - "%p skb->users %d.\n", - ntohl(chunk->subh.data_hdr->tsn), - chunk->skb ?chunk->skb->head : NULL, - chunk->skb ? - atomic_read(&chunk->skb->users) : -1); + pr_debug("%s: outq:%p, chunk:%p[%s], tx-tsn:0x%x skb->head:%p " + "skb->users:%d\n", + __func__, q, chunk, chunk && chunk->chunk_hdr ? + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk", ntohl(chunk->subh.data_hdr->tsn), + chunk->skb ? chunk->skb->head : NULL, chunk->skb ? + atomic_read(&chunk->skb->users) : -1); /* Add the chunk to the packet. */ status = sctp_packet_transmit_chunk(packet, chunk, 0); @@ -1038,10 +1028,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) /* We could not append this chunk, so put * the chunk back on the output queue. */ - SCTP_DEBUG_PRINTK("sctp_outq_flush: could " - "not transmit TSN: 0x%x, status: %d\n", - ntohl(chunk->subh.data_hdr->tsn), - status); + pr_debug("%s: could not transmit tsn:0x%x, status:%d\n", + __func__, ntohl(chunk->subh.data_hdr->tsn), + status); + sctp_outq_head_data(q, chunk); goto sctp_flush_out; break; @@ -1284,11 +1274,10 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) sctp_generate_fwdtsn(q, sack_ctsn); - SCTP_DEBUG_PRINTK("%s: sack Cumulative TSN Ack is 0x%x.\n", - __func__, sack_ctsn); - SCTP_DEBUG_PRINTK("%s: Cumulative TSN Ack of association, " - "%p is 0x%x. Adv peer ack point: 0x%x\n", - __func__, asoc, ctsn, asoc->adv_peer_ack_point); + pr_debug("%s: sack cumulative tsn ack:0x%x\n", __func__, sack_ctsn); + pr_debug("%s: cumulative tsn ack of assoc:%p is 0x%x, " + "advertised peer ack point:0x%x\n", __func__, asoc, ctsn, + asoc->adv_peer_ack_point); /* See if all chunks are acked. * Make sure the empty queue handler will get run later. @@ -1304,7 +1293,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) goto finish; } - SCTP_DEBUG_PRINTK("sack queue is empty.\n"); + pr_debug("%s: sack queue is empty\n", __func__); finish: return q->empty; } @@ -1345,21 +1334,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, __u8 restart_timer = 0; int bytes_acked = 0; int migrate_bytes = 0; - - /* These state variables are for coherent debug output. --xguo */ - -#if SCTP_DEBUG - __u32 dbg_ack_tsn = 0; /* An ACKed TSN range starts here... */ - __u32 dbg_last_ack_tsn = 0; /* ...and finishes here. */ - __u32 dbg_kept_tsn = 0; /* An un-ACKed range starts here... */ - __u32 dbg_last_kept_tsn = 0; /* ...and finishes here. */ - - /* 0 : The last TSN was ACKed. - * 1 : The last TSN was NOT ACKed (i.e. KEPT). - * -1: We need to initialize. - */ - int dbg_prt_state = -1; -#endif /* SCTP_DEBUG */ + bool forward_progress = false; sack_ctsn = ntohl(sack->cum_tsn_ack); @@ -1426,6 +1401,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); + forward_progress = true; } if (TSN_lte(tsn, sack_ctsn)) { @@ -1439,6 +1415,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * current RTO. */ restart_timer = 1; + forward_progress = true; if (!tchunk->tsn_gap_acked) { /* @@ -1482,57 +1459,11 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ list_add_tail(lchunk, &tlist); } - -#if SCTP_DEBUG - switch (dbg_prt_state) { - case 0: /* last TSN was ACKed */ - if (dbg_last_ack_tsn + 1 == tsn) { - /* This TSN belongs to the - * current ACK range. - */ - break; - } - - if (dbg_last_ack_tsn != dbg_ack_tsn) { - /* Display the end of the - * current range. - */ - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_ack_tsn); - } - - /* Start a new range. */ - SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); - dbg_ack_tsn = tsn; - break; - - case 1: /* The last TSN was NOT ACKed. */ - if (dbg_last_kept_tsn != dbg_kept_tsn) { - /* Display the end of current range. */ - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_kept_tsn); - } - - SCTP_DEBUG_PRINTK_CONT("\n"); - - /* FALL THROUGH... */ - default: - /* This is the first-ever TSN we examined. */ - /* Start a new range of ACK-ed TSNs. */ - SCTP_DEBUG_PRINTK("ACKed: %08x", tsn); - dbg_prt_state = 0; - dbg_ack_tsn = tsn; - } - - dbg_last_ack_tsn = tsn; -#endif /* SCTP_DEBUG */ - } else { if (tchunk->tsn_gap_acked) { - SCTP_DEBUG_PRINTK("%s: Receiver reneged on " - "data TSN: 0x%x\n", - __func__, - tsn); + pr_debug("%s: receiver reneged on data TSN:0x%x\n", + __func__, tsn); + tchunk->tsn_gap_acked = 0; if (tchunk->transport) @@ -1551,59 +1482,9 @@ static void sctp_check_transmitted(struct sctp_outq *q, } list_add_tail(lchunk, &tlist); - -#if SCTP_DEBUG - /* See the above comments on ACK-ed TSNs. */ - switch (dbg_prt_state) { - case 1: - if (dbg_last_kept_tsn + 1 == tsn) - break; - - if (dbg_last_kept_tsn != dbg_kept_tsn) - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_kept_tsn); - - SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); - dbg_kept_tsn = tsn; - break; - - case 0: - if (dbg_last_ack_tsn != dbg_ack_tsn) - SCTP_DEBUG_PRINTK_CONT("-%08x", - dbg_last_ack_tsn); - SCTP_DEBUG_PRINTK_CONT("\n"); - - /* FALL THROUGH... */ - default: - SCTP_DEBUG_PRINTK("KEPT: %08x",tsn); - dbg_prt_state = 1; - dbg_kept_tsn = tsn; - } - - dbg_last_kept_tsn = tsn; -#endif /* SCTP_DEBUG */ } } -#if SCTP_DEBUG - /* Finish off the last range, displaying its ending TSN. */ - switch (dbg_prt_state) { - case 0: - if (dbg_last_ack_tsn != dbg_ack_tsn) { - SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn); - } else { - SCTP_DEBUG_PRINTK_CONT("\n"); - } - break; - - case 1: - if (dbg_last_kept_tsn != dbg_kept_tsn) { - SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn); - } else { - SCTP_DEBUG_PRINTK_CONT("\n"); - } - } -#endif /* SCTP_DEBUG */ if (transport) { if (bytes_acked) { struct sctp_association *asoc = transport->asoc; @@ -1625,6 +1506,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ transport->error_count = 0; transport->asoc->overall_error_count = 0; + forward_progress = true; /* * While in SHUTDOWN PENDING, we may have started @@ -1676,9 +1558,9 @@ static void sctp_check_transmitted(struct sctp_outq *q, !list_empty(&tlist) && (sack_ctsn+2 == q->asoc->next_tsn) && q->asoc->state < SCTP_STATE_SHUTDOWN_PENDING) { - SCTP_DEBUG_PRINTK("%s: SACK received for zero " - "window probe: %u\n", - __func__, sack_ctsn); + pr_debug("%s: sack received for zero window " + "probe:%u\n", __func__, sack_ctsn); + q->asoc->overall_error_count = 0; transport->error_count = 0; } @@ -1698,6 +1580,11 @@ static void sctp_check_transmitted(struct sctp_outq *q, jiffies + transport->rto)) sctp_transport_hold(transport); } + + if (forward_progress) { + if (transport->dst) + dst_confirm(transport->dst); + } } list_splice(&tlist, transmitted_queue); @@ -1739,10 +1626,8 @@ static void sctp_mark_missing(struct sctp_outq *q, count_of_newacks, tsn)) { chunk->tsn_missing_report++; - SCTP_DEBUG_PRINTK( - "%s: TSN 0x%x missing counter: %d\n", - __func__, tsn, - chunk->tsn_missing_report); + pr_debug("%s: tsn:0x%x missing counter:%d\n", + __func__, tsn, chunk->tsn_missing_report); } } /* @@ -1762,11 +1647,10 @@ static void sctp_mark_missing(struct sctp_outq *q, if (do_fast_retransmit) sctp_retransmit(q, transport, SCTP_RTXR_FAST_RTX); - SCTP_DEBUG_PRINTK("%s: transport: %p, cwnd: %d, " - "ssthresh: %d, flight_size: %d, pba: %d\n", - __func__, transport, transport->cwnd, - transport->ssthresh, transport->flight_size, - transport->partial_bytes_acked); + pr_debug("%s: transport:%p, cwnd:%d, ssthresh:%d, " + "flight_size:%d, pba:%d\n", __func__, transport, + transport->cwnd, transport->ssthresh, + transport->flight_size, transport->partial_bytes_acked); } } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 4e45ee35d0db..62526c477050 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -134,9 +134,15 @@ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_commo struct sctp_af *af; if (epb->type == SCTP_EP_TYPE_ASSOCIATION) { - asoc = sctp_assoc(epb); - peer = asoc->peer.primary_path; - primary = &peer->saddr; + asoc = sctp_assoc(epb); + + peer = asoc->peer.primary_path; + if (unlikely(peer == NULL)) { + WARN(1, "Association %p with NULL primary path!\n", asoc); + return; + } + + primary = &peer->saddr; } rcu_read_lock(); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index eaee00c61139..4a17494d736c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -153,7 +153,7 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { /* Add the address to the local list. */ - addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC); + addr = kzalloc(sizeof(*addr), GFP_ATOMIC); if (addr) { addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; @@ -178,7 +178,7 @@ static void sctp_get_local_addr_list(struct net *net) rcu_read_lock(); for_each_netdev_rcu(net, dev) { - __list_for_each(pos, &sctp_address_families) { + list_for_each(pos, &sctp_address_families) { af = list_entry(pos, struct sctp_af, list); af->copy_addrlist(&net->sctp.local_addr_list, dev); } @@ -451,8 +451,8 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, fl4->fl4_sport = saddr->v4.sin_port; } - SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ", - __func__, &fl4->daddr, &fl4->saddr); + pr_debug("%s: dst:%pI4, src:%pI4 - ", __func__, &fl4->daddr, + &fl4->saddr); rt = ip_route_output_key(sock_net(sk), fl4); if (!IS_ERR(rt)) @@ -513,10 +513,10 @@ out_unlock: out: t->dst = dst; if (dst) - SCTP_DEBUG_PRINTK("rt_dst:%pI4, rt_src:%pI4\n", - &fl4->daddr, &fl4->saddr); + pr_debug("rt_dst:%pI4, rt_src:%pI4\n", + &fl4->daddr, &fl4->saddr); else - SCTP_DEBUG_PRINTK("NO ROUTE\n"); + pr_debug("no route\n"); } /* For v4, the source address is cached in the route entry(dst). So no need @@ -604,9 +604,9 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg) spin_lock_bh(&net->sctp.addr_wq_lock); list_for_each_entry_safe(addrw, temp, &net->sctp.addr_waitq, list) { - SCTP_DEBUG_PRINTK_IPADDR("sctp_addrwq_timo_handler: the first ent in wq %p is ", - " for cmd %d at entry %p\n", &net->sctp.addr_waitq, &addrw->a, addrw->state, - addrw); + pr_debug("%s: the first ent in wq:%p is addr:%pISc for cmd:%d at " + "entry:%p\n", __func__, &net->sctp.addr_waitq, &addrw->a.sa, + addrw->state, addrw); #if IS_ENABLED(CONFIG_IPV6) /* Now we send an ASCONF for each association */ @@ -623,8 +623,10 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg) addrw->state == SCTP_ADDR_NEW) { unsigned long timeo_val; - SCTP_DEBUG_PRINTK("sctp_timo_handler: this is on DAD, trying %d sec later\n", - SCTP_ADDRESS_TICK_DELAY); + pr_debug("%s: this is on DAD, trying %d sec " + "later\n", __func__, + SCTP_ADDRESS_TICK_DELAY); + timeo_val = jiffies; timeo_val += msecs_to_jiffies(SCTP_ADDRESS_TICK_DELAY); mod_timer(&net->sctp.addr_wq_timer, timeo_val); @@ -641,7 +643,7 @@ static void sctp_addr_wq_timeout_handler(unsigned long arg) continue; sctp_bh_lock_sock(sk); if (sctp_asconf_mgmt(sp, addrw) < 0) - SCTP_DEBUG_PRINTK("sctp_addrwq_timo_handler: sctp_asconf_mgmt failed\n"); + pr_debug("%s: sctp_asconf_mgmt failed\n", __func__); sctp_bh_unlock_sock(sk); } #if IS_ENABLED(CONFIG_IPV6) @@ -707,9 +709,10 @@ void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cm addrw = sctp_addr_wq_lookup(net, addr); if (addrw) { if (addrw->state != cmd) { - SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt offsets existing entry for %d ", - " in wq %p\n", addrw->state, &addrw->a, - &net->sctp.addr_waitq); + pr_debug("%s: offsets existing entry for %d, addr:%pISc " + "in wq:%p\n", __func__, addrw->state, &addrw->a.sa, + &net->sctp.addr_waitq); + list_del(&addrw->list); kfree(addrw); } @@ -725,8 +728,9 @@ void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cm } addrw->state = cmd; list_add_tail(&addrw->list, &net->sctp.addr_waitq); - SCTP_DEBUG_PRINTK_IPADDR("sctp_addr_wq_mgmt add new entry for cmd:%d ", - " in wq %p\n", addrw->state, &addrw->a, &net->sctp.addr_waitq); + + pr_debug("%s: add new entry for cmd:%d, addr:%pISc in wq:%p\n", + __func__, addrw->state, &addrw->a.sa, &net->sctp.addr_waitq); if (!timer_pending(&net->sctp.addr_wq_timer)) { timeo_val = jiffies; @@ -952,15 +956,14 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, { struct inet_sock *inet = inet_sk(skb->sk); - SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", - __func__, skb, skb->len, - &transport->fl.u.ip4.saddr, - &transport->fl.u.ip4.daddr); + pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb, + skb->len, &transport->fl.u.ip4.saddr, &transport->fl.u.ip4.daddr); inet->pmtudisc = transport->param_flags & SPP_PMTUD_ENABLE ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT; SCTP_INC_STATS(sock_net(&inet->sk), SCTP_MIB_OUTSCTPPACKS); + return ip_queue_xmit(skb, &transport->fl); } @@ -1312,7 +1315,7 @@ static struct pernet_operations sctp_net_ops = { }; /* Initialize the universe into something sensible. */ -SCTP_STATIC __init int sctp_init(void) +static __init int sctp_init(void) { int i; int status = -EINVAL; @@ -1321,9 +1324,8 @@ SCTP_STATIC __init int sctp_init(void) int max_share; int order; - /* SCTP_DEBUG sanity check. */ - if (!sctp_sanity_check()) - goto out; + BUILD_BUG_ON(sizeof(struct sctp_ulpevent) > + sizeof(((struct sk_buff *) 0)->cb)); /* Allocate bind_bucket and chunk caches. */ status = -ENOBUFS; @@ -1499,7 +1501,7 @@ err_chunk_cachep: } /* Exit handler for the SCTP protocol. */ -SCTP_STATIC __exit void sctp_exit(void) +static __exit void sctp_exit(void) { /* BUG. This should probably do something useful like clean * up all the remaining associations and all that memory. diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index cf579e71cff0..362ae6e2fd93 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -68,9 +68,8 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -SCTP_STATIC -struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, - __u8 type, __u8 flags, int paylen); +static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, + __u8 type, __u8 flags, int paylen); static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *init_chunk, @@ -742,7 +741,8 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); - SCTP_DEBUG_PRINTK("sackCTSNAck sent: 0x%x.\n", ctsn); + + pr_debug("%s: sackCTSNAck sent:0x%x\n", __func__, ctsn); /* How much room is needed in the chunk? */ num_gabs = sctp_tsnmap_num_gabs(map, gabs); @@ -1288,10 +1288,8 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, if (!retval) goto nodata; - - if (!sk) { - SCTP_DEBUG_PRINTK("chunkifying skb %p w/o an sk\n", skb); - } + if (!sk) + pr_debug("%s: chunkifying skb:%p w/o an sk\n", __func__, skb); INIT_LIST_HEAD(&retval->list); retval->skb = skb; @@ -1353,9 +1351,8 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk) /* Create a new chunk, setting the type and flags headers from the * arguments, reserving enough space for a 'paylen' byte payload. */ -SCTP_STATIC -struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, - __u8 type, __u8 flags, int paylen) +static struct sctp_chunk *sctp_make_chunk(const struct sctp_association *asoc, + __u8 type, __u8 flags, int paylen) { struct sctp_chunk *retval; sctp_chunkhdr_t *chunk_hdr; @@ -1632,8 +1629,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, cookie->c.adaptation_ind = asoc->peer.adaptation_ind; /* Set an expiration time for the cookie. */ - do_gettimeofday(&cookie->c.expiration); - TIMEVAL_ADD(asoc->cookie_life, cookie->c.expiration); + cookie->c.expiration = ktime_add(asoc->cookie_life, + ktime_get()); /* Copy the peer's init packet. */ memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr, @@ -1682,7 +1679,7 @@ struct sctp_association *sctp_unpack_cookie( unsigned int len; sctp_scope_t scope; struct sk_buff *skb = chunk->skb; - struct timeval tv; + ktime_t kt; struct hash_desc desc; /* Header size is static data prior to the actual cookie, including @@ -1759,11 +1756,11 @@ no_hmac: * down the new association establishment instead of every packet. */ if (sock_flag(ep->base.sk, SOCK_TIMESTAMP)) - skb_get_timestamp(skb, &tv); + kt = skb_get_ktime(skb); else - do_gettimeofday(&tv); + kt = ktime_get(); - if (!asoc && tv_lt(bear_cookie->expiration, tv)) { + if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) { /* * Section 3.3.10.3 Stale Cookie Error (3) * @@ -1775,9 +1772,7 @@ no_hmac: len = ntohs(chunk->chunk_hdr->length); *errp = sctp_make_op_error_space(asoc, chunk, len); if (*errp) { - suseconds_t usecs = (tv.tv_sec - - bear_cookie->expiration.tv_sec) * 1000000L + - tv.tv_usec - bear_cookie->expiration.tv_usec; + suseconds_t usecs = ktime_to_us(ktime_sub(kt, bear_cookie->expiration)); __be32 n = htonl(usecs); sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, @@ -2195,8 +2190,9 @@ static sctp_ierror_t sctp_verify_param(struct net *net, break; fallthrough: default: - SCTP_DEBUG_PRINTK("Unrecognized param: %d for chunk %d.\n", - ntohs(param.p->type), cid); + pr_debug("%s: unrecognized param:%d for chunk:%d\n", + __func__, ntohs(param.p->type), cid); + retval = sctp_process_unk_param(asoc, param, chunk, err_chunk); break; } @@ -2516,12 +2512,11 @@ do_addr_param: /* Suggested Cookie Life span increment's unit is msec, * (1/1000sec). */ - asoc->cookie_life.tv_sec += stale / 1000; - asoc->cookie_life.tv_usec += (stale % 1000) * 1000; + asoc->cookie_life = ktime_add_ms(asoc->cookie_life, stale); break; case SCTP_PARAM_HOST_NAME_ADDRESS: - SCTP_DEBUG_PRINTK("unimplemented SCTP_HOST_NAME_ADDRESS\n"); + pr_debug("%s: unimplemented SCTP_HOST_NAME_ADDRESS\n", __func__); break; case SCTP_PARAM_SUPPORTED_ADDRESS_TYPES: @@ -2667,8 +2662,8 @@ fall_through: * called prior to this routine. Simply log the error * here. */ - SCTP_DEBUG_PRINTK("Ignoring param: %d for association %p.\n", - ntohs(param.p->type), asoc); + pr_debug("%s: ignoring param:%d for association:%p.\n", + __func__, ntohs(param.p->type), asoc); break; } @@ -2810,7 +2805,10 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, totallen += paramlen; totallen += addr_param_len; del_pickup = 1; - SCTP_DEBUG_PRINTK("mkasconf_update_ip: picked same-scope del_pending addr, totallen for all addresses is %d\n", totallen); + + pr_debug("%s: picked same-scope del_pending addr, " + "totallen for all addresses is %d\n", + __func__, totallen); } } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 8aab894aeabe..9da68852ee94 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -257,7 +257,7 @@ void sctp_generate_t3_rtx_event(unsigned long peer) sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->T3_rtx_timer, jiffies + (HZ/20))) @@ -297,9 +297,8 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy: timer %d\n", - __func__, - timeout_type); + pr_debug("%s: sock is busy: timer %d\n", __func__, + timeout_type); /* Try again later. */ if (!mod_timer(&asoc->timers[timeout_type], jiffies + (HZ/20))) @@ -377,7 +376,7 @@ void sctp_generate_heartbeat_event(unsigned long data) sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->hb_timer, jiffies + (HZ/20))) @@ -415,7 +414,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) sctp_bh_lock_sock(asoc->base.sk); if (sock_owned_by_user(asoc->base.sk)) { - SCTP_DEBUG_PRINTK("%s:Sock is busy.\n", __func__); + pr_debug("%s: sock is busy\n", __func__); /* Try again later. */ if (!mod_timer(&transport->proto_unreach_timer, @@ -521,11 +520,9 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, if (transport->state != SCTP_INACTIVE && (transport->error_count > transport->pathmaxrxt)) { - SCTP_DEBUG_PRINTK_IPADDR("transport_strike:association %p", - " transport IP: port:%d failed.\n", - asoc, - (&transport->ipaddr), - ntohs(transport->ipaddr.v4.sin_port)); + pr_debug("%s: association:%p transport addr:%pISpc failed\n", + __func__, asoc, &transport->ipaddr.sa); + sctp_assoc_control_transport(asoc, transport, SCTP_TRANSPORT_DOWN, SCTP_FAILED_THRESHOLD); @@ -733,6 +730,12 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, SCTP_HEARTBEAT_SUCCESS); + /* HB-ACK was received for a the proper HB. Consider this + * forward progress. + */ + if (t->dst) + dst_confirm(t->dst); + /* The receiver of the HEARTBEAT ACK should also perform an * RTT measurement for that destination transport address * using the time value carried in the HEARTBEAT ACK chunk. @@ -804,8 +807,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, asoc->state = state; - SCTP_DEBUG_PRINTK("sctp_cmd_new_state: asoc %p[%s]\n", - asoc, sctp_state_tbl[state]); + pr_debug("%s: asoc:%p[%s]\n", __func__, asoc, sctp_state_tbl[state]); if (sctp_style(sk, TCP)) { /* Change the sk->sk_state of a TCP-style socket that has @@ -864,6 +866,7 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, (!asoc->temp) && (sk->sk_shutdown != SHUTDOWN_MASK)) return; + BUG_ON(asoc->peer.primary_path == NULL); sctp_unhash_established(asoc); sctp_association_free(asoc); } @@ -1016,15 +1019,11 @@ static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, asoc->timeouts[timer] = asoc->max_init_timeo; } asoc->init_cycle++; - SCTP_DEBUG_PRINTK( - "T1 %s Timeout adjustment" - " init_err_counter: %d" - " cycle: %d" - " timeout: %ld\n", - name, - asoc->init_err_counter, - asoc->init_cycle, - asoc->timeouts[timer]); + + pr_debug("%s: T1[%s] timeout adjustment init_err_counter:%d" + " cycle:%d timeout:%ld\n", __func__, name, + asoc->init_err_counter, asoc->init_cycle, + asoc->timeouts[timer]); } } @@ -1079,23 +1078,19 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc) * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. */ -#define DEBUG_PRE \ - SCTP_DEBUG_PRINTK("sctp_do_sm prefn: " \ - "ep %p, %s, %s, asoc %p[%s], %s\n", \ - ep, sctp_evttype_tbl[event_type], \ - (*debug_fn)(subtype), asoc, \ - sctp_state_tbl[state], state_fn->name) - -#define DEBUG_POST \ - SCTP_DEBUG_PRINTK("sctp_do_sm postfn: " \ - "asoc %p, status: %s\n", \ - asoc, sctp_status_tbl[status]) - -#define DEBUG_POST_SFX \ - SCTP_DEBUG_PRINTK("sctp_do_sm post sfx: error %d, asoc %p[%s]\n", \ - error, asoc, \ - sctp_state_tbl[(asoc && sctp_id2assoc(ep->base.sk, \ - sctp_assoc2id(asoc)))?asoc->state:SCTP_STATE_CLOSED]) +#define debug_pre_sfn() \ + pr_debug("%s[pre-fn]: ep:%p, %s, %s, asoc:%p[%s], %s\n", __func__, \ + ep, sctp_evttype_tbl[event_type], (*debug_fn)(subtype), \ + asoc, sctp_state_tbl[state], state_fn->name) + +#define debug_post_sfn() \ + pr_debug("%s[post-fn]: asoc:%p, status:%s\n", __func__, asoc, \ + sctp_status_tbl[status]) + +#define debug_post_sfx() \ + pr_debug("%s[post-sfx]: error:%d, asoc:%p[%s]\n", __func__, error, \ + asoc, sctp_state_tbl[(asoc && sctp_id2assoc(ep->base.sk, \ + sctp_assoc2id(asoc))) ? asoc->state : SCTP_STATE_CLOSED]) /* * This is the master state machine processing function. @@ -1115,7 +1110,6 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_disposition_t status; int error = 0; typedef const char *(printfn_t)(sctp_subtype_t); - static printfn_t *table[] = { NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname, }; @@ -1128,21 +1122,18 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, sctp_init_cmd_seq(&commands); - DEBUG_PRE; + debug_pre_sfn(); status = (*state_fn->fn)(net, ep, asoc, subtype, event_arg, &commands); - DEBUG_POST; + debug_post_sfn(); error = sctp_side_effects(event_type, subtype, state, ep, asoc, event_arg, status, &commands, gfp); - DEBUG_POST_SFX; + debug_post_sfx(); return error; } -#undef DEBUG_PRE -#undef DEBUG_POST - /***************************************************************** * This the master state function side effect processing function. *****************************************************************/ @@ -1171,9 +1162,9 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, switch (status) { case SCTP_DISPOSITION_DISCARD: - SCTP_DEBUG_PRINTK("Ignored sctp protocol event - state %d, " - "event_type %d, event_id %d\n", - state, event_type, subtype.chunk); + pr_debug("%s: ignored sctp protocol event - state:%d, " + "event_type:%d, event_id:%d\n", __func__, state, + event_type, subtype.chunk); break; case SCTP_DISPOSITION_NOMEM: @@ -1274,8 +1265,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_outq_uncork(&asoc->outqueue); local_cork = 0; } - asoc = cmd->obj.asoc; + /* Register with the endpoint. */ + asoc = cmd->obj.asoc; + BUG_ON(asoc->peer.primary_path == NULL); sctp_endpoint_add_asoc(ep, asoc); sctp_hash_established(asoc); break; @@ -1422,18 +1415,18 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_CHUNK_ULP: /* Send a chunk to the sockets layer. */ - SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", - "chunk_up:", cmd->obj.chunk, - "ulpq:", &asoc->ulpq); + pr_debug("%s: sm_sideff: chunk_up:%p, ulpq:%p\n", + __func__, cmd->obj.chunk, &asoc->ulpq); + sctp_ulpq_tail_data(&asoc->ulpq, cmd->obj.chunk, GFP_ATOMIC); break; case SCTP_CMD_EVENT_ULP: /* Send a notification to the sockets layer. */ - SCTP_DEBUG_PRINTK("sm_sideff: %s %p, %s %p.\n", - "event_up:",cmd->obj.ulpevent, - "ulpq:",&asoc->ulpq); + pr_debug("%s: sm_sideff: event_up:%p, ulpq:%p\n", + __func__, cmd->obj.ulpevent, &asoc->ulpq); + sctp_ulpq_tail_event(&asoc->ulpq, cmd->obj.ulpevent); break; @@ -1598,7 +1591,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_REPORT_BAD_TAG: - SCTP_DEBUG_PRINTK("vtag mismatch!\n"); + pr_debug("%s: vtag mismatch!\n", __func__); break; case SCTP_CMD_STRIKE: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index de1a0138317f..f6b7109195a6 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1179,9 +1179,9 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net, /* Check if the timestamp looks valid. */ if (time_after(hbinfo->sent_at, jiffies) || time_after(jiffies, hbinfo->sent_at + max_interval)) { - SCTP_DEBUG_PRINTK("%s: HEARTBEAT ACK with invalid timestamp " - "received for transport: %p\n", - __func__, link); + pr_debug("%s: HEARTBEAT ACK with invalid timestamp received " + "for transport:%p\n", __func__, link); + return SCTP_DISPOSITION_DISCARD; } @@ -2562,7 +2562,8 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, const struct sctp_association *asoc, struct sctp_transport *transport) { - SCTP_DEBUG_PRINTK("ABORT received (INIT).\n"); + pr_debug("%s: ABORT received (INIT)\n", __func__); + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS); @@ -2572,6 +2573,7 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, /* CMD_INIT_FAILED will DELETE_TCB. */ sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, SCTP_PERR(error)); + return SCTP_DISPOSITION_ABORT; } @@ -2637,8 +2639,9 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net, ctsn = ntohl(sdh->cum_tsn_ack); if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { - SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn); - SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point); + pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn, + asoc->ctsn_ack_point); + return SCTP_DISPOSITION_DISCARD; } @@ -2721,8 +2724,9 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net, ctsn = ntohl(sdh->cum_tsn_ack); if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { - SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn); - SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point); + pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn, + asoc->ctsn_ack_point); + return SCTP_DISPOSITION_DISCARD; } @@ -3174,8 +3178,9 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, * Point indicates an out-of-order SACK. */ if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { - SCTP_DEBUG_PRINTK("ctsn %x\n", ctsn); - SCTP_DEBUG_PRINTK("ctsn_ack_point %x\n", asoc->ctsn_ack_point); + pr_debug("%s: ctsn:%x, ctsn_ack_point:%x\n", __func__, ctsn, + asoc->ctsn_ack_point); + return SCTP_DISPOSITION_DISCARD; } @@ -3859,7 +3864,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, skb_pull(chunk->skb, len); tsn = ntohl(fwdtsn_hdr->new_cum_tsn); - SCTP_DEBUG_PRINTK("%s: TSN 0x%x.\n", __func__, tsn); + pr_debug("%s: TSN 0x%x\n", __func__, tsn); /* The TSN is too high--silently discard the chunk and count on it * getting retransmitted later. @@ -3927,7 +3932,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( skb_pull(chunk->skb, len); tsn = ntohl(fwdtsn_hdr->new_cum_tsn); - SCTP_DEBUG_PRINTK("%s: TSN 0x%x.\n", __func__, tsn); + pr_debug("%s: TSN 0x%x\n", __func__, tsn); /* The TSN is too high--silently discard the chunk and count on it * getting retransmitted later. @@ -4166,7 +4171,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, struct sctp_chunk *err_chunk; sctp_chunkhdr_t *hdr; - SCTP_DEBUG_PRINTK("Processing the unknown chunk id %d.\n", type.chunk); + pr_debug("%s: processing unknown chunk id:%d\n", __func__, type.chunk); if (!sctp_vtag_verify(unk_chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -4256,7 +4261,8 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - SCTP_DEBUG_PRINTK("Chunk %d is discarded\n", type.chunk); + pr_debug("%s: chunk:%d is discarded\n", __func__, type.chunk); + return SCTP_DISPOSITION_DISCARD; } @@ -4632,16 +4638,16 @@ sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, if (!repl) goto nomem; + /* Choose transport for INIT. */ + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, + SCTP_CHUNK(repl)); + /* Cast away the const modifier, as we want to just * rerun it through as a sideffect. */ my_asoc = (struct sctp_association *)asoc; sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(my_asoc)); - /* Choose transport for INIT. */ - sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, - SCTP_CHUNK(repl)); - /* After sending the INIT, "A" starts the T1-init timer and * enters the COOKIE-WAIT state. */ @@ -5184,7 +5190,9 @@ sctp_disposition_t sctp_sf_ignore_primitive( void *arg, sctp_cmd_seq_t *commands) { - SCTP_DEBUG_PRINTK("Primitive type %d is ignored.\n", type.primitive); + pr_debug("%s: primitive type:%d is ignored\n", __func__, + type.primitive); + return SCTP_DISPOSITION_DISCARD; } @@ -5379,7 +5387,9 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net, void *arg, sctp_cmd_seq_t *commands) { - SCTP_DEBUG_PRINTK("The event other type %d is ignored\n", type.other); + pr_debug("%s: the event other type:%d is ignored\n", + __func__, type.other); + return SCTP_DISPOSITION_DISCARD; } @@ -5527,7 +5537,8 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, struct sctp_bind_addr *bp; int attempts = asoc->init_err_counter + 1; - SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); + pr_debug("%s: timer T1 expired (INIT)\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= asoc->max_init_attempts) { @@ -5546,9 +5557,10 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); } else { - SCTP_DEBUG_PRINTK("Giving up on INIT, attempts: %d" - " max_init_attempts: %d\n", - attempts, asoc->max_init_attempts); + pr_debug("%s: giving up on INIT, attempts:%d " + "max_init_attempts:%d\n", __func__, attempts, + asoc->max_init_attempts); + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); sctp_add_cmd_sf(commands, SCTP_CMD_INIT_FAILED, @@ -5588,7 +5600,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net, struct sctp_chunk *repl = NULL; int attempts = asoc->init_err_counter + 1; - SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); + pr_debug("%s: timer T1 expired (COOKIE-ECHO)\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { @@ -5636,7 +5649,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net, { struct sctp_chunk *reply = NULL; - SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); + pr_debug("%s: timer T2 expired\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T2_SHUTDOWN_EXPIREDS); ((struct sctp_association *)asoc)->shutdown_retries++; @@ -5777,7 +5791,8 @@ sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net, { struct sctp_chunk *reply = NULL; - SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); + pr_debug("%s: timer T5 expired\n", __func__); + SCTP_INC_STATS(net, SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); @@ -5892,7 +5907,8 @@ sctp_disposition_t sctp_sf_timer_ignore(struct net *net, void *arg, sctp_cmd_seq_t *commands) { - SCTP_DEBUG_PRINTK("Timer %d ignored.\n", type.chunk); + pr_debug("%s: timer %d ignored\n", __func__, type.chunk); + return SCTP_DISPOSITION_CONSUME; } @@ -6102,7 +6118,7 @@ static int sctp_eat_data(const struct sctp_association *asoc, skb_pull(chunk->skb, sizeof(sctp_datahdr_t)); tsn = ntohl(data_hdr->tsn); - SCTP_DEBUG_PRINTK("eat_data: TSN 0x%x.\n", tsn); + pr_debug("%s: TSN 0x%x\n", __func__, tsn); /* ASSERT: Now skb->data is really the user data. */ @@ -6179,12 +6195,12 @@ static int sctp_eat_data(const struct sctp_association *asoc, */ if (sctp_tsnmap_has_gap(map) && (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { - SCTP_DEBUG_PRINTK("Reneging for tsn:%u\n", tsn); + pr_debug("%s: reneging for tsn:%u\n", __func__, tsn); deliver = SCTP_CMD_RENEGE; } else { - SCTP_DEBUG_PRINTK("Discard tsn: %u len: %Zd, " - "rwnd: %d\n", tsn, datalen, - asoc->rwnd); + pr_debug("%s: discard tsn:%u len:%zu, rwnd:%d\n", + __func__, tsn, datalen, asoc->rwnd); + return SCTP_IERROR_IGNORE_TSN; } } @@ -6199,7 +6215,8 @@ static int sctp_eat_data(const struct sctp_association *asoc, if (*sk->sk_prot_creator->memory_pressure) { if (sctp_tsnmap_has_gap(map) && (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { - SCTP_DEBUG_PRINTK("Under Pressure! Reneging for tsn:%u\n", tsn); + pr_debug("%s: under pressure, reneging for tsn:%u\n", + __func__, tsn); deliver = SCTP_CMD_RENEGE; } } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6abb1caf9836..c6670d2e3f8d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -84,11 +84,6 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -/* WARNING: Please do not remove the SCTP_STATIC attribute to - * any of the functions below as they are used to export functions - * used by a project regression testsuite. - */ - /* Forward declarations for internal helper functions. */ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); @@ -98,6 +93,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); static void sctp_wait_for_close(struct sock *sk, long timeo); +static void sctp_destruct_sock(struct sock *sk); static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, union sctp_addr *addr, int len); static int sctp_bindx_add(struct sock *, struct sockaddr *, int); @@ -279,14 +275,14 @@ static struct sctp_transport *sctp_addr_id2transport(struct sock *sk, * sockaddr_in6 [RFC 2553]), * addr_len - the size of the address structure. */ -SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) +static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len) { int retval = 0; sctp_lock_sock(sk); - SCTP_DEBUG_PRINTK("sctp_bind(sk: %p, addr: %p, addr_len: %d)\n", - sk, addr, addr_len); + pr_debug("%s: sk:%p, addr:%p, addr_len:%d\n", __func__, sk, + addr, addr_len); /* Disallow binding twice. */ if (!sctp_sk(sk)->ep->base.bind_addr.port) @@ -333,7 +329,7 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, } /* Bind a local address either to an endpoint or to an association. */ -SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) +static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) { struct net *net = sock_net(sk); struct sctp_sock *sp = sctp_sk(sk); @@ -346,19 +342,15 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Common sockaddr verification. */ af = sctp_sockaddr_af(sp, addr, len); if (!af) { - SCTP_DEBUG_PRINTK("sctp_do_bind(sk: %p, newaddr: %p, len: %d) EINVAL\n", - sk, addr, len); + pr_debug("%s: sk:%p, newaddr:%p, len:%d EINVAL\n", + __func__, sk, addr, len); return -EINVAL; } snum = ntohs(addr->v4.sin_port); - SCTP_DEBUG_PRINTK_IPADDR("sctp_do_bind(sk: %p, new addr: ", - ", port: %d, new port: %d, len: %d)\n", - sk, - addr, - bp->port, snum, - len); + pr_debug("%s: sk:%p, new addr:%pISc, port:%d, new port:%d, len:%d\n", + __func__, sk, &addr->sa, bp->port, snum, len); /* PF specific bind() address verification. */ if (!sp->pf->bind_verify(sp, addr)) @@ -372,9 +364,8 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if (!snum) snum = bp->port; else if (snum != bp->port) { - SCTP_DEBUG_PRINTK("sctp_do_bind:" - " New port %d does not match existing port " - "%d.\n", snum, bp->port); + pr_debug("%s: new port %d doesn't match existing port " + "%d\n", __func__, snum, bp->port); return -EINVAL; } } @@ -472,8 +463,8 @@ static int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt) struct sockaddr *sa_addr; struct sctp_af *af; - SCTP_DEBUG_PRINTK("sctp_bindx_add (sk: %p, addrs: %p, addrcnt: %d)\n", - sk, addrs, addrcnt); + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk, + addrs, addrcnt); addr_buf = addrs; for (cnt = 0; cnt < addrcnt; cnt++) { @@ -539,11 +530,10 @@ static int sctp_send_asconf_add_ip(struct sock *sk, sp = sctp_sk(sk); ep = sp->ep; - SCTP_DEBUG_PRINTK("%s: (sk: %p, addrs: %p, addrcnt: %d)\n", - __func__, sk, addrs, addrcnt); + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", + __func__, sk, addrs, addrcnt); list_for_each_entry(asoc, &ep->asocs, asocs) { - if (!asoc->peer.asconf_capable) continue; @@ -650,8 +640,8 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) union sctp_addr *sa_addr; struct sctp_af *af; - SCTP_DEBUG_PRINTK("sctp_bindx_rem (sk: %p, addrs: %p, addrcnt: %d)\n", - sk, addrs, addrcnt); + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", + __func__, sk, addrs, addrcnt); addr_buf = addrs; for (cnt = 0; cnt < addrcnt; cnt++) { @@ -744,8 +734,8 @@ static int sctp_send_asconf_del_ip(struct sock *sk, sp = sctp_sk(sk); ep = sp->ep; - SCTP_DEBUG_PRINTK("%s: (sk: %p, addrs: %p, addrcnt: %d)\n", - __func__, sk, addrs, addrcnt); + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", + __func__, sk, addrs, addrcnt); list_for_each_entry(asoc, &ep->asocs, asocs) { @@ -812,9 +802,11 @@ static int sctp_send_asconf_del_ip(struct sock *sk, sin6 = (struct sockaddr_in6 *)addrs; asoc->asconf_addr_del_pending->v6.sin6_addr = sin6->sin6_addr; } - SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ", - " at %p\n", asoc, asoc->asconf_addr_del_pending, - asoc->asconf_addr_del_pending); + + pr_debug("%s: keep the last address asoc:%p %pISc at %p\n", + __func__, asoc, &asoc->asconf_addr_del_pending->sa, + asoc->asconf_addr_del_pending); + asoc->src_out_of_asoc_ok = 1; stored = 1; goto skip_mkasconf; @@ -964,9 +956,9 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw) * * Returns 0 if ok, <0 errno code on error. */ -SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size, int op) +static int sctp_setsockopt_bindx(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size, int op) { struct sockaddr *kaddrs; int err; @@ -976,8 +968,8 @@ SCTP_STATIC int sctp_setsockopt_bindx(struct sock* sk, void *addr_buf; struct sctp_af *af; - SCTP_DEBUG_PRINTK("sctp_setsockopt_bindx: sk %p addrs %p" - " addrs_size %d opt %d\n", sk, addrs, addrs_size, op); + pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n", + __func__, sk, addrs, addrs_size, op); if (unlikely(addrs_size <= 0)) return -EINVAL; @@ -1235,10 +1227,9 @@ static int __sctp_connect(struct sock* sk, asoc = NULL; out_free: + pr_debug("%s: took out_free path with asoc:%p kaddrs:%p err:%d\n", + __func__, asoc, kaddrs, err); - SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p" - " kaddrs: %p err: %d\n", - asoc, kaddrs, err); if (asoc) { /* sctp_primitive_ASSOCIATE may have added this association * To the hash table, try to unhash it, just in case, its a noop @@ -1312,7 +1303,7 @@ out_free: * * Returns >=0 if ok, <0 errno code on error. */ -SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, +static int __sctp_setsockopt_connectx(struct sock* sk, struct sockaddr __user *addrs, int addrs_size, sctp_assoc_t *assoc_id) @@ -1320,8 +1311,8 @@ SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, int err = 0; struct sockaddr *kaddrs; - SCTP_DEBUG_PRINTK("%s - sk %p addrs %p addrs_size %d\n", - __func__, sk, addrs, addrs_size); + pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", + __func__, sk, addrs, addrs_size); if (unlikely(addrs_size <= 0)) return -EINVAL; @@ -1350,9 +1341,9 @@ SCTP_STATIC int __sctp_setsockopt_connectx(struct sock* sk, * This is an older interface. It's kept for backward compatibility * to the option that doesn't provide association id. */ -SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size) +static int sctp_setsockopt_connectx_old(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) { return __sctp_setsockopt_connectx(sk, addrs, addrs_size, NULL); } @@ -1363,9 +1354,9 @@ SCTP_STATIC int sctp_setsockopt_connectx_old(struct sock* sk, * indication to the call. Error is always negative and association id is * always positive. */ -SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, - struct sockaddr __user *addrs, - int addrs_size) +static int sctp_setsockopt_connectx(struct sock* sk, + struct sockaddr __user *addrs, + int addrs_size) { sctp_assoc_t assoc_id = 0; int err = 0; @@ -1386,9 +1377,9 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk, * addrs_num structure member. That way we can re-use the existing * code. */ -SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, - char __user *optval, - int __user *optlen) +static int sctp_getsockopt_connectx3(struct sock* sk, int len, + char __user *optval, + int __user *optlen) { struct sctp_getaddrs_old param; sctp_assoc_t assoc_id = 0; @@ -1464,7 +1455,7 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len, * shutdown phase does not finish during this period, close() will * return but the graceful shutdown phase continues in the system. */ -SCTP_STATIC void sctp_close(struct sock *sk, long timeout) +static void sctp_close(struct sock *sk, long timeout) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; @@ -1472,7 +1463,7 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout) struct list_head *pos, *temp; unsigned int data_was_unread; - SCTP_DEBUG_PRINTK("sctp_close(sk: 0x%p, timeout:%ld)\n", sk, timeout); + pr_debug("%s: sk:%p, timeout:%ld\n", __func__, sk, timeout); sctp_lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; @@ -1573,10 +1564,10 @@ static int sctp_error(struct sock *sk, int flags, int err) */ /* BUG: We do not implement the equivalent of sk_stream_wait_memory(). */ -SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); +static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); -SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t msg_len) +static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t msg_len) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -1598,14 +1589,12 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct sctp_datamsg *datamsg; int msg_flags = msg->msg_flags; - SCTP_DEBUG_PRINTK("sctp_sendmsg(sk: %p, msg: %p, msg_len: %zu)\n", - sk, msg, msg_len); - err = 0; sp = sctp_sk(sk); ep = sp->ep; - SCTP_DEBUG_PRINTK("Using endpoint: %p.\n", ep); + pr_debug("%s: sk:%p, msg:%p, msg_len:%zu ep:%p\n", __func__, sk, + msg, msg_len, ep); /* We cannot send a message over a TCP-style listening socket. */ if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) { @@ -1615,9 +1604,8 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* Parse out the SCTP CMSGs. */ err = sctp_msghdr_parse(msg, &cmsgs); - if (err) { - SCTP_DEBUG_PRINTK("msghdr parse err = %x\n", err); + pr_debug("%s: msghdr parse err:%x\n", __func__, err); goto out_nounlock; } @@ -1649,8 +1637,8 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, associd = sinfo->sinfo_assoc_id; } - SCTP_DEBUG_PRINTK("msg_len: %zu, sinfo_flags: 0x%x\n", - msg_len, sinfo_flags); + pr_debug("%s: msg_len:%zu, sinfo_flags:0x%x\n", __func__, + msg_len, sinfo_flags); /* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */ if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) { @@ -1679,7 +1667,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, transport = NULL; - SCTP_DEBUG_PRINTK("About to look up association.\n"); + pr_debug("%s: about to look up association\n", __func__); sctp_lock_sock(sk); @@ -1709,7 +1697,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } if (asoc) { - SCTP_DEBUG_PRINTK("Just looked up association: %p.\n", asoc); + pr_debug("%s: just looked up association:%p\n", __func__, asoc); /* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED * socket that has an association in CLOSED state. This can @@ -1722,8 +1710,9 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } if (sinfo_flags & SCTP_EOF) { - SCTP_DEBUG_PRINTK("Shutting down association: %p\n", - asoc); + pr_debug("%s: shutting down association:%p\n", + __func__, asoc); + sctp_primitive_SHUTDOWN(net, asoc, NULL); err = 0; goto out_unlock; @@ -1736,7 +1725,9 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; } - SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); + pr_debug("%s: aborting association:%p\n", + __func__, asoc); + sctp_primitive_ABORT(net, asoc, chunk); err = 0; goto out_unlock; @@ -1745,7 +1736,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* Do we need to create the association? */ if (!asoc) { - SCTP_DEBUG_PRINTK("There is no association yet.\n"); + pr_debug("%s: there is no association yet\n", __func__); if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) { err = -EINVAL; @@ -1844,7 +1835,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, } /* ASSERT: we have a valid association at this point. */ - SCTP_DEBUG_PRINTK("We have a valid association.\n"); + pr_debug("%s: we have a valid association\n", __func__); if (!sinfo) { /* If the user didn't specify SNDRCVINFO, make up one with @@ -1913,7 +1904,8 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, err = sctp_primitive_ASSOCIATE(net, asoc, NULL); if (err < 0) goto out_free; - SCTP_DEBUG_PRINTK("We associated primitively.\n"); + + pr_debug("%s: we associated primitively\n", __func__); } /* Break the message into multiple chunks of maximum size. */ @@ -1940,17 +1932,15 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, */ err = sctp_primitive_SEND(net, asoc, datamsg); /* Did the lower layer accept the chunk? */ - if (err) + if (err) { sctp_datamsg_free(datamsg); - else - sctp_datamsg_put(datamsg); + goto out_free; + } - SCTP_DEBUG_PRINTK("We sent primitively.\n"); + pr_debug("%s: we sent primitively\n", __func__); - if (err) - goto out_free; - else - err = msg_len; + sctp_datamsg_put(datamsg); + err = msg_len; /* If we are already past ASSOCIATE, the lower * layers are responsible for association cleanup. @@ -2034,9 +2024,9 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) */ static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); -SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, int noblock, - int flags, int *addr_len) +static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); @@ -2045,10 +2035,9 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, int err = 0; int skb_len; - SCTP_DEBUG_PRINTK("sctp_recvmsg(%s: %p, %s: %p, %s: %zd, %s: %d, %s: " - "0x%x, %s: %p)\n", "sk", sk, "msghdr", msg, - "len", len, "knoblauch", noblock, - "flags", flags, "addr_len", addr_len); + pr_debug("%s: sk:%p, msghdr:%p, len:%zd, noblock:%d, flags:0x%x, " + "addr_len:%p)\n", __func__, sk, msg, len, noblock, flags, + addr_len); sctp_lock_sock(sk); @@ -2915,13 +2904,8 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, unsig asoc->max_retrans = assocparams.sasoc_asocmaxrxt; } - if (assocparams.sasoc_cookie_life != 0) { - asoc->cookie_life.tv_sec = - assocparams.sasoc_cookie_life / 1000; - asoc->cookie_life.tv_usec = - (assocparams.sasoc_cookie_life % 1000) - * 1000; - } + if (assocparams.sasoc_cookie_life != 0) + asoc->cookie_life = ms_to_ktime(assocparams.sasoc_cookie_life); } else { /* Set the values to the endpoint */ struct sctp_sock *sp = sctp_sk(sk); @@ -3095,7 +3079,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva err = sctp_send_asconf(asoc, chunk); - SCTP_DEBUG_PRINTK("We set peer primary addr primitively.\n"); + pr_debug("%s: we set peer primary addr primitively\n", __func__); return err; } @@ -3565,13 +3549,12 @@ static int sctp_setsockopt_paddr_thresholds(struct sock *sk, * optval - the buffer to store the value of the option. * optlen - the size of the buffer. */ -SCTP_STATIC int sctp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, unsigned int optlen) +static int sctp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, unsigned int optlen) { int retval = 0; - SCTP_DEBUG_PRINTK("sctp_setsockopt(sk: %p... optname: %d)\n", - sk, optname); + pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname); /* I can hardly begin to describe how wrong this is. This is * so broken as to be worse than useless. The API draft @@ -3725,16 +3708,16 @@ out_nounlock: * * len: the size of the address. */ -SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, - int addr_len) +static int sctp_connect(struct sock *sk, struct sockaddr *addr, + int addr_len) { int err = 0; struct sctp_af *af; sctp_lock_sock(sk); - SCTP_DEBUG_PRINTK("%s - sk: %p, sockaddr: %p, addr_len: %d\n", - __func__, sk, addr, addr_len); + pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, + addr, addr_len); /* Validate addr_len before calling common connect/connectx routine. */ af = sctp_get_af_specific(addr->sa_family); @@ -3752,7 +3735,7 @@ SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *addr, } /* FIXME: Write comments. */ -SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags) +static int sctp_disconnect(struct sock *sk, int flags) { return -EOPNOTSUPP; /* STUB */ } @@ -3764,7 +3747,7 @@ SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags) * descriptor will be returned from accept() to represent the newly * formed association. */ -SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err) +static struct sock *sctp_accept(struct sock *sk, int flags, int *err) { struct sctp_sock *sp; struct sctp_endpoint *ep; @@ -3817,7 +3800,7 @@ out: } /* The SCTP ioctl handler. */ -SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) +static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) { int rc = -ENOTCONN; @@ -3859,13 +3842,12 @@ out: * initialized the SCTP-specific portion of the sock. * The sock structure should already be zero-filled memory. */ -SCTP_STATIC int sctp_init_sock(struct sock *sk) +static int sctp_init_sock(struct sock *sk) { struct net *net = sock_net(sk); - struct sctp_endpoint *ep; struct sctp_sock *sp; - SCTP_DEBUG_PRINTK("sctp_init_sock(sk: %p)\n", sk); + pr_debug("%s: sk:%p\n", __func__, sk); sp = sctp_sk(sk); @@ -3971,13 +3953,14 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) * change the data structure relationships, this may still * be useful for storing pre-connect address information. */ - ep = sctp_endpoint_new(sk, GFP_KERNEL); - if (!ep) + sp->ep = sctp_endpoint_new(sk, GFP_KERNEL); + if (!sp->ep) return -ENOMEM; - sp->ep = ep; sp->hmac = NULL; + sk->sk_destruct = sctp_destruct_sock; + SCTP_DBG_OBJCNT_INC(sock); local_bh_disable(); @@ -3995,11 +3978,11 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) } /* Cleanup any SCTP per socket resources. */ -SCTP_STATIC void sctp_destroy_sock(struct sock *sk) +static void sctp_destroy_sock(struct sock *sk) { struct sctp_sock *sp; - SCTP_DEBUG_PRINTK("sctp_destroy_sock(sk: %p)\n", sk); + pr_debug("%s: sk:%p\n", __func__, sk); /* Release our hold on the endpoint. */ sp = sctp_sk(sk); @@ -4020,6 +4003,17 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) local_bh_enable(); } +/* Triggered when there are no references on the socket anymore */ +static void sctp_destruct_sock(struct sock *sk) +{ + struct sctp_sock *sp = sctp_sk(sk); + + /* Free up the HMAC transform. */ + crypto_free_hash(sp->hmac); + + inet_sock_destruct(sk); +} + /* API 4.1.7 shutdown() - TCP Style Syntax * int shutdown(int socket, int how); * @@ -4036,7 +4030,7 @@ SCTP_STATIC void sctp_destroy_sock(struct sock *sk) * Disables further send and receive operations * and initiates the SCTP shutdown sequence. */ -SCTP_STATIC void sctp_shutdown(struct sock *sk, int how) +static void sctp_shutdown(struct sock *sk, int how) { struct net *net = sock_net(sk); struct sctp_endpoint *ep; @@ -4121,9 +4115,9 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, goto out; } - SCTP_DEBUG_PRINTK("sctp_getsockopt_sctp_status(%d): %d %d %d\n", - len, status.sstat_state, status.sstat_rwnd, - status.sstat_assoc_id); + pr_debug("%s: len:%d, state:%d, rwnd:%d, assoc_id:%d\n", + __func__, len, status.sstat_state, status.sstat_rwnd, + status.sstat_assoc_id); if (copy_to_user(optval, &status, len)) { retval = -EFAULT; @@ -4318,7 +4312,7 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval goto out; /* Map the socket to an unused fd that can be returned to the user. */ - retval = get_unused_fd(); + retval = get_unused_fd_flags(0); if (retval < 0) { sock_release(newsock); goto out; @@ -4331,8 +4325,8 @@ static int sctp_getsockopt_peeloff(struct sock *sk, int len, char __user *optval return PTR_ERR(newfile); } - SCTP_DEBUG_PRINTK("%s: sk: %p newsk: %p sd: %d\n", - __func__, sk, newsock->sk, retval); + pr_debug("%s: sk:%p, newsk:%p, sd:%d\n", __func__, sk, newsock->sk, + retval); /* Return the fd mapped to the new socket. */ if (put_user(len, optlen)) { @@ -4465,7 +4459,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) { - SCTP_DEBUG_PRINTK("Failed no transport\n"); + pr_debug("%s: failed no transport\n", __func__); return -EINVAL; } } @@ -4476,7 +4470,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { - SCTP_DEBUG_PRINTK("Failed no association\n"); + pr_debug("%s: failed no association\n", __func__); return -EINVAL; } @@ -5081,10 +5075,7 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len, assocparams.sasoc_asocmaxrxt = asoc->max_retrans; assocparams.sasoc_peer_rwnd = asoc->peer.rwnd; assocparams.sasoc_local_rwnd = asoc->a_rwnd; - assocparams.sasoc_cookie_life = (asoc->cookie_life.tv_sec - * 1000) + - (asoc->cookie_life.tv_usec - / 1000); + assocparams.sasoc_cookie_life = ktime_to_ms(asoc->cookie_life); list_for_each(pos, &asoc->peer.transport_addr_list) { cnt ++; @@ -5699,8 +5690,7 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, if (put_user(len, optlen)) return -EFAULT; - SCTP_DEBUG_PRINTK("sctp_getsockopt_assoc_stat(%d): %d\n", - len, sas.sas_assoc_id); + pr_debug("%s: len:%d, assoc_id:%d\n", __func__, len, sas.sas_assoc_id); if (copy_to_user(optval, &sas, len)) return -EFAULT; @@ -5708,14 +5698,13 @@ static int sctp_getsockopt_assoc_stats(struct sock *sk, int len, return 0; } -SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +static int sctp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { int retval = 0; int len; - SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p... optname: %d)\n", - sk, optname); + pr_debug("%s: sk:%p, optname:%d\n", __func__, sk, optname); /* I can hardly begin to describe how wrong this is. This is * so broken as to be worse than useless. The API draft @@ -5895,7 +5884,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr) snum = ntohs(addr->v4.sin_port); - SCTP_DEBUG_PRINTK("sctp_get_port() begins, snum=%d\n", snum); + pr_debug("%s: begins, snum:%d\n", __func__, snum); + sctp_local_bh_disable(); if (snum == 0) { @@ -5961,7 +5951,8 @@ pp_found: int reuse = sk->sk_reuse; struct sock *sk2; - SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n"); + pr_debug("%s: found a possible match\n", __func__); + if (pp->fastreuse && sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING) goto success; @@ -5991,7 +5982,8 @@ pp_found: goto fail_unlock; } } - SCTP_DEBUG_PRINTK("sctp_get_port(): Found a match\n"); + + pr_debug("%s: found a match\n", __func__); } pp_not_found: /* If there was a hash table miss, create a new port. */ @@ -6037,7 +6029,6 @@ fail: */ static int sctp_get_port(struct sock *sk, unsigned short snum) { - long ret; union sctp_addr addr; struct sctp_af *af = sctp_sk(sk)->pf->af; @@ -6046,15 +6037,13 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) addr.v4.sin_port = htons(snum); /* Note: sk->sk_num gets filled in if ephemeral port request. */ - ret = sctp_get_port_local(sk, &addr); - - return ret ? 1 : 0; + return !!sctp_get_port_local(sk, &addr); } /* * Move a socket to LISTENING state. */ -SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) +static int sctp_listen_start(struct sock *sk, int backlog) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; @@ -6341,8 +6330,7 @@ static int sctp_autobind(struct sock *sk) * msg_control * points here */ -SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg, - sctp_cmsgs_t *cmsgs) +static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) { struct cmsghdr *cmsg; struct msghdr *my_msg = (struct msghdr *)msg; @@ -6484,8 +6472,8 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, timeo = sock_rcvtimeo(sk, noblock); - SCTP_DEBUG_PRINTK("Timeout: timeo: %ld, MAX: %ld.\n", - timeo, MAX_SCHEDULE_TIMEOUT); + pr_debug("%s: timeo:%ld, max:%ld\n", __func__, timeo, + MAX_SCHEDULE_TIMEOUT); do { /* Again only user level code calls this function, @@ -6616,8 +6604,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, long current_timeo = *timeo_p; DEFINE_WAIT(wait); - SCTP_DEBUG_PRINTK("wait_for_sndbuf: asoc=%p, timeo=%ld, msg_len=%zu\n", - asoc, (long)(*timeo_p), msg_len); + pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, + *timeo_p, msg_len); /* Increment the association's refcnt. */ sctp_association_hold(asoc); @@ -6723,8 +6711,7 @@ static int sctp_wait_for_connect(struct sctp_association *asoc, long *timeo_p) long current_timeo = *timeo_p; DEFINE_WAIT(wait); - SCTP_DEBUG_PRINTK("%s: asoc=%p, timeo=%ld\n", __func__, asoc, - (long)(*timeo_p)); + pr_debug("%s: asoc:%p, timeo:%ld\n", __func__, asoc, *timeo_p); /* Increment the association's refcnt. */ sctp_association_hold(asoc); @@ -6864,7 +6851,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newsk->sk_reuse = sk->sk_reuse; newsk->sk_shutdown = sk->sk_shutdown; - newsk->sk_destruct = inet_sock_destruct; + newsk->sk_destruct = sctp_destruct_sock; newsk->sk_family = sk->sk_family; newsk->sk_protocol = IPPROTO_SCTP; newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index bf3c6e8fc401..9a5c4c9eddaf 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -62,12 +62,12 @@ extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; -static int proc_sctp_do_hmac_alg(ctl_table *ctl, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static ctl_table sctp_table[] = { +static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", .data = &sysctl_sctp_mem, @@ -93,7 +93,7 @@ static ctl_table sctp_table[] = { { /* sentinel */ } }; -static ctl_table sctp_net_table[] = { +static struct ctl_table sctp_net_table[] = { { .procname = "rto_initial", .data = &init_net.sctp.rto_initial, @@ -300,14 +300,14 @@ static ctl_table sctp_net_table[] = { { /* sentinel */ } }; -static int proc_sctp_do_hmac_alg(ctl_table *ctl, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; char tmp[8]; - ctl_table tbl; + struct ctl_table tbl; int ret; int changed = 0; char *none = "none"; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 098f1d5f769e..bdbbc3fd7c14 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -116,7 +116,7 @@ struct sctp_transport *sctp_transport_new(struct net *net, { struct sctp_transport *transport; - transport = t_new(struct sctp_transport, gfp); + transport = kzalloc(sizeof(*transport), gfp); if (!transport) goto fail; @@ -176,7 +176,10 @@ static void sctp_transport_destroy_rcu(struct rcu_head *head) */ static void sctp_transport_destroy(struct sctp_transport *transport) { - SCTP_ASSERT(transport->dead, "Transport is not dead", return); + if (unlikely(!transport->dead)) { + WARN(1, "Attempt to destroy undead transport %p!\n", transport); + return; + } call_rcu(&transport->rcu, sctp_transport_destroy_rcu); @@ -317,11 +320,9 @@ void sctp_transport_put(struct sctp_transport *transport) /* Update transport's RTO based on the newly calculated RTT. */ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) { - /* Check for valid transport. */ - SCTP_ASSERT(tp, "NULL transport", return); - - /* We should not be doing any RTO updates unless rto_pending is set. */ - SCTP_ASSERT(tp->rto_pending, "rto_pending not set", return); + if (unlikely(!tp->rto_pending)) + /* We should not be doing any RTO updates unless rto_pending is set. */ + pr_debug("%s: rto_pending not set on transport %p!\n", __func__, tp); if (tp->rttvar || tp->srtt) { struct net *net = sock_net(tp->asoc->base.sk); @@ -377,9 +378,8 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) */ tp->rto_pending = 0; - SCTP_DEBUG_PRINTK("%s: transport: %p, rtt: %d, srtt: %d " - "rttvar: %d, rto: %ld\n", __func__, - tp, rtt, tp->srtt, tp->rttvar, tp->rto); + pr_debug("%s: transport:%p, rtt:%d, srtt:%d rttvar:%d, rto:%ld\n", + __func__, tp, rtt, tp->srtt, tp->rttvar, tp->rto); } /* This routine updates the transport's cwnd and partial_bytes_acked @@ -433,12 +433,11 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, cwnd += pmtu; else cwnd += bytes_acked; - SCTP_DEBUG_PRINTK("%s: SLOW START: transport: %p, " - "bytes_acked: %d, cwnd: %d, ssthresh: %d, " - "flight_size: %d, pba: %d\n", - __func__, - transport, bytes_acked, cwnd, - ssthresh, flight_size, pba); + + pr_debug("%s: slow start: transport:%p, bytes_acked:%d, " + "cwnd:%d, ssthresh:%d, flight_size:%d, pba:%d\n", + __func__, transport, bytes_acked, cwnd, ssthresh, + flight_size, pba); } else { /* RFC 2960 7.2.2 Whenever cwnd is greater than ssthresh, * upon each SACK arrival that advances the Cumulative TSN Ack @@ -459,12 +458,12 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, cwnd += pmtu; pba = ((cwnd < pba) ? (pba - cwnd) : 0); } - SCTP_DEBUG_PRINTK("%s: CONGESTION AVOIDANCE: " - "transport: %p, bytes_acked: %d, cwnd: %d, " - "ssthresh: %d, flight_size: %d, pba: %d\n", - __func__, - transport, bytes_acked, cwnd, - ssthresh, flight_size, pba); + + pr_debug("%s: congestion avoidance: transport:%p, " + "bytes_acked:%d, cwnd:%d, ssthresh:%d, " + "flight_size:%d, pba:%d\n", __func__, + transport, bytes_acked, cwnd, ssthresh, + flight_size, pba); } transport->cwnd = cwnd; @@ -558,10 +557,10 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, } transport->partial_bytes_acked = 0; - SCTP_DEBUG_PRINTK("%s: transport: %p reason: %d cwnd: " - "%d ssthresh: %d\n", __func__, - transport, reason, - transport->cwnd, transport->ssthresh); + + pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d\n", + __func__, transport, reason, transport->cwnd, + transport->ssthresh); } /* Apply Max.Burst limit to the congestion window: diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index 396c45174e5b..b46019568a86 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -161,8 +161,8 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn, /* Initialize a Gap Ack Block iterator from memory being provided. */ -SCTP_STATIC void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, - struct sctp_tsnmap_iter *iter) +static void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, + struct sctp_tsnmap_iter *iter) { /* Only start looking one past the Cumulative TSN Ack Point. */ iter->start = map->cumulative_tsn_ack_point + 1; @@ -171,9 +171,9 @@ SCTP_STATIC void sctp_tsnmap_iter_init(const struct sctp_tsnmap *map, /* Get the next Gap Ack Blocks. Returns 0 if there was not another block * to get. */ -SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, - struct sctp_tsnmap_iter *iter, - __u16 *start, __u16 *end) +static int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, + struct sctp_tsnmap_iter *iter, + __u16 *start, __u16 *end) { int ended = 0; __u16 start_ = 0, end_ = 0, offset; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 10c018a5b9fe..44a45dbee4df 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -57,9 +57,9 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event); /* Initialize an ULP event from an given skb. */ -SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, - int msg_flags, - unsigned int len) +static void sctp_ulpevent_init(struct sctp_ulpevent *event, + int msg_flags, + unsigned int len) { memset(event, 0, sizeof(struct sctp_ulpevent)); event->msg_flags = msg_flags; @@ -67,8 +67,8 @@ SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, } /* Create a new sctp_ulpevent. */ -SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, - gfp_t gfp) +static struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, + gfp_t gfp) { struct sctp_ulpevent *event; struct sk_buff *skb; diff --git a/net/socket.c b/net/socket.c index 4ca1526db756..b2d7c629eeb9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,6 +104,12 @@ #include <linux/route.h> #include <linux/sockios.h> #include <linux/atalk.h> +#include <net/busy_poll.h> + +#ifdef CONFIG_NET_RX_BUSY_POLL +unsigned int sysctl_net_busy_read __read_mostly; +unsigned int sysctl_net_busy_poll __read_mostly; +#endif static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, @@ -1142,13 +1148,24 @@ EXPORT_SYMBOL(sock_create_lite); /* No kernel lock held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) { + unsigned int busy_flag = 0; struct socket *sock; /* * We can't return errors to poll, so it's either yes or no. */ sock = file->private_data; - return sock->ops->poll(file, sock, wait); + + if (sk_can_busy_loop(sock->sk)) { + /* this socket can poll_ll so tell the system call */ + busy_flag = POLL_BUSY_LOOP; + + /* once, only if requested by syscall */ + if (wait && (wait->_key & POLL_BUSY_LOOP)) + sk_busy_loop(sock->sk, 1); + } + + return busy_flag | sock->ops->poll(file, sock, wait); } static int sock_mmap(struct file *file, struct vm_area_struct *vma) @@ -2635,7 +2652,9 @@ static int __init sock_init(void) */ #ifdef CONFIG_NETFILTER - netfilter_init(); + err = netfilter_init(); + if (err) + goto out; #endif #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index defa9d33925c..27ce26240932 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -139,11 +139,12 @@ void gss_mech_unregister(struct gss_api_mech *gm) } EXPORT_SYMBOL_GPL(gss_mech_unregister); -static struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) +struct gss_api_mech *gss_mech_get(struct gss_api_mech *gm) { __module_get(gm->gm_owner); return gm; } +EXPORT_SYMBOL(gss_mech_get); static struct gss_api_mech * _gss_mech_get_by_name(const char *name) @@ -360,6 +361,7 @@ gss_pseudoflavor_to_service(struct gss_api_mech *gm, u32 pseudoflavor) } return 0; } +EXPORT_SYMBOL(gss_pseudoflavor_to_service); char * gss_service_to_auth_domain_name(struct gss_api_mech *gm, u32 service) @@ -379,6 +381,7 @@ gss_mech_put(struct gss_api_mech * gm) if (gm) module_put(gm->gm_owner); } +EXPORT_SYMBOL(gss_mech_put); /* The mech could probably be determined from the token instead, but it's just * as easy for now to pass it in. */ diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index d304f41260f2..af7ffd447fee 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -120,7 +120,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) if (IS_ERR(clnt)) { dprintk("RPC: failed to create AF_LOCAL gssproxy " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); *_clnt = NULL; goto out; } @@ -328,7 +328,6 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data) kfree(data->in_handle.data); kfree(data->out_handle.data); kfree(data->out_token.data); - kfree(data->mech_oid.data); free_svc_cred(&data->creds); } diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 357f613df7ff..3c85d1c8a028 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -430,7 +430,7 @@ static int dummy_enc_nameattr_array(struct xdr_stream *xdr, static int dummy_dec_nameattr_array(struct xdr_stream *xdr, struct gssx_name_attr_array *naa) { - struct gssx_name_attr dummy; + struct gssx_name_attr dummy = { .attr = {.len = 0} }; u32 count, i; __be32 *p; @@ -493,12 +493,13 @@ static int gssx_enc_name(struct xdr_stream *xdr, return err; } + static int gssx_dec_name(struct xdr_stream *xdr, struct gssx_name *name) { - struct xdr_netobj dummy_netobj; - struct gssx_name_attr_array dummy_name_attr_array; - struct gssx_option_array dummy_option_array; + struct xdr_netobj dummy_netobj = { .len = 0 }; + struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 }; + struct gssx_option_array dummy_option_array = { .count = 0 }; int err; /* name->display_name */ diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index b05ace4c5f12..09fb638bcaa4 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -377,8 +377,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp) new->handle.data = tmp->handle.data; tmp->handle.data = NULL; new->mechctx = NULL; - new->cred.cr_group_info = NULL; - new->cred.cr_principal = NULL; + init_svc_cred(&new->cred); } static void @@ -392,9 +391,7 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp) memset(&new->seqdata, 0, sizeof(new->seqdata)); spin_lock_init(&new->seqdata.sd_lock); new->cred = tmp->cred; - tmp->cred.cr_group_info = NULL; - new->cred.cr_principal = tmp->cred.cr_principal; - tmp->cred.cr_principal = NULL; + init_svc_cred(&tmp->cred); } static struct cache_head * @@ -487,7 +484,7 @@ static int rsc_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; - gm = gss_mech_get_by_name(buf); + gm = rsci.cred.cr_gss_mech = gss_mech_get_by_name(buf); status = -EOPNOTSUPP; if (!gm) goto out; @@ -517,7 +514,6 @@ static int rsc_parse(struct cache_detail *cd, rscp = rsc_update(cd, &rsci, rscp); status = 0; out: - gss_mech_put(gm); rsc_free(&rsci); if (rscp) cache_put(&rscp->h, cd); @@ -1184,6 +1180,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, gm = gss_mech_get_by_OID(&ud->mech_oid); if (!gm) goto out; + rsci.cred.cr_gss_mech = gm; status = -EINVAL; /* mech-specific data: */ @@ -1199,7 +1196,6 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, rscp = rsc_update(cd, &rsci, rscp); status = 0; out: - gss_mech_put(gm); rsc_free(&rsci); if (rscp) cache_put(&rscp->h, cd); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 80fe5c86efd1..a72de074172d 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -50,12 +50,6 @@ static void cache_init(struct cache_head *h) h->last_refresh = now; } -static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) -{ - return (h->expiry_time < seconds_since_boot()) || - (detail->flush_time > h->last_refresh); -} - struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash) { @@ -201,7 +195,7 @@ static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) return sunrpc_cache_pipe_upcall(cd, h); } -static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) +static inline int cache_is_valid(struct cache_head *h) { if (!test_bit(CACHE_VALID, &h->flags)) return -EAGAIN; @@ -227,16 +221,15 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h int rv; write_lock(&detail->hash_lock); - rv = cache_is_valid(detail, h); - if (rv != -EAGAIN) { - write_unlock(&detail->hash_lock); - return rv; + rv = cache_is_valid(h); + if (rv == -EAGAIN) { + set_bit(CACHE_NEGATIVE, &h->flags); + cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); + rv = -ENOENT; } - set_bit(CACHE_NEGATIVE, &h->flags); - cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY); write_unlock(&detail->hash_lock); cache_fresh_unlocked(h, detail); - return -ENOENT; + return rv; } /* @@ -260,7 +253,7 @@ int cache_check(struct cache_detail *detail, long refresh_age, age; /* First decide return status as best we can */ - rv = cache_is_valid(detail, h); + rv = cache_is_valid(h); /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); @@ -269,19 +262,17 @@ int cache_check(struct cache_detail *detail, if (rqstp == NULL) { if (rv == -EAGAIN) rv = -ENOENT; - } else if (rv == -EAGAIN || age > refresh_age/2) { + } else if (rv == -EAGAIN || + (h->expiry_time != 0 && age > refresh_age/2)) { dprintk("RPC: Want update, refage=%ld, age=%ld\n", refresh_age, age); if (!test_and_set_bit(CACHE_PENDING, &h->flags)) { switch (cache_make_upcall(detail, h)) { case -EINVAL: - clear_bit(CACHE_PENDING, &h->flags); - cache_revisit_request(h); rv = try_to_negate_entry(detail, h); break; case -EAGAIN: - clear_bit(CACHE_PENDING, &h->flags); - cache_revisit_request(h); + cache_fresh_unlocked(h, detail); break; } } @@ -293,7 +284,7 @@ int cache_check(struct cache_detail *detail, * Request was not deferred; handle it as best * we can ourselves: */ - rv = cache_is_valid(detail, h); + rv = cache_is_valid(h); if (rv == -EAGAIN) rv = -ETIMEDOUT; } @@ -310,7 +301,7 @@ EXPORT_SYMBOL_GPL(cache_check); * a current pointer into that list and into the table * for that entry. * - * Each time clean_cache is called it finds the next non-empty entry + * Each time cache_clean is called it finds the next non-empty entry * in the current table and walks the list in that entry * looking for entries that can be removed. * @@ -457,9 +448,8 @@ static int cache_clean(void) current_index ++; spin_unlock(&cache_list_lock); if (ch) { - if (test_and_clear_bit(CACHE_PENDING, &ch->flags)) - cache_dequeue(current_detail, ch); - cache_revisit_request(ch); + set_bit(CACHE_CLEANED, &ch->flags); + cache_fresh_unlocked(ch, d); cache_put(ch, d); } } else @@ -1036,23 +1026,32 @@ static int cache_release(struct inode *inode, struct file *filp, static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) { - struct cache_queue *cq; + struct cache_queue *cq, *tmp; + struct cache_request *cr; + struct list_head dequeued; + + INIT_LIST_HEAD(&dequeued); spin_lock(&queue_lock); - list_for_each_entry(cq, &detail->queue, list) + list_for_each_entry_safe(cq, tmp, &detail->queue, list) if (!cq->reader) { - struct cache_request *cr = container_of(cq, struct cache_request, q); + cr = container_of(cq, struct cache_request, q); if (cr->item != ch) continue; + if (test_bit(CACHE_PENDING, &ch->flags)) + /* Lost a race and it is pending again */ + break; if (cr->readers != 0) continue; - list_del(&cr->q.list); - spin_unlock(&queue_lock); - cache_put(cr->item, detail); - kfree(cr->buf); - kfree(cr); - return; + list_move(&cr->q.list, &dequeued); } spin_unlock(&queue_lock); + while (!list_empty(&dequeued)) { + cr = list_entry(dequeued.next, struct cache_request, q.list); + list_del(&cr->q.list); + cache_put(cr->item, detail); + kfree(cr->buf); + kfree(cr); + } } /* @@ -1166,6 +1165,7 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) char *buf; struct cache_request *crq; + int ret = 0; if (!detail->cache_request) return -EINVAL; @@ -1174,6 +1174,9 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) warn_no_listener(detail); return -EINVAL; } + if (test_bit(CACHE_CLEANED, &h->flags)) + /* Too late to make an upcall */ + return -EAGAIN; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) @@ -1191,10 +1194,18 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); - list_add_tail(&crq->q.list, &detail->queue); + if (test_bit(CACHE_PENDING, &h->flags)) + list_add_tail(&crq->q.list, &detail->queue); + else + /* Lost a race, no longer PENDING, so don't enqueue */ + ret = -EAGAIN; spin_unlock(&queue_lock); wake_up(&queue_wait); - return 0; + if (ret == -EAGAIN) { + kfree(buf); + kfree(crq); + } + return ret; } EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); @@ -1812,19 +1823,11 @@ int sunrpc_cache_register_pipefs(struct dentry *parent, const char *name, umode_t umode, struct cache_detail *cd) { - struct qstr q; - struct dentry *dir; - int ret = 0; - - q.name = name; - q.len = strlen(name); - q.hash = full_name_hash(q.name, q.len); - dir = rpc_create_cache_dir(parent, &q, umode, cd); - if (!IS_ERR(dir)) - cd->u.pipefs.dir = dir; - else - ret = PTR_ERR(dir); - return ret; + struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd); + if (IS_ERR(dir)) + return PTR_ERR(dir); + cd->u.pipefs.dir = dir; + return 0; } EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5a750b9c3640..74f6a704e374 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -128,9 +128,7 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, { static uint32_t clntid; char name[15]; - struct qstr q = { .name = name }; struct dentry *dir, *dentry; - int error; dir = rpc_d_lookup_sb(sb, dir_name); if (dir == NULL) { @@ -138,39 +136,32 @@ static struct dentry *rpc_setup_pipedir_sb(struct super_block *sb, return dir; } for (;;) { - q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); + snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); name[sizeof(name) - 1] = '\0'; - q.hash = full_name_hash(q.name, q.len); - dentry = rpc_create_client_dir(dir, &q, clnt); + dentry = rpc_create_client_dir(dir, name, clnt); if (!IS_ERR(dentry)) break; - error = PTR_ERR(dentry); - if (error != -EEXIST) { - printk(KERN_INFO "RPC: Couldn't create pipefs entry" - " %s/%s, error %d\n", - dir_name, name, error); - break; - } + if (dentry == ERR_PTR(-EEXIST)) + continue; + printk(KERN_INFO "RPC: Couldn't create pipefs entry" + " %s/%s, error %ld\n", + dir_name, name, PTR_ERR(dentry)); + break; } dput(dir); return dentry; } static int -rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name) +rpc_setup_pipedir(struct rpc_clnt *clnt, const char *dir_name, + struct super_block *pipefs_sb) { - struct net *net = rpc_net_ns(clnt); - struct super_block *pipefs_sb; struct dentry *dentry; clnt->cl_dentry = NULL; if (dir_name == NULL) return 0; - pipefs_sb = rpc_get_sb_net(net); - if (!pipefs_sb) - return 0; dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt, dir_name); - rpc_put_sb_net(net); if (IS_ERR(dentry)) return PTR_ERR(dentry); clnt->cl_dentry = dentry; @@ -182,6 +173,8 @@ static inline int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event if (((event == RPC_PIPEFS_MOUNT) && clnt->cl_dentry) || ((event == RPC_PIPEFS_UMOUNT) && !clnt->cl_dentry)) return 1; + if ((event == RPC_PIPEFS_MOUNT) && atomic_read(&clnt->cl_count) == 0) + return 1; return 0; } @@ -241,8 +234,6 @@ static struct rpc_clnt *rpc_get_client_for_event(struct net *net, int event) continue; if (rpc_clnt_skip_event(clnt, event)) continue; - if (atomic_inc_not_zero(&clnt->cl_count) == 0) - continue; spin_unlock(&sn->rpc_client_lock); return clnt; } @@ -259,7 +250,6 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, while ((clnt = rpc_get_client_for_event(sb->s_fs_info, event))) { error = __rpc_pipefs_event(clnt, event, sb); - rpc_release_client(clnt); if (error) break; } @@ -289,12 +279,49 @@ static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen); } +static int rpc_client_register(const struct rpc_create_args *args, + struct rpc_clnt *clnt) +{ + const struct rpc_program *program = args->program; + struct rpc_auth *auth; + struct net *net = rpc_net_ns(clnt); + struct super_block *pipefs_sb; + int err; + + pipefs_sb = rpc_get_sb_net(net); + if (pipefs_sb) { + err = rpc_setup_pipedir(clnt, program->pipe_dir_name, pipefs_sb); + if (err) + goto out; + } + + rpc_register_client(clnt); + if (pipefs_sb) + rpc_put_sb_net(net); + + auth = rpcauth_create(args->authflavor, clnt); + if (IS_ERR(auth)) { + dprintk("RPC: Couldn't create auth handle (flavor %u)\n", + args->authflavor); + err = PTR_ERR(auth); + goto err_auth; + } + return 0; +err_auth: + pipefs_sb = rpc_get_sb_net(net); + rpc_unregister_client(clnt); + __rpc_clnt_remove_pipedir(clnt); +out: + if (pipefs_sb) + rpc_put_sb_net(net); + return err; +} + static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) { const struct rpc_program *program = args->program; const struct rpc_version *version; struct rpc_clnt *clnt = NULL; - struct rpc_auth *auth; int err; /* sanity check the name before trying to print it */ @@ -354,25 +381,14 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru atomic_set(&clnt->cl_count, 1); - err = rpc_setup_pipedir(clnt, program->pipe_dir_name); - if (err < 0) - goto out_no_path; - - auth = rpcauth_create(args->authflavor, clnt); - if (IS_ERR(auth)) { - dprintk("RPC: Couldn't create auth handle (flavor %u)\n", - args->authflavor); - err = PTR_ERR(auth); - goto out_no_auth; - } - /* save the nodename */ rpc_clnt_set_nodename(clnt, utsname()->nodename); - rpc_register_client(clnt); + + err = rpc_client_register(args, clnt); + if (err) + goto out_no_path; return clnt; -out_no_auth: - rpc_clnt_remove_pipedir(clnt); out_no_path: kfree(clnt->cl_principal); out_no_principal: @@ -637,8 +653,8 @@ rpc_free_client(struct rpc_clnt *clnt) rcu_dereference(clnt->cl_xprt)->servername); if (clnt->cl_parent != clnt) rpc_release_client(clnt->cl_parent); - rpc_unregister_client(clnt); rpc_clnt_remove_pipedir(clnt); + rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); kfree(clnt->cl_principal); clnt->cl_metrics = NULL; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e7ce4b3eb0bd..406859cc68aa 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -480,6 +480,23 @@ static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; +/* + * Lookup the data. This is trivial - if the dentry didn't already + * exist, we know it is negative. + */ +static struct dentry * +rpc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + d_add(dentry, NULL); + return NULL; +} + +static const struct inode_operations rpc_dir_inode_operations = { + .lookup = rpc_lookup, +}; + static struct inode * rpc_get_inode(struct super_block *sb, umode_t mode) { @@ -492,7 +509,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode) switch (mode & S_IFMT) { case S_IFDIR: inode->i_fop = &simple_dir_operations; - inode->i_op = &simple_dir_inode_operations; + inode->i_op = &rpc_dir_inode_operations; inc_nlink(inode); default: break; @@ -656,20 +673,17 @@ static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) } static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, - struct qstr *name) + const char *name) { - struct dentry *dentry; - - dentry = d_lookup(parent, name); + struct qstr q = QSTR_INIT(name, strlen(name)); + struct dentry *dentry = d_hash_and_lookup(parent, &q); if (!dentry) { - dentry = d_alloc(parent, name); + dentry = d_alloc(parent, &q); if (!dentry) return ERR_PTR(-ENOMEM); } - if (dentry->d_inode == NULL) { - d_set_d_op(dentry, &rpc_dentry_operations); + if (dentry->d_inode == NULL) return dentry; - } dput(dentry); return ERR_PTR(-EEXIST); } @@ -689,8 +703,7 @@ static void __rpc_depopulate(struct dentry *parent, for (i = start; i < eof; i++) { name.name = files[i].name; name.len = strlen(files[i].name); - name.hash = full_name_hash(name.name, name.len); - dentry = d_lookup(parent, &name); + dentry = d_hash_and_lookup(parent, &name); if (dentry == NULL) continue; @@ -732,12 +745,7 @@ static int rpc_populate(struct dentry *parent, mutex_lock(&dir->i_mutex); for (i = start; i < eof; i++) { - struct qstr q; - - q.name = files[i].name; - q.len = strlen(files[i].name); - q.hash = full_name_hash(q.name, q.len); - dentry = __rpc_lookup_create_exclusive(parent, &q); + dentry = __rpc_lookup_create_exclusive(parent, files[i].name); err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_bad; @@ -770,7 +778,7 @@ out_bad: } static struct dentry *rpc_mkdir_populate(struct dentry *parent, - struct qstr *name, umode_t mode, void *private, + const char *name, umode_t mode, void *private, int (*populate)(struct dentry *, void *), void *args_populate) { struct dentry *dentry; @@ -841,7 +849,6 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, struct dentry *dentry; struct inode *dir = parent->d_inode; umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR; - struct qstr q; int err; if (pipe->ops->upcall == NULL) @@ -849,12 +856,8 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name, if (pipe->ops->downcall == NULL) umode &= ~S_IWUGO; - q.name = name; - q.len = strlen(name); - q.hash = full_name_hash(q.name, q.len), - mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = __rpc_lookup_create_exclusive(parent, &q); + dentry = __rpc_lookup_create_exclusive(parent, name); if (IS_ERR(dentry)) goto out; err = __rpc_mkpipe_dentry(dir, dentry, umode, &rpc_pipe_fops, @@ -925,8 +928,8 @@ static void rpc_clntdir_depopulate(struct dentry *dentry) /** * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs - * @dentry: dentry from the rpc_pipefs root to the new directory - * @name: &struct qstr for the name + * @dentry: the parent of new directory + * @name: the name of new directory * @rpc_client: rpc client to associate with this directory * * This creates a directory at the given @path associated with @@ -935,7 +938,7 @@ static void rpc_clntdir_depopulate(struct dentry *dentry) * later be created using rpc_mkpipe(). */ struct dentry *rpc_create_client_dir(struct dentry *dentry, - struct qstr *name, + const char *name, struct rpc_clnt *rpc_client) { return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL, @@ -981,7 +984,7 @@ static void rpc_cachedir_depopulate(struct dentry *dentry) rpc_depopulate(dentry, cache_pipefs_files, 0, 3); } -struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name, +struct dentry *rpc_create_cache_dir(struct dentry *parent, const char *name, umode_t umode, struct cache_detail *cd) { return rpc_mkdir_populate(parent, name, umode, NULL, @@ -1061,9 +1064,7 @@ struct dentry *rpc_d_lookup_sb(const struct super_block *sb, const unsigned char *dir_name) { struct qstr dir = QSTR_INIT(dir_name, strlen(dir_name)); - - dir.hash = full_name_hash(dir.name, dir.len); - return d_lookup(sb->s_root, &dir); + return d_hash_and_lookup(sb->s_root, &dir); } EXPORT_SYMBOL_GPL(rpc_d_lookup_sb); @@ -1116,6 +1117,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) sb->s_blocksize_bits = PAGE_CACHE_SHIFT; sb->s_magic = RPCAUTH_GSSMAGIC; sb->s_op = &s_ops; + sb->s_d_op = &rpc_dentry_operations; sb->s_time_gran = 1; inode = rpc_get_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO); @@ -1126,6 +1128,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", net, NET_NAME(net)); + mutex_lock(&sn->pipefs_sb_lock); sn->pipefs_sb = sb; err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_MOUNT, @@ -1133,6 +1136,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_depopulate; sb->s_fs_info = get_net(net); + mutex_unlock(&sn->pipefs_sb_lock); return 0; err_depopulate: @@ -1141,6 +1145,7 @@ err_depopulate: sb); sn->pipefs_sb = NULL; __rpc_depopulate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF); + mutex_unlock(&sn->pipefs_sb_lock); return err; } @@ -1162,12 +1167,12 @@ static void rpc_kill_sb(struct super_block *sb) goto out; } sn->pipefs_sb = NULL; - mutex_unlock(&sn->pipefs_sb_lock); dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", net, NET_NAME(net)); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, sb); + mutex_unlock(&sn->pipefs_sb_lock); put_net(net); out: kill_litter_super(sb); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 77d251e02593..93a7a4e94d80 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -446,20 +446,6 @@ static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct r } /* - * Tests whether rpc queue is empty - */ -int rpc_queue_empty(struct rpc_wait_queue *queue) -{ - int res; - - spin_lock_bh(&queue->lock); - res = queue->qlen; - spin_unlock_bh(&queue->lock); - return res == 0; -} -EXPORT_SYMBOL_GPL(rpc_queue_empty); - -/* * Wake up a task on a specific queue */ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task) @@ -804,7 +790,6 @@ static void __rpc_execute(struct rpc_task *task) task->tk_flags |= RPC_TASK_KILLED; rpc_exit(task, -ERESTARTSYS); } - rpc_set_running(task); dprintk("RPC: %5u sync task resuming\n", task->tk_pid); } @@ -825,9 +810,11 @@ static void __rpc_execute(struct rpc_task *task) */ void rpc_execute(struct rpc_task *task) { + bool is_async = RPC_IS_ASYNC(task); + rpc_set_active(task); rpc_make_runnable(task); - if (!RPC_IS_ASYNC(task)) + if (!is_async) __rpc_execute(task); } diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 06bdf5a1082c..621ca7b4a155 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -347,13 +347,13 @@ ip_map_cached_get(struct svc_xprt *xprt) spin_lock(&xprt->xpt_lock); ipm = xprt->xpt_auth_cache; if (ipm != NULL) { - if (!cache_valid(&ipm->h)) { + sn = net_generic(xprt->xpt_net, sunrpc_net_id); + if (cache_is_expired(sn->ip_map_cache, &ipm->h)) { /* * The entry has been invalidated since it was * remembered, e.g. by a second mount from the * same IP address. */ - sn = net_generic(xprt->xpt_net, sunrpc_net_id); xprt->xpt_auth_cache = NULL; spin_unlock(&xprt->xpt_lock); cache_put(&ipm->h, sn->ip_map_cache); @@ -493,8 +493,6 @@ static int unix_gid_parse(struct cache_detail *cd, if (rv) return -EINVAL; uid = make_kuid(&init_user_ns, id); - if (!uid_valid(uid)) - return -EINVAL; ug.uid = uid; expiry = get_expiry(&mesg); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0f679df7d072..7762b9f8a8b7 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -917,7 +917,10 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { - BUG_ON(svsk->sk_pages[i] == NULL); + if (svsk->sk_pages[i] == NULL) { + WARN_ON_ONCE(1); + continue; + } put_page(svsk->sk_pages[i]); svsk->sk_pages[i] = NULL; } @@ -1092,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) + if (svsk->sk_datalen < 8) { + svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ + } rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; @@ -1188,7 +1193,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt) if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) return 1; required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; - if (sk_stream_wspace(svsk->sk_sk) >= required) + if (sk_stream_wspace(svsk->sk_sk) >= required || + (sk_stream_min_wspace(svsk->sk_sk) == 0 && + atomic_read(&xprt->xpt_reserved) == 0)) return 1; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); return 0; diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index af7d339add9d..c99c58e2ee66 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -40,7 +40,7 @@ EXPORT_SYMBOL_GPL(nlm_debug); #ifdef RPC_DEBUG static struct ctl_table_header *sunrpc_table_header; -static ctl_table sunrpc_table[]; +static struct ctl_table sunrpc_table[]; void rpc_register_sysctl(void) @@ -58,7 +58,7 @@ rpc_unregister_sysctl(void) } } -static int proc_do_xprt(ctl_table *table, int write, +static int proc_do_xprt(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[256]; @@ -73,7 +73,7 @@ static int proc_do_xprt(ctl_table *table, int write, } static int -proc_dodebug(ctl_table *table, int write, +proc_dodebug(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[20], c, *s; @@ -135,7 +135,7 @@ done: } -static ctl_table debug_table[] = { +static struct ctl_table debug_table[] = { { .procname = "rpc_debug", .data = &rpc_debug, @@ -173,7 +173,7 @@ static ctl_table debug_table[] = { { } }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 8343737e85f4..c1b6270262c2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -84,7 +84,7 @@ struct workqueue_struct *svc_rdma_wq; * resets the associated statistic to zero. Any read returns it's * current value. */ -static int read_reset_stat(ctl_table *table, int write, +static int read_reset_stat(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -119,7 +119,7 @@ static int read_reset_stat(ctl_table *table, int write, } static struct ctl_table_header *svcrdma_table_header; -static ctl_table svcrdma_parm_table[] = { +static struct ctl_table svcrdma_parm_table[] = { { .procname = "max_requests", .data = &svcrdma_max_requests, @@ -214,7 +214,7 @@ static ctl_table svcrdma_parm_table[] = { { }, }; -static ctl_table svcrdma_table[] = { +static struct ctl_table svcrdma_table[] = { { .procname = "svc_rdma", .mode = 0555, @@ -223,7 +223,7 @@ static ctl_table svcrdma_table[] = { { }, }; -static ctl_table svcrdma_root_table[] = { +static struct ctl_table svcrdma_root_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 8d2edddf48cf..65b146297f5a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -98,6 +98,7 @@ void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, */ static u32 *decode_write_list(u32 *va, u32 *vaend) { + unsigned long start, end; int nchunks; struct rpcrdma_write_array *ary = @@ -113,9 +114,12 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) return NULL; } nchunks = ntohl(ary->wc_nchunks); - if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * nchunks)) > - (unsigned long)vaend) { + + start = (unsigned long)&ary->wc_array[0]; + end = (unsigned long)vaend; + if (nchunks < 0 || + nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || + (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", ary, nchunks, vaend); return NULL; @@ -129,6 +133,7 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) static u32 *decode_reply_array(u32 *va, u32 *vaend) { + unsigned long start, end; int nchunks; struct rpcrdma_write_array *ary = (struct rpcrdma_write_array *)va; @@ -143,9 +148,12 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend) return NULL; } nchunks = ntohl(ary->wc_nchunks); - if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * nchunks)) > - (unsigned long)vaend) { + + start = (unsigned long)&ary->wc_array[0]; + end = (unsigned long)vaend; + if (nchunks < 0 || + nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || + (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", ary, nchunks, vaend); return NULL; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 794312f22b9b..285dc0884115 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -86,7 +86,7 @@ static unsigned int max_memreg = RPCRDMA_LAST - 1; static struct ctl_table_header *sunrpc_table_header; -static ctl_table xr_tunables_table[] = { +static struct ctl_table xr_tunables_table[] = { { .procname = "rdma_slot_table_entries", .data = &xprt_rdma_slot_table_entries, @@ -138,7 +138,7 @@ static ctl_table xr_tunables_table[] = { { }, }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ffd50348a509..ddf0602603bd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -87,7 +87,7 @@ static struct ctl_table_header *sunrpc_table_header; * FIXME: changing the UDP slot table size should also resize the UDP * socket buffers for existing UDP transports */ -static ctl_table xs_tunables_table[] = { +static struct ctl_table xs_tunables_table[] = { { .procname = "udp_slot_table_entries", .data = &xprt_udp_slot_table_entries, @@ -143,7 +143,7 @@ static ctl_table xs_tunables_table[] = { { }, }; -static ctl_table sunrpc_table[] = { +static struct ctl_table sunrpc_table[] = { { .procname = "sunrpc", .mode = 0555, @@ -2534,7 +2534,6 @@ static struct rpc_xprt_ops bc_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, .alloc_slot = xprt_alloc_slot, - .rpcbind = xs_local_rpcbind, .buf_alloc = bc_malloc, .buf_free = bc_free, .send_request = bc_send_request, diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 4df8e02d9008..b282f7130d2b 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -8,6 +8,7 @@ tipc-y += addr.o bcast.o bearer.o config.o \ core.o handler.o link.o discover.o msg.o \ name_distr.o subscr.o name_table.o net.o \ netlink.o node.o node_subscr.o port.o ref.o \ - socket.o log.o eth_media.o + socket.o log.o eth_media.o server.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o +tipc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e5f3da507823..716de1ac6cb5 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -578,8 +578,7 @@ u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) * Returns 0 (packet sent successfully) under all circumstances, * since the broadcast link's pseudo-bearer never blocks */ -static int tipc_bcbearer_send(struct sk_buff *buf, - struct tipc_bearer *unused1, +static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1, struct tipc_media_addr *unused2) { int bp_index; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a93306557e00..6ee587b469fd 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -75,7 +75,8 @@ void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node); /** * tipc_nmap_equal - test for equality of node maps */ -static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b) +static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, + struct tipc_node_map *nm_b) { return !memcmp(nm_a, nm_b, sizeof(*nm_a)); } diff --git a/net/tipc/config.c b/net/tipc/config.c index f67866c765dd..c301a9a592d8 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2007, 2010-2012, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,12 +38,12 @@ #include "port.h" #include "name_table.h" #include "config.h" +#include "server.h" #define REPLY_TRUNCATED "<truncated>\n" -static u32 config_port_ref; - -static DEFINE_SPINLOCK(config_lock); +static DEFINE_MUTEX(config_mutex); +static struct tipc_server cfgsrv; static const void *req_tlv_area; /* request message TLV area */ static int req_tlv_space; /* request message TLV area size */ @@ -181,18 +181,7 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_own_addr) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - - /* - * Must temporarily release configuration spinlock while switching into - * networking mode as it calls tipc_eth_media_start(), which may sleep. - * Releasing the lock is harmless as other locally-issued configuration - * commands won't occur until this one completes, and remotely-issued - * configuration commands can't be received until a local configuration - * command to enable the first bearer is received and processed. - */ - spin_unlock_bh(&config_lock); tipc_core_start_net(addr); - spin_lock_bh(&config_lock); return tipc_cfg_reply_none(); } @@ -248,7 +237,7 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area { struct sk_buff *rep_tlv_buf; - spin_lock_bh(&config_lock); + mutex_lock(&config_mutex); /* Save request and reply details in a well-known location */ req_tlv_area = request_area; @@ -377,37 +366,31 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area /* Return reply buffer */ exit: - spin_unlock_bh(&config_lock); + mutex_unlock(&config_mutex); return rep_tlv_buf; } -static void cfg_named_msg_event(void *userdata, - u32 port_ref, - struct sk_buff **buf, - const unchar *msg, - u32 size, - u32 importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest) +static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len) { struct tipc_cfg_msg_hdr *req_hdr; struct tipc_cfg_msg_hdr *rep_hdr; struct sk_buff *rep_buf; + int ret; /* Validate configuration message header (ignore invalid message) */ - req_hdr = (struct tipc_cfg_msg_hdr *)msg; - if ((size < sizeof(*req_hdr)) || - (size != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || + req_hdr = (struct tipc_cfg_msg_hdr *)buf; + if ((len < sizeof(*req_hdr)) || + (len != TCM_ALIGN(ntohl(req_hdr->tcm_len))) || (ntohs(req_hdr->tcm_flags) != TCM_F_REQUEST)) { pr_warn("Invalid configuration message discarded\n"); return; } /* Generate reply for request (if can't, return request) */ - rep_buf = tipc_cfg_do_cmd(orig->node, - ntohs(req_hdr->tcm_type), - msg + sizeof(*req_hdr), - size - sizeof(*req_hdr), + rep_buf = tipc_cfg_do_cmd(addr->addr.id.node, ntohs(req_hdr->tcm_type), + buf + sizeof(*req_hdr), + len - sizeof(*req_hdr), BUF_HEADROOM + MAX_H_SIZE + sizeof(*rep_hdr)); if (rep_buf) { skb_push(rep_buf, sizeof(*rep_hdr)); @@ -415,57 +398,51 @@ static void cfg_named_msg_event(void *userdata, memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr)); rep_hdr->tcm_len = htonl(rep_buf->len); rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST); - } else { - rep_buf = *buf; - *buf = NULL; - } - /* NEED TO ADD CODE TO HANDLE FAILED SEND (SUCH AS CONGESTION) */ - tipc_send_buf2port(port_ref, orig, rep_buf, rep_buf->len); + ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data, + rep_buf->len); + if (ret < 0) + pr_err("Sending cfg reply message failed, no memory\n"); + + kfree_skb(rep_buf); + } } +static struct sockaddr_tipc cfgsrv_addr __read_mostly = { + .family = AF_TIPC, + .addrtype = TIPC_ADDR_NAMESEQ, + .addr.nameseq.type = TIPC_CFG_SRV, + .addr.nameseq.lower = 0, + .addr.nameseq.upper = 0, + .scope = TIPC_ZONE_SCOPE +}; + +static struct tipc_server cfgsrv __read_mostly = { + .saddr = &cfgsrv_addr, + .imp = TIPC_CRITICAL_IMPORTANCE, + .type = SOCK_RDM, + .max_rcvbuf_size = 64 * 1024, + .name = "cfg_server", + .tipc_conn_recvmsg = cfg_conn_msg_event, + .tipc_conn_new = NULL, + .tipc_conn_shutdown = NULL +}; + int tipc_cfg_init(void) { - struct tipc_name_seq seq; - int res; - - res = tipc_createport(NULL, TIPC_CRITICAL_IMPORTANCE, - NULL, NULL, NULL, - NULL, cfg_named_msg_event, NULL, - NULL, &config_port_ref); - if (res) - goto failed; - - seq.type = TIPC_CFG_SRV; - seq.lower = seq.upper = tipc_own_addr; - res = tipc_publish(config_port_ref, TIPC_ZONE_SCOPE, &seq); - if (res) - goto failed; - - return 0; - -failed: - pr_err("Unable to create configuration service\n"); - return res; + return tipc_server_start(&cfgsrv); } void tipc_cfg_reinit(void) { - struct tipc_name_seq seq; - int res; - - seq.type = TIPC_CFG_SRV; - seq.lower = seq.upper = 0; - tipc_withdraw(config_port_ref, TIPC_ZONE_SCOPE, &seq); + tipc_server_stop(&cfgsrv); - seq.lower = seq.upper = tipc_own_addr; - res = tipc_publish(config_port_ref, TIPC_ZONE_SCOPE, &seq); - if (res) - pr_err("Unable to reinitialize configuration service\n"); + cfgsrv_addr.addr.nameseq.lower = tipc_own_addr; + cfgsrv_addr.addr.nameseq.upper = tipc_own_addr; + tipc_server_start(&cfgsrv); } void tipc_cfg_stop(void) { - tipc_deleteport(config_port_ref); - config_port_ref = 0; + tipc_server_stop(&cfgsrv); } diff --git a/net/tipc/core.c b/net/tipc/core.c index 7ec2c1eb94f1..fd4eeeaa972a 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -2,7 +2,7 @@ * net/tipc/core.c: TIPC module code * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2006, 2010-2011, Wind River Systems + * Copyright (c) 2005-2006, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -39,6 +39,7 @@ #include "name_table.h" #include "subscr.h" #include "config.h" +#include "port.h" #include <linux/module.h> @@ -50,7 +51,7 @@ u32 tipc_own_addr __read_mostly; int tipc_max_ports __read_mostly; int tipc_net_id __read_mostly; int tipc_remote_management __read_mostly; - +int sysctl_tipc_rmem[3] __read_mostly; /* min/default/max */ /** * tipc_buf_acquire - creates a TIPC message buffer @@ -118,6 +119,7 @@ static void tipc_core_stop(void) tipc_nametbl_stop(); tipc_ref_table_stop(); tipc_socket_stop(); + tipc_unregister_sysctl(); } /** @@ -135,20 +137,21 @@ static int tipc_core_start(void) if (!res) res = tipc_nametbl_init(); if (!res) - res = tipc_subscr_start(); - if (!res) - res = tipc_cfg_init(); - if (!res) res = tipc_netlink_start(); if (!res) res = tipc_socket_init(); + if (!res) + res = tipc_register_sysctl(); + if (!res) + res = tipc_subscr_start(); + if (!res) + res = tipc_cfg_init(); if (res) tipc_core_stop(); return res; } - static int __init tipc_init(void) { int res; @@ -160,6 +163,11 @@ static int __init tipc_init(void) tipc_max_ports = CONFIG_TIPC_PORTS; tipc_net_id = 4711; + sysctl_tipc_rmem[0] = CONN_OVERLOAD_LIMIT >> 4 << TIPC_LOW_IMPORTANCE; + sysctl_tipc_rmem[1] = CONN_OVERLOAD_LIMIT >> 4 << + TIPC_CRITICAL_IMPORTANCE; + sysctl_tipc_rmem[2] = CONN_OVERLOAD_LIMIT; + res = tipc_core_start(); if (res) pr_err("Unable to start in single node mode\n"); diff --git a/net/tipc/core.h b/net/tipc/core.h index 0207db04179a..be72f8cebc53 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -1,8 +1,8 @@ /* * net/tipc/core.h: Include file for TIPC global declarations * - * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005-2007, 2010-2011, Wind River Systems + * Copyright (c) 2005-2006, 2013 Ericsson AB + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -80,6 +80,7 @@ extern u32 tipc_own_addr __read_mostly; extern int tipc_max_ports __read_mostly; extern int tipc_net_id __read_mostly; extern int tipc_remote_management __read_mostly; +extern int sysctl_tipc_rmem[3] __read_mostly; /* * Other global variables @@ -96,6 +97,18 @@ extern int tipc_netlink_start(void); extern void tipc_netlink_stop(void); extern int tipc_socket_init(void); extern void tipc_socket_stop(void); +extern int tipc_sock_create_local(int type, struct socket **res); +extern void tipc_sock_release_local(struct socket *sock); +extern int tipc_sock_accept_local(struct socket *sock, + struct socket **newsock, int flags); + +#ifdef CONFIG_SYSCTL +extern int tipc_register_sysctl(void); +extern void tipc_unregister_sysctl(void); +#else +#define tipc_register_sysctl() 0 +#define tipc_unregister_sysctl() +#endif /* * TIPC timer and signal code diff --git a/net/tipc/discover.c b/net/tipc/discover.c index eedff58d0387..ecc758c6eacf 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -70,8 +70,7 @@ struct tipc_link_req { * @dest_domain: network domain of node(s) which should respond to message * @b_ptr: ptr to bearer issuing message */ -static struct sk_buff *tipc_disc_init_msg(u32 type, - u32 dest_domain, +static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain, struct tipc_bearer *b_ptr) { struct sk_buff *buf = tipc_buf_acquire(INT_H_SIZE); @@ -346,8 +345,8 @@ exit: * * Returns 0 if successful, otherwise -errno. */ -int tipc_disc_create(struct tipc_bearer *b_ptr, - struct tipc_media_addr *dest, u32 dest_domain) +int tipc_disc_create(struct tipc_bearer *b_ptr, struct tipc_media_addr *dest, + u32 dest_domain) { struct tipc_link_req *req; diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 120a676a3360..40ea40cf6204 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -62,7 +62,7 @@ static struct eth_bearer eth_bearers[MAX_ETH_BEARERS]; static int eth_started; static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv); + void *dv); /* * Network device notifier info */ @@ -162,8 +162,7 @@ static void setup_bearer(struct work_struct *work) */ static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev = NULL; - struct net_device *pdev = NULL; + struct net_device *dev; struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; @@ -178,15 +177,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) } /* Find device with specified name */ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, pdev) { - if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { - dev = pdev; - dev_hold(dev); - break; - } - } - read_unlock(&dev_base_lock); + dev = dev_get_by_name(&init_net, driver_name); if (!dev) return -ENODEV; @@ -251,9 +242,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr) * specified device. */ static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv) + void *ptr) { - struct net_device *dev = (struct net_device *)dv; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct eth_bearer *eb_ptr = ð_bearers[0]; struct eth_bearer *stop = ð_bearers[MAX_ETH_BEARERS]; diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c index 2a2864c25e15..9934a32bfa87 100644 --- a/net/tipc/ib_media.c +++ b/net/tipc/ib_media.c @@ -155,8 +155,7 @@ static void setup_bearer(struct work_struct *work) */ static int enable_bearer(struct tipc_bearer *tb_ptr) { - struct net_device *dev = NULL; - struct net_device *pdev = NULL; + struct net_device *dev; struct ib_bearer *ib_ptr = &ib_bearers[0]; struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; @@ -171,15 +170,7 @@ static int enable_bearer(struct tipc_bearer *tb_ptr) } /* Find device with specified name */ - read_lock(&dev_base_lock); - for_each_netdev(&init_net, pdev) { - if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { - dev = pdev; - dev_hold(dev); - break; - } - } - read_unlock(&dev_base_lock); + dev = dev_get_by_name(&init_net, driver_name); if (!dev) return -ENODEV; @@ -244,9 +235,9 @@ static void disable_bearer(struct tipc_bearer *tb_ptr) * specified device. */ static int recv_notification(struct notifier_block *nb, unsigned long evt, - void *dv) + void *ptr) { - struct net_device *dev = (struct net_device *)dv; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ib_bearer *ib_ptr = &ib_bearers[0]; struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; @@ -301,13 +292,7 @@ static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) if (str_size < 60) /* 60 = 19 * strlen("xx:") + strlen("xx\0") */ return 1; - sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:" - "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", - a->value[0], a->value[1], a->value[2], a->value[3], - a->value[4], a->value[5], a->value[6], a->value[7], - a->value[8], a->value[9], a->value[10], a->value[11], - a->value[12], a->value[13], a->value[14], a->value[15], - a->value[16], a->value[17], a->value[18], a->value[19]); + sprintf(str_buf, "%20phC", a->value); return 0; } diff --git a/net/tipc/link.c b/net/tipc/link.c index a80feee5197a..0cc3d9015c5d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2,7 +2,7 @@ * net/tipc/link.c: TIPC link code * * Copyright (c) 1996-2007, 2012, Ericsson AB - * Copyright (c) 2004-2007, 2010-2011, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,6 +41,8 @@ #include "discover.h" #include "config.h" +#include <linux/pkt_sched.h> + /* * Error message prefixes */ @@ -771,8 +773,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event) * link_bundle_buf(): Append contents of a buffer to * the tail of an existing one. */ -static int link_bundle_buf(struct tipc_link *l_ptr, - struct sk_buff *bundler, +static int link_bundle_buf(struct tipc_link *l_ptr, struct sk_buff *bundler, struct sk_buff *buf) { struct tipc_msg *bundler_msg = buf_msg(bundler); @@ -1057,40 +1058,6 @@ static int link_send_buf_fast(struct tipc_link *l_ptr, struct sk_buff *buf, } /* - * tipc_send_buf_fast: Entry for data messages where the - * destination node is known and the header is complete, - * inclusive total message length. - * Returns user data length. - */ -int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) -{ - struct tipc_link *l_ptr; - struct tipc_node *n_ptr; - int res; - u32 selector = msg_origport(buf_msg(buf)) & 1; - u32 dummy; - - read_lock_bh(&tipc_net_lock); - n_ptr = tipc_node_find(destnode); - if (likely(n_ptr)) { - tipc_node_lock(n_ptr); - l_ptr = n_ptr->active_links[selector]; - if (likely(l_ptr)) { - res = link_send_buf_fast(l_ptr, buf, &dummy); - tipc_node_unlock(n_ptr); - read_unlock_bh(&tipc_net_lock); - return res; - } - tipc_node_unlock(n_ptr); - } - read_unlock_bh(&tipc_net_lock); - res = msg_data_sz(buf_msg(buf)); - tipc_reject_msg(buf, TIPC_ERR_NO_NODE); - return res; -} - - -/* * tipc_link_send_sections_fast: Entry for messages where the * destination processor is known and the header is complete, * except for total message length. @@ -1098,8 +1065,7 @@ int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode) */ int tipc_link_send_sections_fast(struct tipc_port *sender, struct iovec const *msg_sect, - const u32 num_sect, - unsigned int total_len, + const u32 num_sect, unsigned int total_len, u32 destaddr) { struct tipc_msg *hdr = &sender->phdr; @@ -1115,7 +1081,10 @@ again: * (Must not hold any locks while building message.) */ res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, - sender->max_pkt, !sender->user_port, &buf); + sender->max_pkt, &buf); + /* Exit if build request was invalid */ + if (unlikely(res < 0)) + return res; read_lock_bh(&tipc_net_lock); node = tipc_node_find(destaddr); @@ -1132,10 +1101,6 @@ exit: return res; } - /* Exit if build request was invalid */ - if (unlikely(res < 0)) - goto exit; - /* Exit if link (or bearer) is congested */ if (link_congested(l_ptr) || tipc_bearer_blocked(l_ptr->b_ptr)) { @@ -1189,8 +1154,7 @@ exit: */ static int link_send_sections_long(struct tipc_port *sender, struct iovec const *msg_sect, - u32 num_sect, - unsigned int total_len, + u32 num_sect, unsigned int total_len, u32 destaddr) { struct tipc_link *l_ptr; @@ -1204,6 +1168,7 @@ static int link_send_sections_long(struct tipc_port *sender, const unchar *sect_crs; int curr_sect; u32 fragm_no; + int res = 0; again: fragm_no = 1; @@ -1250,18 +1215,15 @@ again: else sz = fragm_rest; - if (likely(!sender->user_port)) { - if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { + if (copy_from_user(buf->data + fragm_crs, sect_crs, sz)) { + res = -EFAULT; error: - for (; buf_chain; buf_chain = buf) { - buf = buf_chain->next; - kfree_skb(buf_chain); - } - return -EFAULT; + for (; buf_chain; buf_chain = buf) { + buf = buf_chain->next; + kfree_skb(buf_chain); } - } else - skb_copy_to_linear_data_offset(buf, fragm_crs, - sect_crs, sz); + return res; + } sect_crs += sz; sect_rest -= sz; fragm_crs += sz; @@ -1281,8 +1243,10 @@ error: msg_set_fragm_no(&fragm_hdr, ++fragm_no); prev = buf; buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE); - if (!buf) + if (!buf) { + res = -ENOMEM; goto error; + } buf->next = NULL; prev->next = buf; @@ -1446,7 +1410,7 @@ static void link_reset_all(unsigned long addr) } static void link_retransmit_failure(struct tipc_link *l_ptr, - struct sk_buff *buf) + struct sk_buff *buf) { struct tipc_msg *msg = buf_msg(buf); @@ -1901,8 +1865,8 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr, * Send protocol message to the other endpoint. */ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, - int probe_msg, u32 gap, u32 tolerance, - u32 priority, u32 ack_mtu) + int probe_msg, u32 gap, u32 tolerance, + u32 priority, u32 ack_mtu) { struct sk_buff *buf = NULL; struct tipc_msg *msg = l_ptr->pmsg; @@ -1988,6 +1952,7 @@ void tipc_link_send_proto_msg(struct tipc_link *l_ptr, u32 msg_typ, return; skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg)); + buf->priority = TC_PRIO_CONTROL; /* Defer message if bearer is already blocked */ if (tipc_bearer_blocked(l_ptr->b_ptr)) { @@ -2145,8 +2110,7 @@ exit: * another bearer. Owner node is locked. */ static void tipc_link_tunnel(struct tipc_link *l_ptr, - struct tipc_msg *tunnel_hdr, - struct tipc_msg *msg, + struct tipc_msg *tunnel_hdr, struct tipc_msg *msg, u32 selector) { struct tipc_link *tunnel; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index f2db8a87d9c5..ced60e2fc4f7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -51,8 +51,8 @@ u32 tipc_msg_tot_importance(struct tipc_msg *m) } -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 hsize, u32 destnode) +void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, + u32 destnode) { memset(m, 0, hsize); msg_set_version(m); @@ -73,8 +73,8 @@ void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, * Returns message data size or errno */ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - int max_size, int usrmem, struct sk_buff **buf) + u32 num_sect, unsigned int total_len, int max_size, + struct sk_buff **buf) { int dsz, sz, hsz, pos, res, cnt; @@ -92,14 +92,9 @@ int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, return -ENOMEM; skb_copy_to_linear_data(*buf, hdr, hsz); for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) { - if (likely(usrmem)) - res = !copy_from_user((*buf)->data + pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); - else - skb_copy_to_linear_data_offset(*buf, pos, - msg_sect[cnt].iov_base, - msg_sect[cnt].iov_len); + skb_copy_to_linear_data_offset(*buf, pos, + msg_sect[cnt].iov_base, + msg_sect[cnt].iov_len); pos += msg_sect[cnt].iov_len; } if (likely(res)) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index ba2a72beea68..5e4ccf5c27df 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -719,9 +719,9 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) } u32 tipc_msg_tot_importance(struct tipc_msg *m); -void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 hsize, u32 destnode); +void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type, u32 hsize, + u32 destnode); int tipc_msg_build(struct tipc_msg *hdr, struct iovec const *msg_sect, - u32 num_sect, unsigned int total_len, - int max_size, int usrmem, struct sk_buff **buf); + u32 num_sect, unsigned int total_len, int max_size, + struct sk_buff **buf); #endif diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 24b167914311..09dcd54b04e1 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -440,7 +440,7 @@ found: * sequence overlapping with the requested sequence */ static void tipc_nameseq_subscribe(struct name_seq *nseq, - struct tipc_subscription *s) + struct tipc_subscription *s) { struct sub_seq *sseq = nseq->sseqs; @@ -662,7 +662,7 @@ exit: * tipc_nametbl_publish - add name publication to network name tables */ struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key) + u32 scope, u32 port_ref, u32 key) { struct publication *publ; @@ -753,7 +753,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) * subseq_list - print specified sub-sequence contents into the given buffer */ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, - u32 index) + u32 index) { char portIdStr[27]; const char *scope_str[] = {"", " zone", " cluster", " node"}; @@ -792,7 +792,7 @@ static int subseq_list(struct sub_seq *sseq, char *buf, int len, u32 depth, * nameseq_list - print specified name sequence contents into the given buffer */ static int nameseq_list(struct name_seq *seq, char *buf, int len, u32 depth, - u32 type, u32 lowbound, u32 upbound, u32 index) + u32 type, u32 lowbound, u32 upbound, u32 index) { struct sub_seq *sseq; char typearea[11]; @@ -849,7 +849,7 @@ static int nametbl_header(char *buf, int len, u32 depth) * nametbl_list - print specified name table contents into the given buffer */ static int nametbl_list(char *buf, int len, u32 depth_info, - u32 type, u32 lowbound, u32 upbound) + u32 type, u32 lowbound, u32 upbound) { struct hlist_head *seq_head; struct name_seq *seq; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 71cb4dc712df..f02f48b9a216 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -87,14 +87,15 @@ extern rwlock_t tipc_nametbl_lock; struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space); u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *node); int tipc_nametbl_mc_translate(u32 type, u32 lower, u32 upper, u32 limit, - struct tipc_port_list *dports); + struct tipc_port_list *dports); struct publication *tipc_nametbl_publish(u32 type, u32 lower, u32 upper, - u32 scope, u32 port_ref, u32 key); + u32 scope, u32 port_ref, u32 key); int tipc_nametbl_withdraw(u32 type, u32 lower, u32 ref, u32 key); struct publication *tipc_nametbl_insert_publ(u32 type, u32 lower, u32 upper, - u32 scope, u32 node, u32 ref, u32 key); -struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, - u32 node, u32 ref, u32 key); + u32 scope, u32 node, u32 ref, + u32 key); +struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower, u32 node, + u32 ref, u32 key); void tipc_nametbl_subscribe(struct tipc_subscription *s); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(void); diff --git a/net/tipc/node_subscr.c b/net/tipc/node_subscr.c index 5e34b015da45..8a7384c04add 100644 --- a/net/tipc/node_subscr.c +++ b/net/tipc/node_subscr.c @@ -42,7 +42,7 @@ * tipc_nodesub_subscribe - create "node down" subscription for specified node */ void tipc_nodesub_subscribe(struct tipc_node_subscr *node_sub, u32 addr, - void *usr_handle, net_ev_handler handle_down) + void *usr_handle, net_ev_handler handle_down) { if (in_own_node(addr)) { node_sub->node = NULL; diff --git a/net/tipc/port.c b/net/tipc/port.c index 18098cac62f2..b3ed2fcab4fb 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -2,7 +2,7 @@ * net/tipc/port.c: TIPC port code * * Copyright (c) 1992-2007, Ericsson AB - * Copyright (c) 2004-2008, 2010-2011, Wind River Systems + * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -46,11 +46,7 @@ #define MAX_REJECT_SIZE 1024 -static struct sk_buff *msg_queue_head; -static struct sk_buff *msg_queue_tail; - DEFINE_SPINLOCK(tipc_port_list_lock); -static DEFINE_SPINLOCK(queue_lock); static LIST_HEAD(ports); static void port_handle_node_down(unsigned long ref); @@ -119,7 +115,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, msg_set_nameupper(hdr, seq->upper); msg_set_hdr_sz(hdr, MCAST_H_SIZE); res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - !oport->user_port, &buf); + &buf); if (unlikely(!buf)) return res; @@ -206,14 +202,15 @@ exit: } /** - * tipc_createport_raw - create a generic TIPC port + * tipc_createport - create a generic TIPC port * * Returns pointer to (locked) TIPC port, or NULL if unable to create it */ -struct tipc_port *tipc_createport_raw(void *usr_handle, - u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), - void (*wakeup)(struct tipc_port *), - const u32 importance) +struct tipc_port *tipc_createport(struct sock *sk, + u32 (*dispatcher)(struct tipc_port *, + struct sk_buff *), + void (*wakeup)(struct tipc_port *), + const u32 importance) { struct tipc_port *p_ptr; struct tipc_msg *msg; @@ -231,14 +228,13 @@ struct tipc_port *tipc_createport_raw(void *usr_handle, return NULL; } - p_ptr->usr_handle = usr_handle; + p_ptr->sk = sk; p_ptr->max_pkt = MAX_PKT_DEFAULT; p_ptr->ref = ref; INIT_LIST_HEAD(&p_ptr->wait_list); INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list); p_ptr->dispatcher = dispatcher; p_ptr->wakeup = wakeup; - p_ptr->user_port = NULL; k_init_timer(&p_ptr->timer, (Handler)port_timeout, ref); INIT_LIST_HEAD(&p_ptr->publications); INIT_LIST_HEAD(&p_ptr->port_list); @@ -275,7 +271,6 @@ int tipc_deleteport(u32 ref) buf = port_build_peer_abort_msg(p_ptr, TIPC_ERR_NO_PORT); tipc_nodesub_unsubscribe(&p_ptr->subscription); } - kfree(p_ptr->user_port); spin_lock_bh(&tipc_port_list_lock); list_del(&p_ptr->port_list); @@ -448,7 +443,7 @@ int tipc_port_reject_sections(struct tipc_port *p_ptr, struct tipc_msg *hdr, int res; res = tipc_msg_build(hdr, msg_sect, num_sect, total_len, MAX_MSG_SIZE, - !p_ptr->user_port, &buf); + &buf); if (!buf) return res; @@ -668,215 +663,6 @@ void tipc_port_reinit(void) spin_unlock_bh(&tipc_port_list_lock); } - -/* - * port_dispatcher_sigh(): Signal handler for messages destinated - * to the tipc_port interface. - */ -static void port_dispatcher_sigh(void *dummy) -{ - struct sk_buff *buf; - - spin_lock_bh(&queue_lock); - buf = msg_queue_head; - msg_queue_head = NULL; - spin_unlock_bh(&queue_lock); - - while (buf) { - struct tipc_port *p_ptr; - struct user_port *up_ptr; - struct tipc_portid orig; - struct tipc_name_seq dseq; - void *usr_handle; - int connected; - int peer_invalid; - int published; - u32 message_type; - - struct sk_buff *next = buf->next; - struct tipc_msg *msg = buf_msg(buf); - u32 dref = msg_destport(msg); - - message_type = msg_type(msg); - if (message_type > TIPC_DIRECT_MSG) - goto reject; /* Unsupported message type */ - - p_ptr = tipc_port_lock(dref); - if (!p_ptr) - goto reject; /* Port deleted while msg in queue */ - - orig.ref = msg_origport(msg); - orig.node = msg_orignode(msg); - up_ptr = p_ptr->user_port; - usr_handle = up_ptr->usr_handle; - connected = p_ptr->connected; - peer_invalid = connected && !tipc_port_peer_msg(p_ptr, msg); - published = p_ptr->published; - - if (unlikely(msg_errcode(msg))) - goto err; - - switch (message_type) { - - case TIPC_CONN_MSG:{ - tipc_conn_msg_event cb = up_ptr->conn_msg_cb; - u32 dsz; - - tipc_port_unlock(p_ptr); - if (unlikely(!cb)) - goto reject; - if (unlikely(!connected)) { - if (tipc_connect(dref, &orig)) - goto reject; - } else if (peer_invalid) - goto reject; - dsz = msg_data_sz(msg); - if (unlikely(dsz && - (++p_ptr->conn_unacked >= - TIPC_FLOW_CONTROL_WIN))) - tipc_acknowledge(dref, - p_ptr->conn_unacked); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), dsz); - break; - } - case TIPC_DIRECT_MSG:{ - tipc_msg_event cb = up_ptr->msg_cb; - - tipc_port_unlock(p_ptr); - if (unlikely(!cb || connected)) - goto reject; - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_importance(msg), - &orig); - break; - } - case TIPC_MCAST_MSG: - case TIPC_NAMED_MSG:{ - tipc_named_msg_event cb = up_ptr->named_msg_cb; - - tipc_port_unlock(p_ptr); - if (unlikely(!cb || connected || !published)) - goto reject; - dseq.type = msg_nametype(msg); - dseq.lower = msg_nameinst(msg); - dseq.upper = (message_type == TIPC_NAMED_MSG) - ? dseq.lower : msg_nameupper(msg); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_importance(msg), - &orig, &dseq); - break; - } - } - if (buf) - kfree_skb(buf); - buf = next; - continue; -err: - switch (message_type) { - - case TIPC_CONN_MSG:{ - tipc_conn_shutdown_event cb = - up_ptr->conn_err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || !connected || peer_invalid) - break; - tipc_disconnect(dref); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg)); - break; - } - case TIPC_DIRECT_MSG:{ - tipc_msg_err_event cb = up_ptr->err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || connected) - break; - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg), &orig); - break; - } - case TIPC_MCAST_MSG: - case TIPC_NAMED_MSG:{ - tipc_named_msg_err_event cb = - up_ptr->named_err_cb; - - tipc_port_unlock(p_ptr); - if (!cb || connected) - break; - dseq.type = msg_nametype(msg); - dseq.lower = msg_nameinst(msg); - dseq.upper = (message_type == TIPC_NAMED_MSG) - ? dseq.lower : msg_nameupper(msg); - skb_pull(buf, msg_hdr_sz(msg)); - cb(usr_handle, dref, &buf, msg_data(msg), - msg_data_sz(msg), msg_errcode(msg), &dseq); - break; - } - } - if (buf) - kfree_skb(buf); - buf = next; - continue; -reject: - tipc_reject_msg(buf, TIPC_ERR_NO_PORT); - buf = next; - } -} - -/* - * port_dispatcher(): Dispatcher for messages destinated - * to the tipc_port interface. Called with port locked. - */ -static u32 port_dispatcher(struct tipc_port *dummy, struct sk_buff *buf) -{ - buf->next = NULL; - spin_lock_bh(&queue_lock); - if (msg_queue_head) { - msg_queue_tail->next = buf; - msg_queue_tail = buf; - } else { - msg_queue_tail = msg_queue_head = buf; - tipc_k_signal((Handler)port_dispatcher_sigh, 0); - } - spin_unlock_bh(&queue_lock); - return 0; -} - -/* - * Wake up port after congestion: Called with port locked - */ -static void port_wakeup_sh(unsigned long ref) -{ - struct tipc_port *p_ptr; - struct user_port *up_ptr; - tipc_continue_event cb = NULL; - void *uh = NULL; - - p_ptr = tipc_port_lock(ref); - if (p_ptr) { - up_ptr = p_ptr->user_port; - if (up_ptr) { - cb = up_ptr->continue_event_cb; - uh = up_ptr->usr_handle; - } - tipc_port_unlock(p_ptr); - } - if (cb) - cb(uh, ref); -} - - -static void port_wakeup(struct tipc_port *p_ptr) -{ - tipc_k_signal((Handler)port_wakeup_sh, p_ptr->ref); -} - void tipc_acknowledge(u32 ref, u32 ack) { struct tipc_port *p_ptr; @@ -893,50 +679,6 @@ void tipc_acknowledge(u32 ref, u32 ack) tipc_net_route_msg(buf); } -/* - * tipc_createport(): user level call. - */ -int tipc_createport(void *usr_handle, - unsigned int importance, - tipc_msg_err_event error_cb, - tipc_named_msg_err_event named_error_cb, - tipc_conn_shutdown_event conn_error_cb, - tipc_msg_event msg_cb, - tipc_named_msg_event named_msg_cb, - tipc_conn_msg_event conn_msg_cb, - tipc_continue_event continue_event_cb, /* May be zero */ - u32 *portref) -{ - struct user_port *up_ptr; - struct tipc_port *p_ptr; - - up_ptr = kmalloc(sizeof(*up_ptr), GFP_ATOMIC); - if (!up_ptr) { - pr_warn("Port creation failed, no memory\n"); - return -ENOMEM; - } - p_ptr = tipc_createport_raw(NULL, port_dispatcher, port_wakeup, - importance); - if (!p_ptr) { - kfree(up_ptr); - return -ENOMEM; - } - - p_ptr->user_port = up_ptr; - up_ptr->usr_handle = usr_handle; - up_ptr->ref = p_ptr->ref; - up_ptr->err_cb = error_cb; - up_ptr->named_err_cb = named_error_cb; - up_ptr->conn_err_cb = conn_error_cb; - up_ptr->msg_cb = msg_cb; - up_ptr->named_msg_cb = named_msg_cb; - up_ptr->conn_msg_cb = conn_msg_cb; - up_ptr->continue_event_cb = continue_event_cb; - *portref = p_ptr->ref; - tipc_port_unlock(p_ptr); - return 0; -} - int tipc_portimportance(u32 ref, unsigned int *importance) { struct tipc_port *p_ptr; @@ -1184,7 +926,7 @@ static int tipc_port_recv_sections(struct tipc_port *sender, unsigned int num_se int res; res = tipc_msg_build(&sender->phdr, msg_sect, num_sect, total_len, - MAX_MSG_SIZE, !sender->user_port, &buf); + MAX_MSG_SIZE, &buf); if (likely(buf)) tipc_port_recv_msg(buf); return res; @@ -1322,43 +1064,3 @@ int tipc_send2port(u32 ref, struct tipc_portid const *dest, } return -ELINKCONG; } - -/** - * tipc_send_buf2port - send message buffer to port identity - */ -int tipc_send_buf2port(u32 ref, struct tipc_portid const *dest, - struct sk_buff *buf, unsigned int dsz) -{ - struct tipc_port *p_ptr; - struct tipc_msg *msg; - int res; - - p_ptr = (struct tipc_port *)tipc_ref_deref(ref); - if (!p_ptr || p_ptr->connected) - return -EINVAL; - - msg = &p_ptr->phdr; - msg_set_type(msg, TIPC_DIRECT_MSG); - msg_set_destnode(msg, dest->node); - msg_set_destport(msg, dest->ref); - msg_set_hdr_sz(msg, BASIC_H_SIZE); - msg_set_size(msg, BASIC_H_SIZE + dsz); - if (skb_cow(buf, BASIC_H_SIZE)) - return -ENOMEM; - - skb_push(buf, BASIC_H_SIZE); - skb_copy_to_linear_data(buf, msg, BASIC_H_SIZE); - - if (in_own_node(dest->node)) - res = tipc_port_recv_msg(buf); - else - res = tipc_send_buf_fast(buf, dest->node); - if (likely(res != -ELINKCONG)) { - if (res > 0) - p_ptr->sent++; - return res; - } - if (port_unreliable(p_ptr)) - return dsz; - return -ELINKCONG; -} diff --git a/net/tipc/port.h b/net/tipc/port.h index fb66e2e5f4d1..5a7026b9c345 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -2,7 +2,7 @@ * net/tipc/port.h: Include file for TIPC port code * * Copyright (c) 1994-2007, Ericsson AB - * Copyright (c) 2004-2007, 2010-2011, Wind River Systems + * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,60 +43,12 @@ #include "node_subscr.h" #define TIPC_FLOW_CONTROL_WIN 512 - -typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason, - struct tipc_portid const *attmpt_destid); - -typedef void (*tipc_named_msg_err_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason, - struct tipc_name_seq const *attmpt_dest); - -typedef void (*tipc_conn_shutdown_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, int reason); - -typedef void (*tipc_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, unsigned int importance, - struct tipc_portid const *origin); - -typedef void (*tipc_named_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size, unsigned int importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest); - -typedef void (*tipc_conn_msg_event) (void *usr_handle, u32 portref, - struct sk_buff **buf, unsigned char const *data, - unsigned int size); - -typedef void (*tipc_continue_event) (void *usr_handle, u32 portref); - -/** - * struct user_port - TIPC user port (used with native API) - * @usr_handle: user-specified field - * @ref: object reference to associated TIPC port - * - * <various callback routines> - */ -struct user_port { - void *usr_handle; - u32 ref; - tipc_msg_err_event err_cb; - tipc_named_msg_err_event named_err_cb; - tipc_conn_shutdown_event conn_err_cb; - tipc_msg_event msg_cb; - tipc_named_msg_event named_msg_cb; - tipc_conn_msg_event conn_msg_cb; - tipc_continue_event continue_event_cb; -}; +#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ + SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) /** * struct tipc_port - TIPC port structure - * @usr_handle: pointer to additional user-defined information about port + * @sk: pointer to socket handle * @lock: pointer to spinlock for controlling access to port * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established @@ -110,7 +62,6 @@ struct user_port { * @port_list: adjacent ports in TIPC's global list of ports * @dispatcher: ptr to routine which handles received messages * @wakeup: ptr to routine to call when port is no longer congested - * @user_port: ptr to user port associated with port (if any) * @wait_list: adjacent ports in list of ports waiting on link congestion * @waiting_pkts: * @sent: # of non-empty messages sent by port @@ -123,7 +74,7 @@ struct user_port { * @subscription: "node down" subscription used to terminate failed connections */ struct tipc_port { - void *usr_handle; + struct sock *sk; spinlock_t *lock; int connected; u32 conn_type; @@ -137,7 +88,6 @@ struct tipc_port { struct list_head port_list; u32 (*dispatcher)(struct tipc_port *, struct sk_buff *); void (*wakeup)(struct tipc_port *); - struct user_port *user_port; struct list_head wait_list; u32 waiting_pkts; u32 sent; @@ -156,24 +106,16 @@ struct tipc_port_list; /* * TIPC port manipulation routines */ -struct tipc_port *tipc_createport_raw(void *usr_handle, - u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), - void (*wakeup)(struct tipc_port *), const u32 importance); +struct tipc_port *tipc_createport(struct sock *sk, + u32 (*dispatcher)(struct tipc_port *, + struct sk_buff *), + void (*wakeup)(struct tipc_port *), + const u32 importance); int tipc_reject_msg(struct sk_buff *buf, u32 err); -int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode); - void tipc_acknowledge(u32 port_ref, u32 ack); -int tipc_createport(void *usr_handle, - unsigned int importance, tipc_msg_err_event error_cb, - tipc_named_msg_err_event named_error_cb, - tipc_conn_shutdown_event conn_error_cb, tipc_msg_event msg_cb, - tipc_named_msg_event named_msg_cb, - tipc_conn_msg_event conn_msg_cb, - tipc_continue_event continue_event_cb, u32 *portref); - int tipc_deleteport(u32 portref); int tipc_portimportance(u32 portref, unsigned int *importance); @@ -186,9 +128,9 @@ int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); int tipc_publish(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); + struct tipc_name_seq const *name_seq); int tipc_withdraw(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); + struct tipc_name_seq const *name_seq); int tipc_connect(u32 portref, struct tipc_portid const *port); @@ -220,9 +162,6 @@ int tipc_send2port(u32 portref, struct tipc_portid const *dest, unsigned int num_sect, struct iovec const *msg_sect, unsigned int total_len); -int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest, - struct sk_buff *buf, unsigned int dsz); - int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, unsigned int section_count, struct iovec const *msg, unsigned int total_len); diff --git a/net/tipc/server.c b/net/tipc/server.c new file mode 100644 index 000000000000..fd3fa57a410e --- /dev/null +++ b/net/tipc/server.c @@ -0,0 +1,605 @@ +/* + * net/tipc/server.c: TIPC server infrastructure + * + * Copyright (c) 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "server.h" +#include "core.h" +#include <net/sock.h> + +/* Number of messages to send before rescheduling */ +#define MAX_SEND_MSG_COUNT 25 +#define MAX_RECV_MSG_COUNT 25 +#define CF_CONNECTED 1 + +#define sock2con(x) ((struct tipc_conn *)(x)->sk_user_data) + +/** + * struct tipc_conn - TIPC connection structure + * @kref: reference counter to connection object + * @conid: connection identifier + * @sock: socket handler associated with connection + * @flags: indicates connection state + * @server: pointer to connected server + * @rwork: receive work item + * @usr_data: user-specified field + * @rx_action: what to do when connection socket is active + * @outqueue: pointer to first outbound message in queue + * @outqueue_lock: controll access to the outqueue + * @outqueue: list of connection objects for its server + * @swork: send work item + */ +struct tipc_conn { + struct kref kref; + int conid; + struct socket *sock; + unsigned long flags; + struct tipc_server *server; + struct work_struct rwork; + int (*rx_action) (struct tipc_conn *con); + void *usr_data; + struct list_head outqueue; + spinlock_t outqueue_lock; + struct work_struct swork; +}; + +/* An entry waiting to be sent */ +struct outqueue_entry { + struct list_head list; + struct kvec iov; + struct sockaddr_tipc dest; +}; + +static void tipc_recv_work(struct work_struct *work); +static void tipc_send_work(struct work_struct *work); +static void tipc_clean_outqueues(struct tipc_conn *con); + +static void tipc_conn_kref_release(struct kref *kref) +{ + struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); + struct tipc_server *s = con->server; + + if (con->sock) { + tipc_sock_release_local(con->sock); + con->sock = NULL; + } + + tipc_clean_outqueues(con); + + if (con->conid) + s->tipc_conn_shutdown(con->conid, con->usr_data); + + kfree(con); +} + +static void conn_put(struct tipc_conn *con) +{ + kref_put(&con->kref, tipc_conn_kref_release); +} + +static void conn_get(struct tipc_conn *con) +{ + kref_get(&con->kref); +} + +static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) +{ + struct tipc_conn *con; + + spin_lock_bh(&s->idr_lock); + con = idr_find(&s->conn_idr, conid); + if (con) + conn_get(con); + spin_unlock_bh(&s->idr_lock); + return con; +} + +static void sock_data_ready(struct sock *sk, int unused) +{ + struct tipc_conn *con; + + read_lock(&sk->sk_callback_lock); + con = sock2con(sk); + if (con && test_bit(CF_CONNECTED, &con->flags)) { + conn_get(con); + if (!queue_work(con->server->rcv_wq, &con->rwork)) + conn_put(con); + } + read_unlock(&sk->sk_callback_lock); +} + +static void sock_write_space(struct sock *sk) +{ + struct tipc_conn *con; + + read_lock(&sk->sk_callback_lock); + con = sock2con(sk); + if (con && test_bit(CF_CONNECTED, &con->flags)) { + conn_get(con); + if (!queue_work(con->server->send_wq, &con->swork)) + conn_put(con); + } + read_unlock(&sk->sk_callback_lock); +} + +static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) +{ + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_data_ready = sock_data_ready; + sk->sk_write_space = sock_write_space; + sk->sk_user_data = con; + + con->sock = sock; + + write_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_unregister_callbacks(struct tipc_conn *con) +{ + struct sock *sk = con->sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + sk->sk_user_data = NULL; + write_unlock_bh(&sk->sk_callback_lock); +} + +static void tipc_close_conn(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + + if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); + + tipc_unregister_callbacks(con); + + /* We shouldn't flush pending works as we may be in the + * thread. In fact the races with pending rx/tx work structs + * are harmless for us here as we have already deleted this + * connection from server connection list and set + * sk->sk_user_data to 0 before releasing connection object. + */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + + conn_put(con); + } +} + +static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) +{ + struct tipc_conn *con; + int ret; + + con = kzalloc(sizeof(struct tipc_conn), GFP_ATOMIC); + if (!con) + return ERR_PTR(-ENOMEM); + + kref_init(&con->kref); + INIT_LIST_HEAD(&con->outqueue); + spin_lock_init(&con->outqueue_lock); + INIT_WORK(&con->swork, tipc_send_work); + INIT_WORK(&con->rwork, tipc_recv_work); + + spin_lock_bh(&s->idr_lock); + ret = idr_alloc(&s->conn_idr, con, 0, 0, GFP_ATOMIC); + if (ret < 0) { + kfree(con); + spin_unlock_bh(&s->idr_lock); + return ERR_PTR(-ENOMEM); + } + con->conid = ret; + s->idr_in_use++; + spin_unlock_bh(&s->idr_lock); + + set_bit(CF_CONNECTED, &con->flags); + con->server = s; + + return con; +} + +static int tipc_receive_from_sock(struct tipc_conn *con) +{ + struct msghdr msg = {}; + struct tipc_server *s = con->server; + struct sockaddr_tipc addr; + struct kvec iov; + void *buf; + int ret; + + buf = kmem_cache_alloc(s->rcvbuf_cache, GFP_ATOMIC); + if (!buf) { + ret = -ENOMEM; + goto out_close; + } + + iov.iov_base = buf; + iov.iov_len = s->max_rcvbuf_size; + msg.msg_name = &addr; + ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, + MSG_DONTWAIT); + if (ret <= 0) { + kmem_cache_free(s->rcvbuf_cache, buf); + goto out_close; + } + + s->tipc_conn_recvmsg(con->conid, &addr, con->usr_data, buf, ret); + + kmem_cache_free(s->rcvbuf_cache, buf); + + return 0; + +out_close: + if (ret != -EWOULDBLOCK) + tipc_close_conn(con); + else if (ret == 0) + /* Don't return success if we really got EOF */ + ret = -EAGAIN; + + return ret; +} + +static int tipc_accept_from_sock(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + struct socket *sock = con->sock; + struct socket *newsock; + struct tipc_conn *newcon; + int ret; + + ret = tipc_sock_accept_local(sock, &newsock, O_NONBLOCK); + if (ret < 0) + return ret; + + newcon = tipc_alloc_conn(con->server); + if (IS_ERR(newcon)) { + ret = PTR_ERR(newcon); + sock_release(newsock); + return ret; + } + + newcon->rx_action = tipc_receive_from_sock; + tipc_register_callbacks(newsock, newcon); + + /* Notify that new connection is incoming */ + newcon->usr_data = s->tipc_conn_new(newcon->conid); + + /* Wake up receive process in case of 'SYN+' message */ + newsock->sk->sk_data_ready(newsock->sk, 0); + return ret; +} + +static struct socket *tipc_create_listen_sock(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + struct socket *sock = NULL; + int ret; + + ret = tipc_sock_create_local(s->type, &sock); + if (ret < 0) + return NULL; + ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, + (char *)&s->imp, sizeof(s->imp)); + if (ret < 0) + goto create_err; + ret = kernel_bind(sock, (struct sockaddr *)s->saddr, sizeof(*s->saddr)); + if (ret < 0) + goto create_err; + + switch (s->type) { + case SOCK_STREAM: + case SOCK_SEQPACKET: + con->rx_action = tipc_accept_from_sock; + + ret = kernel_listen(sock, 0); + if (ret < 0) + goto create_err; + break; + case SOCK_DGRAM: + case SOCK_RDM: + con->rx_action = tipc_receive_from_sock; + break; + default: + pr_err("Unknown socket type %d\n", s->type); + goto create_err; + } + return sock; + +create_err: + sock_release(sock); + con->sock = NULL; + return NULL; +} + +static int tipc_open_listening_sock(struct tipc_server *s) +{ + struct socket *sock; + struct tipc_conn *con; + + con = tipc_alloc_conn(s); + if (IS_ERR(con)) + return PTR_ERR(con); + + sock = tipc_create_listen_sock(con); + if (!sock) { + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + kfree(con); + return -EINVAL; + } + + tipc_register_callbacks(sock, con); + return 0; +} + +static struct outqueue_entry *tipc_alloc_entry(void *data, int len) +{ + struct outqueue_entry *entry; + void *buf; + + entry = kmalloc(sizeof(struct outqueue_entry), GFP_ATOMIC); + if (!entry) + return NULL; + + buf = kmalloc(len, GFP_ATOMIC); + if (!buf) { + kfree(entry); + return NULL; + } + + memcpy(buf, data, len); + entry->iov.iov_base = buf; + entry->iov.iov_len = len; + + return entry; +} + +static void tipc_free_entry(struct outqueue_entry *e) +{ + kfree(e->iov.iov_base); + kfree(e); +} + +static void tipc_clean_outqueues(struct tipc_conn *con) +{ + struct outqueue_entry *e, *safe; + + spin_lock_bh(&con->outqueue_lock); + list_for_each_entry_safe(e, safe, &con->outqueue, list) { + list_del(&e->list); + tipc_free_entry(e); + } + spin_unlock_bh(&con->outqueue_lock); +} + +int tipc_conn_sendmsg(struct tipc_server *s, int conid, + struct sockaddr_tipc *addr, void *data, size_t len) +{ + struct outqueue_entry *e; + struct tipc_conn *con; + + con = tipc_conn_lookup(s, conid); + if (!con) + return -EINVAL; + + e = tipc_alloc_entry(data, len); + if (!e) { + conn_put(con); + return -ENOMEM; + } + + if (addr) + memcpy(&e->dest, addr, sizeof(struct sockaddr_tipc)); + + spin_lock_bh(&con->outqueue_lock); + list_add_tail(&e->list, &con->outqueue); + spin_unlock_bh(&con->outqueue_lock); + + if (test_bit(CF_CONNECTED, &con->flags)) + if (!queue_work(s->send_wq, &con->swork)) + conn_put(con); + + return 0; +} + +void tipc_conn_terminate(struct tipc_server *s, int conid) +{ + struct tipc_conn *con; + + con = tipc_conn_lookup(s, conid); + if (con) { + tipc_close_conn(con); + conn_put(con); + } +} + +static void tipc_send_to_sock(struct tipc_conn *con) +{ + int count = 0; + struct tipc_server *s = con->server; + struct outqueue_entry *e; + struct msghdr msg; + int ret; + + spin_lock_bh(&con->outqueue_lock); + while (1) { + e = list_entry(con->outqueue.next, struct outqueue_entry, + list); + if ((struct list_head *) e == &con->outqueue) + break; + spin_unlock_bh(&con->outqueue_lock); + + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + + if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { + msg.msg_name = &e->dest; + msg.msg_namelen = sizeof(struct sockaddr_tipc); + } + ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, + e->iov.iov_len); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + goto out; + } else if (ret < 0) { + goto send_err; + } + + /* Don't starve users filling buffers */ + if (++count >= MAX_SEND_MSG_COUNT) { + cond_resched(); + count = 0; + } + + spin_lock_bh(&con->outqueue_lock); + list_del(&e->list); + tipc_free_entry(e); + } + spin_unlock_bh(&con->outqueue_lock); +out: + return; + +send_err: + tipc_close_conn(con); +} + +static void tipc_recv_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, rwork); + int count = 0; + + while (test_bit(CF_CONNECTED, &con->flags)) { + if (con->rx_action(con)) + break; + + /* Don't flood Rx machine */ + if (++count >= MAX_RECV_MSG_COUNT) { + cond_resched(); + count = 0; + } + } + conn_put(con); +} + +static void tipc_send_work(struct work_struct *work) +{ + struct tipc_conn *con = container_of(work, struct tipc_conn, swork); + + if (test_bit(CF_CONNECTED, &con->flags)) + tipc_send_to_sock(con); + + conn_put(con); +} + +static void tipc_work_stop(struct tipc_server *s) +{ + destroy_workqueue(s->rcv_wq); + destroy_workqueue(s->send_wq); +} + +static int tipc_work_start(struct tipc_server *s) +{ + s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1); + if (!s->rcv_wq) { + pr_err("can't start tipc receive workqueue\n"); + return -ENOMEM; + } + + s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1); + if (!s->send_wq) { + pr_err("can't start tipc send workqueue\n"); + destroy_workqueue(s->rcv_wq); + return -ENOMEM; + } + + return 0; +} + +int tipc_server_start(struct tipc_server *s) +{ + int ret; + + spin_lock_init(&s->idr_lock); + idr_init(&s->conn_idr); + s->idr_in_use = 0; + + s->rcvbuf_cache = kmem_cache_create(s->name, s->max_rcvbuf_size, + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!s->rcvbuf_cache) + return -ENOMEM; + + ret = tipc_work_start(s); + if (ret < 0) { + kmem_cache_destroy(s->rcvbuf_cache); + return ret; + } + ret = tipc_open_listening_sock(s); + if (ret < 0) { + tipc_work_stop(s); + kmem_cache_destroy(s->rcvbuf_cache); + return ret; + } + s->enabled = 1; + return ret; +} + +void tipc_server_stop(struct tipc_server *s) +{ + struct tipc_conn *con; + int total = 0; + int id; + + if (!s->enabled) + return; + + s->enabled = 0; + spin_lock_bh(&s->idr_lock); + for (id = 0; total < s->idr_in_use; id++) { + con = idr_find(&s->conn_idr, id); + if (con) { + total++; + spin_unlock_bh(&s->idr_lock); + tipc_close_conn(con); + spin_lock_bh(&s->idr_lock); + } + } + spin_unlock_bh(&s->idr_lock); + + tipc_work_stop(s); + kmem_cache_destroy(s->rcvbuf_cache); + idr_destroy(&s->conn_idr); +} diff --git a/net/tipc/server.h b/net/tipc/server.h new file mode 100644 index 000000000000..98b23f20bc0f --- /dev/null +++ b/net/tipc/server.h @@ -0,0 +1,94 @@ +/* + * net/tipc/server.h: Include file for TIPC server code + * + * Copyright (c) 2012-2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_SERVER_H +#define _TIPC_SERVER_H + +#include "core.h" + +#define TIPC_SERVER_NAME_LEN 32 + +/** + * struct tipc_server - TIPC server structure + * @conn_idr: identifier set of connection + * @idr_lock: protect the connection identifier set + * @idr_in_use: amount of allocated identifier entry + * @rcvbuf_cache: memory cache of server receive buffer + * @rcv_wq: receive workqueue + * @send_wq: send workqueue + * @max_rcvbuf_size: maximum permitted receive message length + * @tipc_conn_new: callback will be called when new connection is incoming + * @tipc_conn_shutdown: callback will be called when connection is shut down + * @tipc_conn_recvmsg: callback will be called when message arrives + * @saddr: TIPC server address + * @name: server name + * @imp: message importance + * @type: socket type + * @enabled: identify whether server is launched or not + */ +struct tipc_server { + struct idr conn_idr; + spinlock_t idr_lock; + int idr_in_use; + struct kmem_cache *rcvbuf_cache; + struct workqueue_struct *rcv_wq; + struct workqueue_struct *send_wq; + int max_rcvbuf_size; + void *(*tipc_conn_new) (int conid); + void (*tipc_conn_shutdown) (int conid, void *usr_data); + void (*tipc_conn_recvmsg) (int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len); + struct sockaddr_tipc *saddr; + const char name[TIPC_SERVER_NAME_LEN]; + int imp; + int type; + int enabled; +}; + +int tipc_conn_sendmsg(struct tipc_server *s, int conid, + struct sockaddr_tipc *addr, void *data, size_t len); + +/** + * tipc_conn_terminate - terminate connection with server + * + * Note: Must call it in process context since it might sleep + */ +void tipc_conn_terminate(struct tipc_server *s, int conid); + +int tipc_server_start(struct tipc_server *s); + +void tipc_server_stop(struct tipc_server *s); + +#endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 515ce38e4f4c..ce8249c76827 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2,7 +2,7 @@ * net/tipc/socket.c: TIPC socket API * * Copyright (c) 2001-2007, 2012 Ericsson AB - * Copyright (c) 2004-2008, 2010-2012, Wind River Systems + * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,8 +43,6 @@ #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ -#define CONN_OVERLOAD_LIMIT ((TIPC_FLOW_CONTROL_WIN * 2 + 1) * \ - SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE)) #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ struct tipc_sock { @@ -65,12 +63,15 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *tport); static void tipc_data_ready(struct sock *sk, int len); static void tipc_write_space(struct sock *sk); +static int release(struct socket *sock); +static int accept(struct socket *sock, struct socket *new_sock, int flags); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; static const struct proto_ops msg_ops; static struct proto tipc_proto; +static struct proto tipc_proto_kern; static int sockets_enabled; @@ -143,7 +144,7 @@ static void reject_rx_queue(struct sock *sk) } /** - * tipc_create - create a TIPC socket + * tipc_sk_create - create a TIPC socket * @net: network namespace (must be default network) * @sock: pre-allocated socket structure * @protocol: protocol indicator (must be 0) @@ -154,8 +155,8 @@ static void reject_rx_queue(struct sock *sk) * * Returns 0 on success, errno otherwise */ -static int tipc_create(struct net *net, struct socket *sock, int protocol, - int kern) +static int tipc_sk_create(struct net *net, struct socket *sock, int protocol, + int kern) { const struct proto_ops *ops; socket_state state; @@ -185,13 +186,17 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, } /* Allocate socket's protocol area */ - sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + if (!kern) + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto); + else + sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto_kern); + if (sk == NULL) return -ENOMEM; /* Allocate TIPC port for socket to use */ - tp_ptr = tipc_createport_raw(sk, &dispatch, &wakeupdispatch, - TIPC_LOW_IMPORTANCE); + tp_ptr = tipc_createport(sk, &dispatch, &wakeupdispatch, + TIPC_LOW_IMPORTANCE); if (unlikely(!tp_ptr)) { sk_free(sk); return -ENOMEM; @@ -203,6 +208,7 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; + sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; tipc_sk(sk)->p = tp_ptr; @@ -220,6 +226,78 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, } /** + * tipc_sock_create_local - create TIPC socket from inside TIPC module + * @type: socket type - SOCK_RDM or SOCK_SEQPACKET + * + * We cannot use sock_creat_kern here because it bumps module user count. + * Since socket owner and creator is the same module we must make sure + * that module count remains zero for module local sockets, otherwise + * we cannot do rmmod. + * + * Returns 0 on success, errno otherwise + */ +int tipc_sock_create_local(int type, struct socket **res) +{ + int rc; + struct sock *sk; + + rc = sock_create_lite(AF_TIPC, type, 0, res); + if (rc < 0) { + pr_err("Failed to create kernel socket\n"); + return rc; + } + tipc_sk_create(&init_net, *res, 0, 1); + + sk = (*res)->sk; + + return 0; +} + +/** + * tipc_sock_release_local - release socket created by tipc_sock_create_local + * @sock: the socket to be released. + * + * Module reference count is not incremented when such sockets are created, + * so we must keep it from being decremented when they are released. + */ +void tipc_sock_release_local(struct socket *sock) +{ + release(sock); + sock->ops = NULL; + sock_release(sock); +} + +/** + * tipc_sock_accept_local - accept a connection on a socket created + * with tipc_sock_create_local. Use this function to avoid that + * module reference count is inadvertently incremented. + * + * @sock: the accepting socket + * @newsock: reference to the new socket to be created + * @flags: socket flags + */ + +int tipc_sock_accept_local(struct socket *sock, struct socket **newsock, + int flags) +{ + struct sock *sk = sock->sk; + int ret; + + ret = sock_create_lite(sk->sk_family, sk->sk_type, + sk->sk_protocol, newsock); + if (ret < 0) + return ret; + + ret = accept(sock, *newsock, flags); + if (ret < 0) { + sock_release(*newsock); + return ret; + } + (*newsock)->ops = sock->ops; + return ret; +} + +/** * release - destroy a TIPC socket * @sock: socket to destroy * @@ -324,7 +402,9 @@ static int bind(struct socket *sock, struct sockaddr *uaddr, int uaddr_len) else if (addr->addrtype != TIPC_ADDR_NAMESEQ) return -EAFNOSUPPORT; - if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) + if ((addr->addr.nameseq.type < TIPC_RESERVED_TYPES) && + (addr->addr.nameseq.type != TIPC_TOP_SRV) && + (addr->addr.nameseq.type != TIPC_CFG_SRV)) return -EACCES; return (addr->scope > 0) ? @@ -519,8 +599,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, res = -EISCONN; goto exit; } - if ((tport->published) || - ((sock->type == SOCK_STREAM) && (total_len != 0))) { + if (tport->published) { res = -EOPNOTSUPP; goto exit; } @@ -810,7 +889,7 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) * Returns 0 if successful, otherwise errno */ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, - struct tipc_port *tport) + struct tipc_port *tport) { u32 anc_data[3]; u32 err; @@ -1011,8 +1090,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, lock_sock(sk); - if (unlikely((sock->state == SS_UNCONNECTED) || - (sock->state == SS_CONNECTING))) { + if (unlikely((sock->state == SS_UNCONNECTED))) { res = -ENOTCONN; goto exit; } @@ -1233,10 +1311,10 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) * For all connectionless messages, by default new queue limits are * as belows: * - * TIPC_LOW_IMPORTANCE (5MB) - * TIPC_MEDIUM_IMPORTANCE (10MB) - * TIPC_HIGH_IMPORTANCE (20MB) - * TIPC_CRITICAL_IMPORTANCE (40MB) + * TIPC_LOW_IMPORTANCE (4 MB) + * TIPC_MEDIUM_IMPORTANCE (8 MB) + * TIPC_HIGH_IMPORTANCE (16 MB) + * TIPC_CRITICAL_IMPORTANCE (32 MB) * * Returns overload limit according to corresponding message importance */ @@ -1246,9 +1324,10 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf) unsigned int limit; if (msg_connected(msg)) - limit = CONN_OVERLOAD_LIMIT; + limit = sysctl_tipc_rmem[2]; else - limit = sk->sk_rcvbuf << (msg_importance(msg) + 5); + limit = sk->sk_rcvbuf >> TIPC_CRITICAL_IMPORTANCE << + msg_importance(msg); return limit; } @@ -1327,7 +1406,7 @@ static int backlog_rcv(struct sock *sk, struct sk_buff *buf) */ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) { - struct sock *sk = (struct sock *)tport->usr_handle; + struct sock *sk = tport->sk; u32 res; /* @@ -1358,7 +1437,7 @@ static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf) */ static void wakeupdispatch(struct tipc_port *tport) { - struct sock *sk = (struct sock *)tport->usr_handle; + struct sock *sk = tport->sk; sk->sk_write_space(sk); } @@ -1531,7 +1610,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_create(sock_net(sock->sk), new_sock, 0, 0); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); if (res) goto exit; @@ -1657,8 +1736,8 @@ restart: * * Returns 0 on success, errno otherwise */ -static int setsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, unsigned int ol) +static int setsockopt(struct socket *sock, int lvl, int opt, char __user *ov, + unsigned int ol) { struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); @@ -1716,8 +1795,8 @@ static int setsockopt(struct socket *sock, * * Returns 0 on success, errno otherwise */ -static int getsockopt(struct socket *sock, - int lvl, int opt, char __user *ov, int __user *ol) +static int getsockopt(struct socket *sock, int lvl, int opt, char __user *ov, + int __user *ol) { struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); @@ -1841,13 +1920,20 @@ static const struct proto_ops stream_ops = { static const struct net_proto_family tipc_family_ops = { .owner = THIS_MODULE, .family = AF_TIPC, - .create = tipc_create + .create = tipc_sk_create }; static struct proto tipc_proto = { .name = "TIPC", .owner = THIS_MODULE, - .obj_size = sizeof(struct tipc_sock) + .obj_size = sizeof(struct tipc_sock), + .sysctl_rmem = sysctl_tipc_rmem +}; + +static struct proto tipc_proto_kern = { + .name = "TIPC", + .obj_size = sizeof(struct tipc_sock), + .sysctl_rmem = sysctl_tipc_rmem }; /** diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 6b42d47029af..d38bb45d82e9 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -2,7 +2,7 @@ * net/tipc/subscr.c: TIPC network topology service * * Copyright (c) 2000-2006, Ericsson AB - * Copyright (c) 2005-2007, 2010-2011, Wind River Systems + * Copyright (c) 2005-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -41,33 +41,42 @@ /** * struct tipc_subscriber - TIPC network topology subscriber - * @port_ref: object reference to server port connecting to subscriber - * @lock: pointer to spinlock controlling access to subscriber's server port - * @subscriber_list: adjacent subscribers in top. server's list of subscribers + * @conid: connection identifier to server connecting to subscriber + * @lock: controll access to subscriber * @subscription_list: list of subscription objects for this subscriber */ struct tipc_subscriber { - u32 port_ref; - spinlock_t *lock; - struct list_head subscriber_list; + int conid; + spinlock_t lock; struct list_head subscription_list; }; -/** - * struct top_srv - TIPC network topology subscription service - * @setup_port: reference to TIPC port that handles subscription requests - * @subscription_count: number of active subscriptions (not subscribers!) - * @subscriber_list: list of ports subscribing to service - * @lock: spinlock govering access to subscriber list - */ -struct top_srv { - u32 setup_port; - atomic_t subscription_count; - struct list_head subscriber_list; - spinlock_t lock; +static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len); +static void *subscr_named_msg_event(int conid); +static void subscr_conn_shutdown_event(int conid, void *usr_data); + +static atomic_t subscription_count = ATOMIC_INIT(0); + +static struct sockaddr_tipc topsrv_addr __read_mostly = { + .family = AF_TIPC, + .addrtype = TIPC_ADDR_NAMESEQ, + .addr.nameseq.type = TIPC_TOP_SRV, + .addr.nameseq.lower = TIPC_TOP_SRV, + .addr.nameseq.upper = TIPC_TOP_SRV, + .scope = TIPC_NODE_SCOPE }; -static struct top_srv topsrv; +static struct tipc_server topsrv __read_mostly = { + .saddr = &topsrv_addr, + .imp = TIPC_CRITICAL_IMPORTANCE, + .type = SOCK_SEQPACKET, + .max_rcvbuf_size = sizeof(struct tipc_subscr), + .name = "topology_server", + .tipc_conn_recvmsg = subscr_conn_msg_event, + .tipc_conn_new = subscr_named_msg_event, + .tipc_conn_shutdown = subscr_conn_shutdown_event, +}; /** * htohl - convert value to endianness used by destination @@ -81,20 +90,13 @@ static u32 htohl(u32 in, int swap) return swap ? swab32(in) : in; } -/** - * subscr_send_event - send a message containing a tipc_event to the subscriber - * - * Note: Must not hold subscriber's server port lock, since tipc_send() will - * try to take the lock if the message is rejected and returned! - */ -static void subscr_send_event(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, +static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, u32 node) { - struct iovec msg_sect; + struct tipc_subscriber *subscriber = sub->subscriber; + struct kvec msg_sect; + int ret; msg_sect.iov_base = (void *)&sub->evt; msg_sect.iov_len = sizeof(struct tipc_event); @@ -104,7 +106,10 @@ static void subscr_send_event(struct tipc_subscription *sub, sub->evt.found_upper = htohl(found_upper, sub->swap); sub->evt.port.ref = htohl(port_ref, sub->swap); sub->evt.port.node = htohl(node, sub->swap); - tipc_send(sub->server_ref, 1, &msg_sect, msg_sect.iov_len); + ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, + msg_sect.iov_base, msg_sect.iov_len); + if (ret < 0) + pr_err("Sending subscription event failed, no memory\n"); } /** @@ -112,10 +117,8 @@ static void subscr_send_event(struct tipc_subscription *sub, * * Returns 1 if there is overlap, otherwise 0. */ -int tipc_subscr_overlap(struct tipc_subscription *sub, - u32 found_lower, +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper) - { if (found_lower < sub->seq.lower) found_lower = sub->seq.lower; @@ -131,13 +134,9 @@ int tipc_subscr_overlap(struct tipc_subscription *sub, * * Protected by nameseq.lock in name_table.c */ -void tipc_subscr_report_overlap(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, - u32 node, - int must) +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must) { if (!tipc_subscr_overlap(sub, found_lower, found_upper)) return; @@ -147,21 +146,24 @@ void tipc_subscr_report_overlap(struct tipc_subscription *sub, subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); } -/** - * subscr_timeout - subscription timeout has occurred - */ static void subscr_timeout(struct tipc_subscription *sub) { - struct tipc_port *server_port; + struct tipc_subscriber *subscriber = sub->subscriber; + + /* The spin lock per subscriber is used to protect its members */ + spin_lock_bh(&subscriber->lock); - /* Validate server port reference (in case subscriber is terminating) */ - server_port = tipc_port_lock(sub->server_ref); - if (server_port == NULL) + /* Validate if the connection related to the subscriber is + * closed (in case subscriber is terminating) + */ + if (subscriber->conid == 0) { + spin_unlock_bh(&subscriber->lock); return; + } /* Validate timeout (in case subscription is being cancelled) */ if (sub->timeout == TIPC_WAIT_FOREVER) { - tipc_port_unlock(server_port); + spin_unlock_bh(&subscriber->lock); return; } @@ -171,8 +173,7 @@ static void subscr_timeout(struct tipc_subscription *sub) /* Unlink subscription from subscriber */ list_del(&sub->subscription_list); - /* Release subscriber's server port */ - tipc_port_unlock(server_port); + spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ subscr_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -181,64 +182,54 @@ static void subscr_timeout(struct tipc_subscription *sub) /* Now destroy subscription */ k_term_timer(&sub->timer); kfree(sub); - atomic_dec(&topsrv.subscription_count); + atomic_dec(&subscription_count); } /** * subscr_del - delete a subscription within a subscription list * - * Called with subscriber port locked. + * Called with subscriber lock held. */ static void subscr_del(struct tipc_subscription *sub) { tipc_nametbl_unsubscribe(sub); list_del(&sub->subscription_list); kfree(sub); - atomic_dec(&topsrv.subscription_count); + atomic_dec(&subscription_count); } /** * subscr_terminate - terminate communication with a subscriber * - * Called with subscriber port locked. Routine must temporarily release lock - * to enable subscription timeout routine(s) to finish without deadlocking; - * the lock is then reclaimed to allow caller to release it upon return. - * (This should work even in the unlikely event some other thread creates - * a new object reference in the interim that uses this lock; this routine will - * simply wait for it to be released, then claim it.) + * Note: Must call it in process context since it might sleep. */ static void subscr_terminate(struct tipc_subscriber *subscriber) { - u32 port_ref; + tipc_conn_terminate(&topsrv, subscriber->conid); +} + +static void subscr_release(struct tipc_subscriber *subscriber) +{ struct tipc_subscription *sub; struct tipc_subscription *sub_temp; - /* Invalidate subscriber reference */ - port_ref = subscriber->port_ref; - subscriber->port_ref = 0; - spin_unlock_bh(subscriber->lock); + spin_lock_bh(&subscriber->lock); - /* Sever connection to subscriber */ - tipc_shutdown(port_ref); - tipc_deleteport(port_ref); + /* Invalidate subscriber reference */ + subscriber->conid = 0; /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, subscription_list) { if (sub->timeout != TIPC_WAIT_FOREVER) { + spin_unlock_bh(&subscriber->lock); k_cancel_timer(&sub->timer); k_term_timer(&sub->timer); + spin_lock_bh(&subscriber->lock); } subscr_del(sub); } - - /* Remove subscriber from topology server's subscriber list */ - spin_lock_bh(&topsrv.lock); - list_del(&subscriber->subscriber_list); - spin_unlock_bh(&topsrv.lock); - - /* Reclaim subscriber lock */ - spin_lock_bh(subscriber->lock); + spin_unlock_bh(&subscriber->lock); /* Now destroy subscriber */ kfree(subscriber); @@ -247,7 +238,7 @@ static void subscr_terminate(struct tipc_subscriber *subscriber) /** * subscr_cancel - handle subscription cancellation request * - * Called with subscriber port locked. Routine must temporarily release lock + * Called with subscriber lock held. Routine must temporarily release lock * to enable the subscription timeout routine to finish without deadlocking; * the lock is then reclaimed to allow caller to release it upon return. * @@ -274,10 +265,10 @@ static void subscr_cancel(struct tipc_subscr *s, /* Cancel subscription timer (if used), then delete subscription */ if (sub->timeout != TIPC_WAIT_FOREVER) { sub->timeout = TIPC_WAIT_FOREVER; - spin_unlock_bh(subscriber->lock); + spin_unlock_bh(&subscriber->lock); k_cancel_timer(&sub->timer); k_term_timer(&sub->timer); - spin_lock_bh(subscriber->lock); + spin_lock_bh(&subscriber->lock); } subscr_del(sub); } @@ -285,7 +276,7 @@ static void subscr_cancel(struct tipc_subscr *s, /** * subscr_subscribe - create subscription for subscriber * - * Called with subscriber port locked. + * Called with subscriber lock held. */ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, struct tipc_subscriber *subscriber) @@ -304,7 +295,7 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, } /* Refuse subscription if global limit exceeded */ - if (atomic_read(&topsrv.subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { + if (atomic_read(&subscription_count) >= TIPC_MAX_SUBSCRIPTIONS) { pr_warn("Subscription rejected, limit reached (%u)\n", TIPC_MAX_SUBSCRIPTIONS); subscr_terminate(subscriber); @@ -335,10 +326,10 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, } INIT_LIST_HEAD(&sub->nameseq_list); list_add(&sub->subscription_list, &subscriber->subscription_list); - sub->server_ref = subscriber->port_ref; + sub->subscriber = subscriber; sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr)); - atomic_inc(&topsrv.subscription_count); + atomic_inc(&subscription_count); if (sub->timeout != TIPC_WAIT_FOREVER) { k_init_timer(&sub->timer, (Handler)subscr_timeout, (unsigned long)sub); @@ -348,196 +339,51 @@ static struct tipc_subscription *subscr_subscribe(struct tipc_subscr *s, return sub; } -/** - * subscr_conn_shutdown_event - handle termination request from subscriber - * - * Called with subscriber's server port unlocked. - */ -static void subscr_conn_shutdown_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - unsigned char const *data, - unsigned int size, - int reason) +/* Handle one termination request for the subscriber */ +static void subscr_conn_shutdown_event(int conid, void *usr_data) { - struct tipc_subscriber *subscriber = usr_handle; - spinlock_t *subscriber_lock; - - if (tipc_port_lock(port_ref) == NULL) - return; - - subscriber_lock = subscriber->lock; - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); + subscr_release((struct tipc_subscriber *)usr_data); } -/** - * subscr_conn_msg_event - handle new subscription request from subscriber - * - * Called with subscriber's server port unlocked. - */ -static void subscr_conn_msg_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - const unchar *data, - u32 size) +/* Handle one request to create a new subscription for the subscriber */ +static void subscr_conn_msg_event(int conid, struct sockaddr_tipc *addr, + void *usr_data, void *buf, size_t len) { - struct tipc_subscriber *subscriber = usr_handle; - spinlock_t *subscriber_lock; + struct tipc_subscriber *subscriber = usr_data; struct tipc_subscription *sub; - /* - * Lock subscriber's server port (& make a local copy of lock pointer, - * in case subscriber is deleted while processing subscription request) - */ - if (tipc_port_lock(port_ref) == NULL) - return; - - subscriber_lock = subscriber->lock; - - if (size != sizeof(struct tipc_subscr)) { - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); - } else { - sub = subscr_subscribe((struct tipc_subscr *)data, subscriber); - spin_unlock_bh(subscriber_lock); - if (sub != NULL) { - - /* - * We must release the server port lock before adding a - * subscription to the name table since TIPC needs to be - * able to (re)acquire the port lock if an event message - * issued by the subscription process is rejected and - * returned. The subscription cannot be deleted while - * it is being added to the name table because: - * a) the single-threading of the native API port code - * ensures the subscription cannot be cancelled and - * the subscriber connection cannot be broken, and - * b) the name table lock ensures the subscription - * timeout code cannot delete the subscription, - * so the subscription object is still protected. - */ - tipc_nametbl_subscribe(sub); - } - } + spin_lock_bh(&subscriber->lock); + sub = subscr_subscribe((struct tipc_subscr *)buf, subscriber); + if (sub) + tipc_nametbl_subscribe(sub); + spin_unlock_bh(&subscriber->lock); } -/** - * subscr_named_msg_event - handle request to establish a new subscriber - */ -static void subscr_named_msg_event(void *usr_handle, - u32 port_ref, - struct sk_buff **buf, - const unchar *data, - u32 size, - u32 importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest) + +/* Handle one request to establish a new subscriber */ +static void *subscr_named_msg_event(int conid) { struct tipc_subscriber *subscriber; - u32 server_port_ref; /* Create subscriber object */ subscriber = kzalloc(sizeof(struct tipc_subscriber), GFP_ATOMIC); if (subscriber == NULL) { pr_warn("Subscriber rejected, no memory\n"); - return; + return NULL; } INIT_LIST_HEAD(&subscriber->subscription_list); - INIT_LIST_HEAD(&subscriber->subscriber_list); - - /* Create server port & establish connection to subscriber */ - tipc_createport(subscriber, - importance, - NULL, - NULL, - subscr_conn_shutdown_event, - NULL, - NULL, - subscr_conn_msg_event, - NULL, - &subscriber->port_ref); - if (subscriber->port_ref == 0) { - pr_warn("Subscriber rejected, unable to create port\n"); - kfree(subscriber); - return; - } - tipc_connect(subscriber->port_ref, orig); - - /* Lock server port (& save lock address for future use) */ - subscriber->lock = tipc_port_lock(subscriber->port_ref)->lock; - - /* Add subscriber to topology server's subscriber list */ - spin_lock_bh(&topsrv.lock); - list_add(&subscriber->subscriber_list, &topsrv.subscriber_list); - spin_unlock_bh(&topsrv.lock); - - /* Unlock server port */ - server_port_ref = subscriber->port_ref; - spin_unlock_bh(subscriber->lock); - - /* Send an ACK- to complete connection handshaking */ - tipc_send(server_port_ref, 0, NULL, 0); + subscriber->conid = conid; + spin_lock_init(&subscriber->lock); - /* Handle optional subscription request */ - if (size != 0) { - subscr_conn_msg_event(subscriber, server_port_ref, - buf, data, size); - } + return (void *)subscriber; } int tipc_subscr_start(void) { - struct tipc_name_seq seq = {TIPC_TOP_SRV, TIPC_TOP_SRV, TIPC_TOP_SRV}; - int res; - - spin_lock_init(&topsrv.lock); - INIT_LIST_HEAD(&topsrv.subscriber_list); - - res = tipc_createport(NULL, - TIPC_CRITICAL_IMPORTANCE, - NULL, - NULL, - NULL, - NULL, - subscr_named_msg_event, - NULL, - NULL, - &topsrv.setup_port); - if (res) - goto failed; - - res = tipc_publish(topsrv.setup_port, TIPC_NODE_SCOPE, &seq); - if (res) { - tipc_deleteport(topsrv.setup_port); - topsrv.setup_port = 0; - goto failed; - } - - return 0; - -failed: - pr_err("Failed to create subscription service\n"); - return res; + return tipc_server_start(&topsrv); } void tipc_subscr_stop(void) { - struct tipc_subscriber *subscriber; - struct tipc_subscriber *subscriber_temp; - spinlock_t *subscriber_lock; - - if (topsrv.setup_port) { - tipc_deleteport(topsrv.setup_port); - topsrv.setup_port = 0; - - list_for_each_entry_safe(subscriber, subscriber_temp, - &topsrv.subscriber_list, - subscriber_list) { - subscriber_lock = subscriber->lock; - spin_lock_bh(subscriber_lock); - subscr_terminate(subscriber); - spin_unlock_bh(subscriber_lock); - } - } + tipc_server_stop(&topsrv); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 218d2e07f0cc..393e417bee3f 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -2,7 +2,7 @@ * net/tipc/subscr.h: Include file for TIPC network topology service * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2007, 2012-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -37,10 +37,14 @@ #ifndef _TIPC_SUBSCR_H #define _TIPC_SUBSCR_H +#include "server.h" + struct tipc_subscription; +struct tipc_subscriber; /** * struct tipc_subscription - TIPC network topology subscription object + * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription * @timeout: duration of subscription (in ms) * @filter: event filtering to be done for subscription @@ -52,28 +56,23 @@ struct tipc_subscription; * @evt: template for events generated by subscription */ struct tipc_subscription { + struct tipc_subscriber *subscriber; struct tipc_name_seq seq; u32 timeout; u32 filter; struct timer_list timer; struct list_head nameseq_list; struct list_head subscription_list; - u32 server_ref; int swap; struct tipc_event evt; }; -int tipc_subscr_overlap(struct tipc_subscription *sub, - u32 found_lower, +int tipc_subscr_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper); -void tipc_subscr_report_overlap(struct tipc_subscription *sub, - u32 found_lower, - u32 found_upper, - u32 event, - u32 port_ref, - u32 node, - int must_report); +void tipc_subscr_report_overlap(struct tipc_subscription *sub, u32 found_lower, + u32 found_upper, u32 event, u32 port_ref, + u32 node, int must); int tipc_subscr_start(void); diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c new file mode 100644 index 000000000000..f3fef93325a8 --- /dev/null +++ b/net/tipc/sysctl.c @@ -0,0 +1,64 @@ +/* + * net/tipc/sysctl.c: sysctl interface to TIPC subsystem + * + * Copyright (c) 2013, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" + +#include <linux/sysctl.h> + +static struct ctl_table_header *tipc_ctl_hdr; + +static struct ctl_table tipc_table[] = { + { + .procname = "tipc_rmem", + .data = &sysctl_tipc_rmem, + .maxlen = sizeof(sysctl_tipc_rmem), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + {} +}; + +int tipc_register_sysctl(void) +{ + tipc_ctl_hdr = register_net_sysctl(&init_net, "net/tipc", tipc_table); + if (tipc_ctl_hdr == NULL) + return -ENOMEM; + return 0; +} + +void tipc_unregister_sysctl(void) +{ + unregister_net_sysctl_table(tipc_ctl_hdr); +} diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index 8800604c93f4..b3d515021b74 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -15,7 +15,7 @@ #include <net/af_unix.h> -static ctl_table unix_table[] = { +static struct ctl_table unix_table[] = { { .procname = "max_dgram_qlen", .data = &init_net.unx.sysctl_max_dgram_qlen, diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3f77f42a3b58..593071dabd1c 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -144,18 +144,18 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function - * mods with VSOCK_HASH_SIZE - 1 to ensure this. + * mods with VSOCK_HASH_SIZE to ensure this. */ #define VSOCK_HASH_SIZE 251 #define MAX_PORT_RETRIES 24 -#define VSOCK_HASH(addr) ((addr)->svm_port % (VSOCK_HASH_SIZE - 1)) +#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) #define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) /* XXX This can probably be implemented in a better way. */ #define VSOCK_CONN_HASH(src, dst) \ - (((src)->svm_cid ^ (dst)->svm_port) % (VSOCK_HASH_SIZE - 1)) + (((src)->svm_cid ^ (dst)->svm_port) % VSOCK_HASH_SIZE) #define vsock_connected_sockets(src, dst) \ (&vsock_connected_table[VSOCK_CONN_HASH(src, dst)]) #define vsock_connected_sockets_vsk(vsk) \ @@ -165,6 +165,18 @@ static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; static DEFINE_SPINLOCK(vsock_table_lock); +/* Autobind this socket to the local address if necessary. */ +static int vsock_auto_bind(struct vsock_sock *vsk) +{ + struct sock *sk = sk_vsock(vsk); + struct sockaddr_vm local_addr; + + if (vsock_addr_bound(&vsk->local_addr)) + return 0; + vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); + return __vsock_bind(sk, &local_addr); +} + static void vsock_init_tables(void) { int i; @@ -956,15 +968,10 @@ static int vsock_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, lock_sock(sk); - if (!vsock_addr_bound(&vsk->local_addr)) { - struct sockaddr_vm local_addr; - - vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); - err = __vsock_bind(sk, &local_addr); - if (err != 0) - goto out; + err = vsock_auto_bind(vsk); + if (err) + goto out; - } /* If the provided message contains an address, use that. Otherwise * fall back on the socket's remote handle (if it has been connected). @@ -1038,15 +1045,9 @@ static int vsock_dgram_connect(struct socket *sock, lock_sock(sk); - if (!vsock_addr_bound(&vsk->local_addr)) { - struct sockaddr_vm local_addr; - - vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); - err = __vsock_bind(sk, &local_addr); - if (err != 0) - goto out; - - } + err = vsock_auto_bind(vsk); + if (err) + goto out; if (!transport->dgram_allow(remote_addr->svm_cid, remote_addr->svm_port)) { @@ -1163,17 +1164,9 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr)); - /* Autobind this socket to the local address if necessary. */ - if (!vsock_addr_bound(&vsk->local_addr)) { - struct sockaddr_vm local_addr; - - vsock_addr_init(&local_addr, VMADDR_CID_ANY, - VMADDR_PORT_ANY); - err = __vsock_bind(sk, &local_addr); - if (err != 0) - goto out; - - } + err = vsock_auto_bind(vsk); + if (err) + goto out; sk->sk_state = SS_CONNECTING; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index daff75200e25..ffc11df02af2 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -625,13 +625,14 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg) /* Attach the packet to the socket's receive queue as an sk_buff. */ skb = alloc_skb(size, GFP_ATOMIC); - if (skb) { - /* sk_receive_skb() will do a sock_put(), so hold here. */ - sock_hold(sk); - skb_put(skb, size); - memcpy(skb->data, dg, size); - sk_receive_skb(sk, skb, 0); - } + if (!skb) + return VMCI_ERROR_NO_MEM; + + /* sk_receive_skb() will do a sock_put(), so hold here. */ + sock_hold(sk); + skb_put(skb, size); + memcpy(skb->data, dg, size); + sk_receive_skb(sk, skb, 0); return VMCI_SUCCESS; } @@ -939,10 +940,9 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) * reset to prevent that. */ vmci_transport_send_reset(sk, pkt); - goto out; + break; } -out: release_sock(sk); kfree(recv_pkt_info); /* Release reference obtained in the stream callback when we fetched diff --git a/net/wireless/chan.c b/net/wireless/chan.c index fd556ac05fdb..50f6195c8b70 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -54,6 +54,8 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) control_freq = chandef->chan->center_freq; switch (chandef->width) { + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: if (chandef->center_freq1 != control_freq) @@ -152,6 +154,12 @@ static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c) int width; switch (c->width) { + case NL80211_CHAN_WIDTH_5: + width = 5; + break; + case NL80211_CHAN_WIDTH_10: + width = 10; + break; case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: width = 20; @@ -194,6 +202,16 @@ cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, if (c1->width == c2->width) return NULL; + /* + * can't be compatible if one of them is 5 or 10 MHz, + * but they don't have the same width. + */ + if (c1->width == NL80211_CHAN_WIDTH_5 || + c1->width == NL80211_CHAN_WIDTH_10 || + c2->width == NL80211_CHAN_WIDTH_5 || + c2->width == NL80211_CHAN_WIDTH_10) + return NULL; + if (c1->width == NL80211_CHAN_WIDTH_20_NOHT || c1->width == NL80211_CHAN_WIDTH_20) return c2; @@ -264,11 +282,17 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, u32 bandwidth) { struct ieee80211_channel *c; - u32 freq; + u32 freq, start_freq, end_freq; + + if (bandwidth <= 20) { + start_freq = center_freq; + end_freq = center_freq; + } else { + start_freq = center_freq - bandwidth/2 + 10; + end_freq = center_freq + bandwidth/2 - 10; + } - for (freq = center_freq - bandwidth/2 + 10; - freq <= center_freq + bandwidth/2 - 10; - freq += 20) { + for (freq = start_freq; freq <= end_freq; freq += 20) { c = ieee80211_get_channel(wiphy, freq); if (!c) return -EINVAL; @@ -310,11 +334,17 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, u32 prohibited_flags) { struct ieee80211_channel *c; - u32 freq; + u32 freq, start_freq, end_freq; + + if (bandwidth <= 20) { + start_freq = center_freq; + end_freq = center_freq; + } else { + start_freq = center_freq - bandwidth/2 + 10; + end_freq = center_freq + bandwidth/2 - 10; + } - for (freq = center_freq - bandwidth/2 + 10; - freq <= center_freq + bandwidth/2 - 10; - freq += 20) { + for (freq = start_freq; freq <= end_freq; freq += 20) { c = ieee80211_get_channel(wiphy, freq); if (!c) return false; @@ -349,6 +379,12 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, control_freq = chandef->chan->center_freq; switch (chandef->width) { + case NL80211_CHAN_WIDTH_5: + width = 5; + break; + case NL80211_CHAN_WIDTH_10: + width = 10; + break; case NL80211_CHAN_WIDTH_20: if (!ht_cap->ht_supported) return false; @@ -405,6 +441,11 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, if (width > 20) prohibited_flags |= IEEE80211_CHAN_NO_OFDM; + /* 5 and 10 MHz are only defined for the OFDM PHY */ + if (width < 20) + prohibited_flags |= IEEE80211_CHAN_NO_OFDM; + + if (!cfg80211_secondary_chans_ok(wiphy, chandef->center_freq1, width, prohibited_flags)) return false; diff --git a/net/wireless/core.c b/net/wireless/core.c index 73405e00c800..4f9f216665e9 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -34,13 +34,12 @@ MODULE_AUTHOR("Johannes Berg"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("wireless configuration support"); +MODULE_ALIAS_GENL_FAMILY(NL80211_GENL_NAME); -/* RCU-protected (and cfg80211_mutex for writers) */ +/* RCU-protected (and RTNL for writers) */ LIST_HEAD(cfg80211_rdev_list); int cfg80211_rdev_list_generation; -DEFINE_MUTEX(cfg80211_mutex); - /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; @@ -52,12 +51,11 @@ module_param(cfg80211_disable_40mhz_24ghz, bool, 0644); MODULE_PARM_DESC(cfg80211_disable_40mhz_24ghz, "Disable 40MHz support in the 2.4GHz band"); -/* requires cfg80211_mutex to be held! */ struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *rdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (rdev->wiphy_idx == wiphy_idx) { @@ -76,12 +74,11 @@ int get_wiphy_idx(struct wiphy *wiphy) return rdev->wiphy_idx; } -/* requires cfg80211_rdev_mutex to be held! */ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) { struct cfg80211_registered_device *rdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); rdev = cfg80211_rdev_by_wiphy_idx(wiphy_idx); if (!rdev) @@ -89,35 +86,13 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) return &rdev->wiphy; } -struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) -{ - struct cfg80211_registered_device *rdev = ERR_PTR(-ENODEV); - struct net_device *dev; - - mutex_lock(&cfg80211_mutex); - dev = dev_get_by_index(net, ifindex); - if (!dev) - goto out; - if (dev->ieee80211_ptr) { - rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); - mutex_lock(&rdev->mtx); - } else - rdev = ERR_PTR(-ENODEV); - dev_put(dev); - out: - mutex_unlock(&cfg80211_mutex); - return rdev; -} - -/* requires cfg80211_mutex to be held */ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname) { struct cfg80211_registered_device *rdev2; int wiphy_idx, taken = -1, result, digits; - assert_cfg80211_lock(); + ASSERT_RTNL(); /* prohibit calling the thing phy%d when %d is not its number */ sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken); @@ -215,8 +190,7 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { - lockdep_assert_held(&rdev->devlist_mtx); - lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)) return; @@ -230,18 +204,15 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, rdev->opencount--; if (rdev->scan_req && rdev->scan_req->wdev == wdev) { - bool busy = work_busy(&rdev->scan_done_wk); - /* - * If the work isn't pending or running (in which case it would - * be waiting for the lock we hold) the driver didn't properly - * cancel the scan when the interface was removed. In this case - * warn and leak the scan request object to not crash later. + * If the scan request wasn't notified as done, set it + * to aborted and leak it after a warning. The driver + * should have notified us that it ended at the latest + * during rdev_stop_p2p_device(). */ - WARN_ON(!busy); - - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, !busy); + if (WARN_ON(!rdev->scan_req->notified)) + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, !rdev->scan_req->notified); } } @@ -255,8 +226,6 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) rtnl_lock(); - /* read-only iteration need not hold the devlist_mtx */ - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->netdev) { dev_close(wdev->netdev); @@ -265,12 +234,7 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) /* otherwise, check iftype */ switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: - /* but this requires it */ - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); cfg80211_stop_p2p_device(rdev, wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); break; default: break; @@ -298,10 +262,7 @@ static void cfg80211_event_work(struct work_struct *work) event_work); rtnl_lock(); - cfg80211_lock_rdev(rdev); - cfg80211_process_rdev_events(rdev); - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } @@ -309,7 +270,7 @@ static void cfg80211_event_work(struct work_struct *work) struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) { - static int wiphy_counter; + static atomic_t wiphy_counter = ATOMIC_INIT(0); struct cfg80211_registered_device *rdev; int alloc_size; @@ -331,26 +292,21 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->ops = ops; - mutex_lock(&cfg80211_mutex); - - rdev->wiphy_idx = wiphy_counter++; + rdev->wiphy_idx = atomic_inc_return(&wiphy_counter); if (unlikely(rdev->wiphy_idx < 0)) { - wiphy_counter--; - mutex_unlock(&cfg80211_mutex); /* ugh, wrapped! */ + atomic_dec(&wiphy_counter); kfree(rdev); return NULL; } - mutex_unlock(&cfg80211_mutex); + /* atomic_inc_return makes it start at 1, make it start at 0 */ + rdev->wiphy_idx--; /* give it a proper name */ dev_set_name(&rdev->wiphy.dev, PHY_NAME "%d", rdev->wiphy_idx); - mutex_init(&rdev->mtx); - mutex_init(&rdev->devlist_mtx); - mutex_init(&rdev->sched_scan_mtx); INIT_LIST_HEAD(&rdev->wdev_list); INIT_LIST_HEAD(&rdev->beacon_registrations); spin_lock_init(&rdev->beacon_registrations_lock); @@ -496,8 +452,13 @@ int wiphy_register(struct wiphy *wiphy) u16 ifmodes = wiphy->interface_modes; #ifdef CONFIG_PM - if (WARN_ON((wiphy->wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && - !(wiphy->wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY))) + if (WARN_ON(wiphy->wowlan && + (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + !(wiphy->wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY))) + return -EINVAL; + if (WARN_ON(wiphy->wowlan && + !wiphy->wowlan->flags && !wiphy->wowlan->n_patterns && + !wiphy->wowlan->tcp)) return -EINVAL; #endif @@ -587,25 +548,28 @@ int wiphy_register(struct wiphy *wiphy) } #ifdef CONFIG_PM - if (rdev->wiphy.wowlan.n_patterns) { - if (WARN_ON(!rdev->wiphy.wowlan.pattern_min_len || - rdev->wiphy.wowlan.pattern_min_len > - rdev->wiphy.wowlan.pattern_max_len)) - return -EINVAL; - } + if (WARN_ON(rdev->wiphy.wowlan && rdev->wiphy.wowlan->n_patterns && + (!rdev->wiphy.wowlan->pattern_min_len || + rdev->wiphy.wowlan->pattern_min_len > + rdev->wiphy.wowlan->pattern_max_len))) + return -EINVAL; #endif /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); - mutex_lock(&cfg80211_mutex); res = device_add(&rdev->wiphy.dev); + if (res) + return res; + + res = rfkill_register(rdev->rfkill); if (res) { - mutex_unlock(&cfg80211_mutex); + device_del(&rdev->wiphy.dev); return res; } + rtnl_lock(); /* set up regulatory info */ wiphy_regulatory_register(wiphy); @@ -631,25 +595,7 @@ int wiphy_register(struct wiphy *wiphy) } cfg80211_debugfs_rdev_add(rdev); - mutex_unlock(&cfg80211_mutex); - - /* - * due to a locking dependency this has to be outside of the - * cfg80211_mutex lock - */ - res = rfkill_register(rdev->rfkill); - if (res) { - device_del(&rdev->wiphy.dev); - - mutex_lock(&cfg80211_mutex); - debugfs_remove_recursive(rdev->wiphy.debugfsdir); - list_del_rcu(&rdev->list); - wiphy_regulatory_deregister(wiphy); - mutex_unlock(&cfg80211_mutex); - return res; - } - rtnl_lock(); rdev->wiphy.registered = true; rtnl_unlock(); return 0; @@ -679,25 +625,19 @@ void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - rtnl_lock(); - rdev->wiphy.registered = false; - rtnl_unlock(); - - rfkill_unregister(rdev->rfkill); - - /* protect the device list */ - mutex_lock(&cfg80211_mutex); - wait_event(rdev->dev_wait, ({ int __count; - mutex_lock(&rdev->devlist_mtx); + rtnl_lock(); __count = rdev->opencount; - mutex_unlock(&rdev->devlist_mtx); + rtnl_unlock(); __count == 0; })); - mutex_lock(&rdev->devlist_mtx); + rfkill_unregister(rdev->rfkill); + + rtnl_lock(); + rdev->wiphy.registered = false; + BUG_ON(!list_empty(&rdev->wdev_list)); - mutex_unlock(&rdev->devlist_mtx); /* * First remove the hardware from everywhere, this makes @@ -708,20 +648,6 @@ void wiphy_unregister(struct wiphy *wiphy) synchronize_rcu(); /* - * Try to grab rdev->mtx. If a command is still in progress, - * hopefully the driver will refuse it since it's tearing - * down the device already. We wait for this command to complete - * before unlinking the item from the list. - * Note: as codified by the BUG_ON above we cannot get here if - * a virtual interface is still present. Hence, we can only get - * to lock contention here if userspace issues a command that - * identified the hardware by wiphy index. - */ - cfg80211_lock_rdev(rdev); - /* nothing */ - cfg80211_unlock_rdev(rdev); - - /* * If this device got a regulatory hint tell core its * free to listen now to a new shiny device regulatory hint */ @@ -730,15 +656,17 @@ void wiphy_unregister(struct wiphy *wiphy) cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); flush_work(&rdev->scan_done_wk); cancel_work_sync(&rdev->conn_work); flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); - if (rdev->wowlan && rdev->ops->set_wakeup) +#ifdef CONFIG_PM + if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) rdev_set_wakeup(rdev, false); +#endif cfg80211_rdev_free_wowlan(rdev); } EXPORT_SYMBOL(wiphy_unregister); @@ -748,9 +676,6 @@ void cfg80211_dev_free(struct cfg80211_registered_device *rdev) struct cfg80211_internal_bss *scan, *tmp; struct cfg80211_beacon_registration *reg, *treg; rfkill_destroy(rdev->rfkill); - mutex_destroy(&rdev->mtx); - mutex_destroy(&rdev->devlist_mtx); - mutex_destroy(&rdev->sched_scan_mtx); list_for_each_entry_safe(reg, treg, &rdev->beacon_registrations, list) { list_del(®->list); kfree(reg); @@ -775,36 +700,6 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); -static void wdev_cleanup_work(struct work_struct *work) -{ - struct wireless_dev *wdev; - struct cfg80211_registered_device *rdev; - - wdev = container_of(work, struct wireless_dev, cleanup_work); - rdev = wiphy_to_dev(wdev->wiphy); - - mutex_lock(&rdev->sched_scan_mtx); - - if (WARN_ON(rdev->scan_req && rdev->scan_req->wdev == wdev)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev, true); - } - - if (WARN_ON(rdev->sched_scan_req && - rdev->sched_scan_req->dev == wdev->netdev)) { - __cfg80211_stop_sched_scan(rdev, false); - } - - mutex_unlock(&rdev->sched_scan_mtx); - - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - wake_up(&rdev->dev_wait); - - dev_put(wdev->netdev); -} - void cfg80211_unregister_wdev(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -814,8 +709,6 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) if (WARN_ON(wdev->netdev)) return; - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); list_del_rcu(&wdev->list); rdev->devlist_generation++; @@ -827,8 +720,6 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) WARN_ON_ONCE(1); break; } - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); } EXPORT_SYMBOL(cfg80211_unregister_wdev); @@ -847,7 +738,7 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, } void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) + struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; @@ -857,9 +748,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - mutex_lock(&rdev->sched_scan_mtx); __cfg80211_stop_sched_scan(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); wdev_lock(wdev); #ifdef CONFIG_CFG80211_WEXT @@ -868,8 +757,8 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, wdev->wext.ie_len = 0; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; #endif - __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, true); + cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, true); wdev_unlock(wdev); break; case NL80211_IFTYPE_MESH_POINT: @@ -886,10 +775,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, } static int cfg80211_netdev_notifier_call(struct notifier_block *nb, - unsigned long state, - void *ndev) + unsigned long state, void *ptr) { - struct net_device *dev = ndev; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; int ret; @@ -912,13 +800,11 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * are added with nl80211. */ mutex_init(&wdev->mtx); - INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); - mutex_lock(&rdev->devlist_mtx); wdev->identifier = ++rdev->wdev_id; list_add_rcu(&wdev->list, &rdev->wdev_list); rdev->devlist_generation++; @@ -930,8 +816,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, pr_err("failed to add phy80211 symlink to netdev!\n"); } wdev->netdev = dev; - wdev->sme_state = CFG80211_SME_IDLE; - mutex_unlock(&rdev->devlist_mtx); #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; @@ -957,26 +841,22 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_DOWN: cfg80211_update_iface_num(rdev, wdev->iftype, -1); - dev_hold(dev); - queue_work(cfg80211_wq, &wdev->cleanup_work); + if (rdev->scan_req && rdev->scan_req->wdev == wdev) { + if (WARN_ON(!rdev->scan_req->notified)) + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev, true); + } + + if (WARN_ON(rdev->sched_scan_req && + rdev->sched_scan_req->dev == wdev->netdev)) { + __cfg80211_stop_sched_scan(rdev, false); + } + + rdev->opencount--; + wake_up(&rdev->dev_wait); break; case NETDEV_UP: - /* - * If we have a really quick DOWN/UP succession we may - * have this work still pending ... cancel it and see - * if it was pending, in which case we need to account - * for some of the work it would have done. - */ - if (cancel_work_sync(&wdev->cleanup_work)) { - mutex_lock(&rdev->devlist_mtx); - rdev->opencount--; - mutex_unlock(&rdev->devlist_mtx); - dev_put(dev); - } cfg80211_update_iface_num(rdev, wdev->iftype, 1); - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); switch (wdev->iftype) { #ifdef CONFIG_CFG80211_WEXT @@ -1008,10 +888,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; } wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); rdev->opencount++; - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); /* * Configure power management to the driver here so that its @@ -1028,12 +905,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_UNREGISTER: /* - * NB: cannot take rdev->mtx here because this may be - * called within code protected by it when interfaces - * are removed with nl80211. - */ - mutex_lock(&rdev->devlist_mtx); - /* * It is possible to get NETDEV_UNREGISTER * multiple times. To detect that, check * that the interface is still on the list @@ -1049,7 +920,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, kfree(wdev->wext.keys); #endif } - mutex_unlock(&rdev->devlist_mtx); /* * synchronise (so that we won't find this netdev * from other code any more) and then clear the list @@ -1063,15 +933,19 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * freed. */ cfg80211_process_wdev_events(wdev); + + if (WARN_ON(wdev->current_bss)) { + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); + wdev->current_bss = NULL; + } break; case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) return notifier_from_errno(-EOPNOTSUPP); if (rfkill_blocked(rdev->rfkill)) return notifier_from_errno(-ERFKILL); - mutex_lock(&rdev->devlist_mtx); ret = cfg80211_can_add_interface(rdev, wdev->iftype); - mutex_unlock(&rdev->devlist_mtx); if (ret) return notifier_from_errno(ret); break; @@ -1089,12 +963,10 @@ static void __net_exit cfg80211_pernet_exit(struct net *net) struct cfg80211_registered_device *rdev; rtnl_lock(); - mutex_lock(&cfg80211_mutex); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (net_eq(wiphy_net(&rdev->wiphy), net)) WARN_ON(cfg80211_switch_netns(rdev, &init_net)); } - mutex_unlock(&cfg80211_mutex); rtnl_unlock(); } diff --git a/net/wireless/core.h b/net/wireless/core.h index fd35dae547c4..a6b45bf00f33 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -5,7 +5,6 @@ */ #ifndef __NET_WIRELESS_CORE_H #define __NET_WIRELESS_CORE_H -#include <linux/mutex.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/rbtree.h> @@ -23,11 +22,6 @@ struct cfg80211_registered_device { const struct cfg80211_ops *ops; struct list_head list; - /* we hold this mutex during any call so that - * we cannot do multiple calls at once, and also - * to avoid the deregister call to proceed while - * any call is in progress */ - struct mutex mtx; /* rfkill support */ struct rfkill_ops rfkill_ops; @@ -49,9 +43,7 @@ struct cfg80211_registered_device { /* wiphy index, internal only */ int wiphy_idx; - /* associated wireless interfaces */ - struct mutex devlist_mtx; - /* protected by devlist_mtx or RCU */ + /* associated wireless interfaces, protected by rtnl or RCU */ struct list_head wdev_list; int devlist_generation, wdev_id; int opencount; /* also protected by devlist_mtx */ @@ -75,8 +67,6 @@ struct cfg80211_registered_device { struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; - struct mutex sched_scan_mtx; - #ifdef CONFIG_NL80211_TESTMODE struct genl_info *testmode_info; #endif @@ -84,8 +74,6 @@ struct cfg80211_registered_device { struct work_struct conn_work; struct work_struct event_work; - struct cfg80211_wowlan *wowlan; - struct delayed_work dfs_update_channels_wk; /* netlink port which started critical protocol (0 means not started) */ @@ -106,29 +94,26 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) static inline void cfg80211_rdev_free_wowlan(struct cfg80211_registered_device *rdev) { +#ifdef CONFIG_PM int i; - if (!rdev->wowlan) + if (!rdev->wiphy.wowlan_config) return; - for (i = 0; i < rdev->wowlan->n_patterns; i++) - kfree(rdev->wowlan->patterns[i].mask); - kfree(rdev->wowlan->patterns); - if (rdev->wowlan->tcp && rdev->wowlan->tcp->sock) - sock_release(rdev->wowlan->tcp->sock); - kfree(rdev->wowlan->tcp); - kfree(rdev->wowlan); + for (i = 0; i < rdev->wiphy.wowlan_config->n_patterns; i++) + kfree(rdev->wiphy.wowlan_config->patterns[i].mask); + kfree(rdev->wiphy.wowlan_config->patterns); + if (rdev->wiphy.wowlan_config->tcp && + rdev->wiphy.wowlan_config->tcp->sock) + sock_release(rdev->wiphy.wowlan_config->tcp->sock); + kfree(rdev->wiphy.wowlan_config->tcp); + kfree(rdev->wiphy.wowlan_config); +#endif } extern struct workqueue_struct *cfg80211_wq; -extern struct mutex cfg80211_mutex; extern struct list_head cfg80211_rdev_list; extern int cfg80211_rdev_list_generation; -static inline void assert_cfg80211_lock(void) -{ - lockdep_assert_held(&cfg80211_mutex); -} - struct cfg80211_internal_bss { struct list_head list; struct list_head hidden_list; @@ -161,27 +146,11 @@ static inline void cfg80211_unhold_bss(struct cfg80211_internal_bss *bss) struct cfg80211_registered_device *cfg80211_rdev_by_wiphy_idx(int wiphy_idx); int get_wiphy_idx(struct wiphy *wiphy); -/* requires cfg80211_rdev_mutex to be held! */ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); -/* identical to cfg80211_get_dev_from_info but only operate on ifindex */ -extern struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(struct net *net, int ifindex); - int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, struct net *net); -static inline void cfg80211_lock_rdev(struct cfg80211_registered_device *rdev) -{ - mutex_lock(&rdev->mtx); -} - -static inline void cfg80211_unlock_rdev(struct cfg80211_registered_device *rdev) -{ - BUG_ON(IS_ERR(rdev) || !rdev); - mutex_unlock(&rdev->mtx); -} - static inline void wdev_lock(struct wireless_dev *wdev) __acquires(wdev) { @@ -196,7 +165,7 @@ static inline void wdev_unlock(struct wireless_dev *wdev) mutex_unlock(&wdev->mtx); } -#define ASSERT_RDEV_LOCK(rdev) lockdep_assert_held(&(rdev)->mtx) +#define ASSERT_RDEV_LOCK(rdev) ASSERT_RTNL() #define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx) static inline bool cfg80211_has_monitors_only(struct cfg80211_registered_device *rdev) @@ -314,38 +283,21 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, struct net_device *dev); /* MLME */ -int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, - const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len); int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, const u8 *bssid, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, + const u8 *bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, const u8 *key, int key_len, int key_idx, const u8 *sae_data, int sae_data_len); -int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, const u8 *bssid, const u8 *ssid, int ssid_len, struct cfg80211_assoc_request *req); -int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason, @@ -356,11 +308,6 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, bool local_state_change); void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev); -void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, - u16 status, bool wextev, - struct cfg80211_bss *bss); int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, u16 frame_type, const u8 *match_data, int match_len); @@ -376,19 +323,19 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, const struct ieee80211_vht_cap *vht_capa_mask); -/* SME */ -int __cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys, - const u8 *prev_bssid); +/* SME events */ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys); -int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, u16 reason, - bool wextev); + struct cfg80211_cached_keys *connkeys, + const u8 *prev_bssid); +void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, bool wextev, + struct cfg80211_bss *bss); +void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, + size_t ie_len, u16 reason, bool from_ap); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); @@ -399,21 +346,21 @@ void __cfg80211_roamed(struct wireless_dev *wdev, int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +/* SME implementation */ void cfg80211_conn_work(struct work_struct *work); -void cfg80211_sme_failed_assoc(struct wireless_dev *wdev); -bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev); +void cfg80211_sme_scan_done(struct net_device *dev); +bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status); +void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len); +void cfg80211_sme_disassoc(struct wireless_dev *wdev); +void cfg80211_sme_deauth(struct wireless_dev *wdev); +void cfg80211_sme_auth_timeout(struct wireless_dev *wdev); +void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev); /* internal helpers */ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher); int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr); -void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, - size_t ie_len, u16 reason, bool from_ap); -void cfg80211_sme_scan_done(struct net_device *dev); -void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len); -void cfg80211_sme_disassoc(struct net_device *dev, - struct cfg80211_internal_bss *bss); void __cfg80211_scan_done(struct work_struct *wk); void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak); void __cfg80211_sched_scan_results(struct work_struct *wk); diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c index 920cabe0461b..90d050036624 100644 --- a/net/wireless/debugfs.c +++ b/net/wireless/debugfs.c @@ -74,7 +74,7 @@ static ssize_t ht40allow_map_read(struct file *file, if (!buf) return -ENOMEM; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); for (band = 0; band < IEEE80211_NUM_BANDS; band++) { sband = wiphy->bands[band]; @@ -85,7 +85,7 @@ static ssize_t ht40allow_map_read(struct file *file, buf, buf_size, offset); } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); r = simple_read_from_buffer(user_buf, count, ppos, buf, offset); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index d80e47194d49..39bff7d36768 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -43,7 +43,6 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - wdev->sme_state = CFG80211_SME_CONNECTED; cfg80211_upload_connect_keys(wdev); nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, @@ -64,8 +63,6 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) trace_cfg80211_ibss_joined(dev, bssid); - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING); - ev = kzalloc(sizeof(*ev), gfp); if (!ev) return; @@ -120,7 +117,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, #ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.chandef = params->chandef; #endif - wdev->sme_state = CFG80211_SME_CONNECTING; err = cfg80211_can_use_chan(rdev, wdev, params->chandef.chan, params->channel_fixed @@ -134,7 +130,6 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, err = rdev_join_ibss(rdev, dev, params); if (err) { wdev->connect_keys = NULL; - wdev->sme_state = CFG80211_SME_IDLE; return err; } @@ -152,11 +147,11 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - mutex_lock(&rdev->devlist_mtx); + ASSERT_RTNL(); + wdev_lock(wdev); err = __cfg80211_join_ibss(rdev, dev, params, connkeys); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -186,7 +181,6 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) } wdev->current_bss = NULL; - wdev->sme_state = CFG80211_SME_IDLE; wdev->ssid_len = 0; #ifdef CONFIG_CFG80211_WEXT if (!nowext) @@ -359,11 +353,9 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, wdev->wext.ibss.channel_fixed = false; } - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -429,11 +421,9 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, memcpy(wdev->wext.ibss.ssid, ssid, len); wdev->wext.ibss.ssid_len = len; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -512,11 +502,9 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, } else wdev->wext.ibss.bssid = NULL; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = cfg80211_ibss_wext_join(rdev, wdev); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 0bb93f3061a4..30c49202ee4d 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -18,6 +18,7 @@ #define MESH_PATH_TO_ROOT_TIMEOUT 6000 #define MESH_ROOT_INTERVAL 5000 #define MESH_ROOT_CONFIRMATION_INTERVAL 2000 +#define MESH_DEFAULT_PLINK_TIMEOUT 1800 /* timeout in seconds */ /* * Minimum interval between two consecutive PREQs originated by the same @@ -75,6 +76,7 @@ const struct mesh_config default_mesh_config = { .dot11MeshHWMPconfirmationInterval = MESH_ROOT_CONFIRMATION_INTERVAL, .power_mode = NL80211_MESH_POWER_ACTIVE, .dot11MeshAwakeWindowDuration = MESH_DEFAULT_AWAKE_WINDOW, + .plink_timeout = MESH_DEFAULT_PLINK_TIMEOUT, }; const struct mesh_setup default_mesh_setup = { @@ -82,6 +84,7 @@ const struct mesh_setup default_mesh_setup = { .sync_method = IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET, .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, .path_metric = IEEE80211_PATH_METRIC_AIRTIME, + .auth_id = 0, /* open */ .ie = NULL, .ie_len = 0, .is_secure = false, @@ -159,6 +162,16 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, setup->chandef.center_freq1 = setup->chandef.chan->center_freq; } + /* + * check if basic rates are available otherwise use mandatory rates as + * basic rates + */ + if (!setup->basic_rates) { + struct ieee80211_supported_band *sband = + rdev->wiphy.bands[setup->chandef.chan->band]; + setup->basic_rates = ieee80211_mandatory_rates(sband); + } + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) return -EINVAL; @@ -185,11 +198,9 @@ int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - mutex_lock(&rdev->devlist_mtx); wdev_lock(wdev); err = __cfg80211_join_mesh(rdev, dev, setup, conf); wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); return err; } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 0c7b7dd855f6..bfac5e186f57 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -18,37 +18,18 @@ #include "rdev-ops.h" -void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - - trace_cfg80211_send_rx_auth(dev); - wdev_lock(wdev); - - nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); - cfg80211_sme_rx_auth(dev, buf, len); - - wdev_unlock(wdev); -} -EXPORT_SYMBOL(cfg80211_send_rx_auth); - -void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, +void cfg80211_rx_assoc_resp(struct net_device *dev, struct cfg80211_bss *bss, const u8 *buf, size_t len) { - u16 status_code; struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u8 *ie = mgmt->u.assoc_resp.variable; int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); + u16 status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); trace_cfg80211_send_rx_assoc(dev, bss); - wdev_lock(wdev); - - status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); /* * This is a bit of a hack, we don't notify userspace of @@ -56,174 +37,135 @@ void cfg80211_send_rx_assoc(struct net_device *dev, struct cfg80211_bss *bss, * and got a reject -- we only try again with an assoc * frame instead of reassoc. */ - if (status_code != WLAN_STATUS_SUCCESS && wdev->conn && - cfg80211_sme_failed_reassoc(wdev)) { + if (cfg80211_sme_rx_assoc_resp(wdev, status_code)) { + cfg80211_unhold_bss(bss_from_pub(bss)); cfg80211_put_bss(wiphy, bss); - goto out; + return; } nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL); - - if (status_code != WLAN_STATUS_SUCCESS && wdev->conn) { - cfg80211_sme_failed_assoc(wdev); - /* - * do not call connect_result() now because the - * sme will schedule work that does it later. - */ - cfg80211_put_bss(wiphy, bss); - goto out; - } - - if (!wdev->conn && wdev->sme_state == CFG80211_SME_IDLE) { - /* - * This is for the userspace SME, the CONNECTING - * state will be changed to CONNECTED by - * __cfg80211_connect_result() below. - */ - wdev->sme_state = CFG80211_SME_CONNECTING; - } - - /* this consumes the bss reference */ + /* update current_bss etc., consumes the bss reference */ __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, status_code, status_code == WLAN_STATUS_SUCCESS, bss); - out: - wdev_unlock(wdev); } -EXPORT_SYMBOL(cfg80211_send_rx_assoc); +EXPORT_SYMBOL(cfg80211_rx_assoc_resp); -void __cfg80211_send_deauth(struct net_device *dev, - const u8 *buf, size_t len) +static void cfg80211_process_auth(struct wireless_dev *wdev, + const u8 *buf, size_t len) { - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; - const u8 *bssid = mgmt->bssid; - bool was_current = false; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - trace___cfg80211_send_deauth(dev); - ASSERT_WDEV_LOCK(wdev); - - if (wdev->current_bss && - ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - was_current = true; - } + nl80211_send_rx_auth(rdev, wdev->netdev, buf, len, GFP_KERNEL); + cfg80211_sme_rx_auth(wdev, buf, len); +} - nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); +static void cfg80211_process_deauth(struct wireless_dev *wdev, + const u8 *buf, size_t len) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; + const u8 *bssid = mgmt->bssid; + u16 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); + bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr); - if (wdev->sme_state == CFG80211_SME_CONNECTED && was_current) { - u16 reason_code; - bool from_ap; + nl80211_send_deauth(rdev, wdev->netdev, buf, len, GFP_KERNEL); - reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); + if (!wdev->current_bss || + !ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) + return; - from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); - __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); - } else if (wdev->sme_state == CFG80211_SME_CONNECTING) { - __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); - } + __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap); + cfg80211_sme_deauth(wdev); } -EXPORT_SYMBOL(__cfg80211_send_deauth); -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) +static void cfg80211_process_disassoc(struct wireless_dev *wdev, + const u8 *buf, size_t len) { - struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; + const u8 *bssid = mgmt->bssid; + u16 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); + bool from_ap = !ether_addr_equal(mgmt->sa, wdev->netdev->dev_addr); - wdev_lock(wdev); - __cfg80211_send_deauth(dev, buf, len); - wdev_unlock(wdev); + nl80211_send_disassoc(rdev, wdev->netdev, buf, len, GFP_KERNEL); + + if (WARN_ON(!wdev->current_bss || + !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + return; + + __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap); + cfg80211_sme_disassoc(wdev); } -EXPORT_SYMBOL(cfg80211_send_deauth); -void __cfg80211_send_disassoc(struct net_device *dev, - const u8 *buf, size_t len) +void cfg80211_rx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; - const u8 *bssid = mgmt->bssid; - u16 reason_code; - bool from_ap; + struct ieee80211_mgmt *mgmt = (void *)buf; - trace___cfg80211_send_disassoc(dev); ASSERT_WDEV_LOCK(wdev); - nl80211_send_disassoc(rdev, dev, buf, len, GFP_KERNEL); + trace_cfg80211_rx_mlme_mgmt(dev, buf, len); - if (wdev->sme_state != CFG80211_SME_CONNECTED) + if (WARN_ON(len < 2)) return; - if (wdev->current_bss && - ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) { - cfg80211_sme_disassoc(dev, wdev->current_bss); - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - } else - WARN_ON(1); - - - reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); - - from_ap = !ether_addr_equal(mgmt->sa, dev->dev_addr); - __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); + if (ieee80211_is_auth(mgmt->frame_control)) + cfg80211_process_auth(wdev, buf, len); + else if (ieee80211_is_deauth(mgmt->frame_control)) + cfg80211_process_deauth(wdev, buf, len); + else if (ieee80211_is_disassoc(mgmt->frame_control)) + cfg80211_process_disassoc(wdev, buf, len); } -EXPORT_SYMBOL(__cfg80211_send_disassoc); +EXPORT_SYMBOL(cfg80211_rx_mlme_mgmt); -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) +void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + trace_cfg80211_send_auth_timeout(dev, addr); - wdev_lock(wdev); - __cfg80211_send_disassoc(dev, buf, len); - wdev_unlock(wdev); + nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); + cfg80211_sme_auth_timeout(wdev); } -EXPORT_SYMBOL(cfg80211_send_disassoc); +EXPORT_SYMBOL(cfg80211_auth_timeout); -void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) +void cfg80211_assoc_timeout(struct net_device *dev, struct cfg80211_bss *bss) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - trace_cfg80211_send_auth_timeout(dev, addr); - wdev_lock(wdev); + trace_cfg80211_send_assoc_timeout(dev, bss->bssid); - nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); - if (wdev->sme_state == CFG80211_SME_CONNECTING) - __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); + nl80211_send_assoc_timeout(rdev, dev, bss->bssid, GFP_KERNEL); + cfg80211_sme_assoc_timeout(wdev); - wdev_unlock(wdev); + cfg80211_unhold_bss(bss_from_pub(bss)); + cfg80211_put_bss(wiphy, bss); } -EXPORT_SYMBOL(cfg80211_send_auth_timeout); +EXPORT_SYMBOL(cfg80211_assoc_timeout); -void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) +void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct ieee80211_mgmt *mgmt = (void *)buf; - trace_cfg80211_send_assoc_timeout(dev, addr); - wdev_lock(wdev); + ASSERT_WDEV_LOCK(wdev); - nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL); - if (wdev->sme_state == CFG80211_SME_CONNECTING) - __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); + trace_cfg80211_tx_mlme_mgmt(dev, buf, len); - wdev_unlock(wdev); + if (WARN_ON(len < 2)) + return; + + if (ieee80211_is_deauth(mgmt->frame_control)) + cfg80211_process_deauth(wdev, buf, len); + else + cfg80211_process_disassoc(wdev, buf, len); } -EXPORT_SYMBOL(cfg80211_send_assoc_timeout); +EXPORT_SYMBOL(cfg80211_tx_mlme_mgmt); void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, enum nl80211_key_type key_type, int key_id, @@ -253,18 +195,27 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, EXPORT_SYMBOL(cfg80211_michael_mic_failure); /* some MLME handling for userspace SME */ -int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, - const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len) +int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, + const u8 *bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx, + const u8 *sae_data, int sae_data_len) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_auth_request req; + struct cfg80211_auth_request req = { + .ie = ie, + .ie_len = ie_len, + .sae_data = sae_data, + .sae_data_len = sae_data_len, + .auth_type = auth_type, + .key = key, + .key_len = key_len, + .key_idx = key_idx, + }; int err; ASSERT_WDEV_LOCK(wdev); @@ -277,18 +228,8 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, ether_addr_equal(bssid, wdev->current_bss->pub.bssid)) return -EALREADY; - memset(&req, 0, sizeof(req)); - - req.ie = ie; - req.ie_len = ie_len; - req.sae_data = sae_data; - req.sae_data_len = sae_data_len; - req.auth_type = auth_type; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - req.key = key; - req.key_len = key_len; - req.key_idx = key_idx; if (!req.bss) return -ENOENT; @@ -304,28 +245,6 @@ out: return err; } -int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, - const u8 *key, int key_len, int key_idx, - const u8 *sae_data, int sae_data_len) -{ - int err; - - mutex_lock(&rdev->devlist_mtx); - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len, - key, key_len, key_idx, - sae_data, sae_data_len); - wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->devlist_mtx); - - return err; -} - /* Do a logical ht_capa &= ht_capa_mask. */ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask) @@ -360,30 +279,21 @@ void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, p1[i] &= p2[i]; } -int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req) +int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + const u8 *bssid, + const u8 *ssid, int ssid_len, + struct cfg80211_assoc_request *req) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; - bool was_connected = false; ASSERT_WDEV_LOCK(wdev); - if (wdev->current_bss && req->prev_bssid && - ether_addr_equal(wdev->current_bss->pub.bssid, req->prev_bssid)) { - /* - * Trying to reassociate: Allow this to proceed and let the old - * association to be dropped when the new one is completed. - */ - if (wdev->sme_state == CFG80211_SME_CONNECTED) { - was_connected = true; - wdev->sme_state = CFG80211_SME_CONNECTING; - } - } else if (wdev->current_bss) + if (wdev->current_bss && + (!req->prev_bssid || !ether_addr_equal(wdev->current_bss->pub.bssid, + req->prev_bssid))) return -EALREADY; cfg80211_oper_and_ht_capa(&req->ht_capa_mask, @@ -393,52 +303,28 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); - if (!req->bss) { - if (was_connected) - wdev->sme_state = CFG80211_SME_CONNECTED; + if (!req->bss) return -ENOENT; - } err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED); if (err) goto out; err = rdev_assoc(rdev, dev, req); + if (!err) + cfg80211_hold_bss(bss_from_pub(req->bss)); out: - if (err) { - if (was_connected) - wdev->sme_state = CFG80211_SME_CONNECTED; + if (err) cfg80211_put_bss(&rdev->wiphy, req->bss); - } return err; } -int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - const u8 *bssid, - const u8 *ssid, int ssid_len, - struct cfg80211_assoc_request *req) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - mutex_lock(&rdev->devlist_mtx); - wdev_lock(wdev); - err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, - ssid, ssid_len, req); - wdev_unlock(wdev); - mutex_unlock(&rdev->devlist_mtx); - - return err; -} - -int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) +int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_deauth_request req = { @@ -451,79 +337,51 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (local_state_change && (!wdev->current_bss || - !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + if (local_state_change && + (!wdev->current_bss || + !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) return 0; return rdev_deauth(rdev, dev, &req); } -int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) +int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason, + bool local_state_change) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_disassoc_request req = { + .reason_code = reason, + .local_state_change = local_state_change, + .ie = ie, + .ie_len = ie_len, + }; int err; - wdev_lock(wdev); - err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason, - local_state_change); - wdev_unlock(wdev); - - return err; -} - -static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_disassoc_request req; - ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state != CFG80211_SME_CONNECTED) - return -ENOTCONN; - - if (WARN(!wdev->current_bss, "sme_state=%d\n", wdev->sme_state)) + if (!wdev->current_bss) return -ENOTCONN; - memset(&req, 0, sizeof(req)); - req.reason_code = reason; - req.local_state_change = local_state_change; - req.ie = ie; - req.ie_len = ie_len; if (ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) req.bss = &wdev->current_bss->pub; else return -ENOTCONN; - return rdev_disassoc(rdev, dev, &req); -} - -int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason, - bool local_state_change) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; - - wdev_lock(wdev); - err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason, - local_state_change); - wdev_unlock(wdev); + err = rdev_disassoc(rdev, dev, &req); + if (err) + return err; - return err; + /* driver should have reported the disassoc */ + WARN_ON(wdev->current_bss); + return 0; } void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_deauth_request req; u8 bssid[ETH_ALEN]; ASSERT_WDEV_LOCK(wdev); @@ -531,23 +389,12 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, if (!rdev->ops->deauth) return; - memset(&req, 0, sizeof(req)); - req.reason_code = WLAN_REASON_DEAUTH_LEAVING; - req.ie = NULL; - req.ie_len = 0; - if (!wdev->current_bss) return; memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); - req.bssid = bssid; - rdev_deauth(rdev, dev, &req); - - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(&rdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - } + cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); } struct cfg80211_mgmt_registration { @@ -848,7 +695,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) dfs_update_channels_wk); wiphy = &rdev->wiphy; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); for (bandid = 0; bandid < IEEE80211_NUM_BANDS; bandid++) { sband = wiphy->bands[bandid]; if (!sband) @@ -881,7 +728,7 @@ void cfg80211_dfs_channels_update_work(struct work_struct *work) check_again = true; } } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); /* reschedule if there are other channels waiting to be cleared again */ if (check_again) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b14b7e3cb6e6..25d217d90807 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -37,10 +37,10 @@ static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb, /* the netlink family */ static struct genl_family nl80211_fam = { - .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ - .name = "nl80211", /* have users key off the name instead */ - .hdrsize = 0, /* no private header */ - .version = 1, /* no particular meaning now */ + .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ + .name = NL80211_GENL_NAME, /* have users key off the name instead */ + .hdrsize = 0, /* no private header */ + .version = 1, /* no particular meaning now */ .maxattr = NL80211_ATTR_MAX, .netnsok = true, .pre_doit = nl80211_pre_doit, @@ -59,7 +59,7 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) int wiphy_idx = -1; int ifidx = -1; - assert_cfg80211_lock(); + ASSERT_RTNL(); if (!have_ifidx && !have_wdev_id) return ERR_PTR(-EINVAL); @@ -80,7 +80,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) if (have_wdev_id && rdev->wiphy_idx != wiphy_idx) continue; - mutex_lock(&rdev->devlist_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (have_ifidx && wdev->netdev && wdev->netdev->ifindex == ifidx) { @@ -92,7 +91,6 @@ __cfg80211_wdev_from_attrs(struct net *netns, struct nlattr **attrs) break; } } - mutex_unlock(&rdev->devlist_mtx); if (result) break; @@ -109,7 +107,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) struct cfg80211_registered_device *rdev = NULL, *tmp; struct net_device *netdev; - assert_cfg80211_lock(); + ASSERT_RTNL(); if (!attrs[NL80211_ATTR_WIPHY] && !attrs[NL80211_ATTR_IFINDEX] && @@ -128,14 +126,12 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) tmp = cfg80211_rdev_by_wiphy_idx(wdev_id >> 32); if (tmp) { /* make sure wdev exists */ - mutex_lock(&tmp->devlist_mtx); list_for_each_entry(wdev, &tmp->wdev_list, list) { if (wdev->identifier != (u32)wdev_id) continue; found = true; break; } - mutex_unlock(&tmp->devlist_mtx); if (!found) tmp = NULL; @@ -182,19 +178,6 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) /* * This function returns a pointer to the driver * that the genl_info item that is passed refers to. - * If successful, it returns non-NULL and also locks - * the driver's mutex! - * - * This means that you need to call cfg80211_unlock_rdev() - * before being allowed to acquire &cfg80211_mutex! - * - * This is necessary because we need to lock the global - * mutex to get an item off the list safely, and then - * we lock the rdev mutex so it doesn't go away under us. - * - * We don't want to keep cfg80211_mutex locked - * for all the time in order to allow requests on - * other interfaces to go through at the same time. * * The result of this can be a PTR_ERR and hence must * be checked with IS_ERR() for errors. @@ -202,20 +185,7 @@ __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) static struct cfg80211_registered_device * cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info) { - struct cfg80211_registered_device *rdev; - - mutex_lock(&cfg80211_mutex); - rdev = __cfg80211_rdev_from_attrs(netns, info->attrs); - - /* if it is not an error we grab the lock on - * it to assure it won't be going away while - * we operate on it */ - if (!IS_ERR(rdev)) - mutex_lock(&rdev->mtx); - - mutex_unlock(&cfg80211_mutex); - - return rdev; + return __cfg80211_rdev_from_attrs(netns, info->attrs); } /* policy for the attributes */ @@ -378,6 +348,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 }, }; /* policy for the key attributes */ @@ -455,7 +426,6 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, int err; rtnl_lock(); - mutex_lock(&cfg80211_mutex); if (!cb->args[0]) { err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, @@ -484,14 +454,12 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, *rdev = wiphy_to_dev(wiphy); *wdev = NULL; - mutex_lock(&(*rdev)->devlist_mtx); list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { if (tmp->identifier == cb->args[1]) { *wdev = tmp; break; } } - mutex_unlock(&(*rdev)->devlist_mtx); if (!*wdev) { err = -ENODEV; @@ -499,19 +467,14 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, } } - cfg80211_lock_rdev(*rdev); - - mutex_unlock(&cfg80211_mutex); return 0; out_unlock: - mutex_unlock(&cfg80211_mutex); rtnl_unlock(); return err; } static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev) { - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } @@ -837,12 +800,9 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) case NL80211_IFTYPE_MESH_POINT: break; case NL80211_IFTYPE_ADHOC: - if (!wdev->current_bss) - return -ENOLINK; - break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (wdev->sme_state != CFG80211_SME_CONNECTED) + if (!wdev->current_bss) return -ENOLINK; break; default: @@ -945,7 +905,7 @@ nla_put_failure: static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { - const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; + const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan->tcp; struct nlattr *nl_tcp; if (!tcp) @@ -988,37 +948,37 @@ static int nl80211_send_wowlan(struct sk_buff *msg, { struct nlattr *nl_wowlan; - if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns) + if (!dev->wiphy.wowlan) return 0; nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); if (!nl_wowlan) return -ENOBUFS; - if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && + if (((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && + ((dev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) return -ENOBUFS; - if (dev->wiphy.wowlan.n_patterns) { + if (dev->wiphy.wowlan->n_patterns) { struct nl80211_wowlan_pattern_support pat = { - .max_patterns = dev->wiphy.wowlan.n_patterns, - .min_pattern_len = dev->wiphy.wowlan.pattern_min_len, - .max_pattern_len = dev->wiphy.wowlan.pattern_max_len, - .max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset, + .max_patterns = dev->wiphy.wowlan->n_patterns, + .min_pattern_len = dev->wiphy.wowlan->pattern_min_len, + .max_pattern_len = dev->wiphy.wowlan->pattern_max_len, + .max_pkt_offset = dev->wiphy.wowlan->max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, @@ -1151,10 +1111,16 @@ nl80211_send_mgmt_stypes(struct sk_buff *msg, return 0; } +struct nl80211_dump_wiphy_state { + s64 filter_wiphy; + long start; + long split_start, band_start, chan_start; + bool split; +}; + static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, struct sk_buff *msg, u32 portid, u32 seq, - int flags, bool split, long *split_start, - long *band_start, long *chan_start) + int flags, struct nl80211_dump_wiphy_state *state) { void *hdr; struct nlattr *nl_bands, *nl_band; @@ -1165,19 +1131,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, int i; const struct ieee80211_txrx_stypes *mgmt_stypes = dev->wiphy.mgmt_stypes; - long start = 0, start_chan = 0, start_band = 0; u32 features; hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) return -ENOBUFS; - /* allow always using the variables */ - if (!split) { - split_start = &start; - band_start = &start_band; - chan_start = &start_chan; - } + if (WARN_ON(!state)) + return -EINVAL; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, @@ -1186,7 +1147,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, cfg80211_rdev_list_generation)) goto nla_put_failure; - switch (*split_start) { + switch (state->split_start) { case 0: if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, dev->wiphy.retry_short) || @@ -1228,9 +1189,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; + if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && + nla_put_flag(msg, WIPHY_FLAG_SUPPORTS_5_10_MHZ)) + goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 1: if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, @@ -1274,22 +1238,23 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, } } - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 2: if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, dev->wiphy.interface_modes)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 3: nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; - for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) { + for (band = state->band_start; + band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; sband = dev->wiphy.bands[band]; @@ -1301,12 +1266,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nl_band) goto nla_put_failure; - switch (*chan_start) { + switch (state->chan_start) { case 0: if (nl80211_send_band_rateinfo(msg, sband)) goto nla_put_failure; - (*chan_start)++; - if (split) + state->chan_start++; + if (state->split) break; default: /* add frequencies */ @@ -1315,7 +1280,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (!nl_freqs) goto nla_put_failure; - for (i = *chan_start - 1; + for (i = state->chan_start - 1; i < sband->n_channels; i++) { nl_freq = nla_nest_start(msg, i); @@ -1324,26 +1289,27 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, chan = &sband->channels[i]; - if (nl80211_msg_put_channel(msg, chan, - split)) + if (nl80211_msg_put_channel( + msg, chan, + state->split)) goto nla_put_failure; nla_nest_end(msg, nl_freq); - if (split) + if (state->split) break; } if (i < sband->n_channels) - *chan_start = i + 2; + state->chan_start = i + 2; else - *chan_start = 0; + state->chan_start = 0; nla_nest_end(msg, nl_freqs); } nla_nest_end(msg, nl_band); - if (split) { + if (state->split) { /* start again here */ - if (*chan_start) + if (state->chan_start) band--; break; } @@ -1351,14 +1317,14 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, nla_nest_end(msg, nl_bands); if (band < IEEE80211_NUM_BANDS) - *band_start = band + 1; + state->band_start = band + 1; else - *band_start = 0; + state->band_start = 0; /* if bands & channels are done, continue outside */ - if (*band_start == 0 && *chan_start == 0) - (*split_start)++; - if (split) + if (state->band_start == 0 && state->chan_start == 0) + state->split_start++; + if (state->split) break; case 4: nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); @@ -1424,7 +1390,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, } CMD(start_p2p_device, START_P2P_DEVICE); CMD(set_mcast_rate, SET_MCAST_RATE); - if (split) { + if (state->split) { CMD(crit_proto_start, CRIT_PROTOCOL_START); CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); } @@ -1448,8 +1414,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, } nla_nest_end(msg, nl_cmds); - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 5: if (dev->ops->remain_on_channel && @@ -1465,29 +1431,30 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, if (nl80211_send_mgmt_stypes(msg, mgmt_stypes)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 6: #ifdef CONFIG_PM - if (nl80211_send_wowlan(msg, dev, split)) + if (nl80211_send_wowlan(msg, dev, state->split)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; #else - (*split_start)++; + state->split_start++; #endif case 7: if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, dev->wiphy.software_iftypes)) goto nla_put_failure; - if (nl80211_put_iface_combinations(&dev->wiphy, msg, split)) + if (nl80211_put_iface_combinations(&dev->wiphy, msg, + state->split)) goto nla_put_failure; - (*split_start)++; - if (split) + state->split_start++; + if (state->split) break; case 8: if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && @@ -1501,7 +1468,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, * dump is split, otherwise it makes it too big. Therefore * only advertise it in that case. */ - if (split) + if (state->split) features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) goto nla_put_failure; @@ -1528,7 +1495,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, * case we'll continue with more data in the next round, * but break unconditionally so unsplit data stops here. */ - (*split_start)++; + state->split_start++; break; case 9: if (dev->wiphy.extended_capabilities && @@ -1547,7 +1514,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, goto nla_put_failure; /* done */ - *split_start = 0; + state->split_start = 0; break; } return genlmsg_end(msg, hdr); @@ -1557,66 +1524,78 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, return -EMSGSIZE; } +static int nl80211_dump_wiphy_parse(struct sk_buff *skb, + struct netlink_callback *cb, + struct nl80211_dump_wiphy_state *state) +{ + struct nlattr **tb = nl80211_fam.attrbuf; + int ret = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + tb, nl80211_fam.maxattr, nl80211_policy); + /* ignore parse errors for backward compatibility */ + if (ret) + return 0; + + state->split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; + if (tb[NL80211_ATTR_WIPHY]) + state->filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); + if (tb[NL80211_ATTR_WDEV]) + state->filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; + if (tb[NL80211_ATTR_IFINDEX]) { + struct net_device *netdev; + struct cfg80211_registered_device *rdev; + int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); + + netdev = dev_get_by_index(sock_net(skb->sk), ifidx); + if (!netdev) + return -ENODEV; + if (netdev->ieee80211_ptr) { + rdev = wiphy_to_dev( + netdev->ieee80211_ptr->wiphy); + state->filter_wiphy = rdev->wiphy_idx; + } + dev_put(netdev); + } + + return 0; +} + static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) { int idx = 0, ret; - int start = cb->args[0]; + struct nl80211_dump_wiphy_state *state = (void *)cb->args[0]; struct cfg80211_registered_device *dev; - s64 filter_wiphy = -1; - bool split = false; - struct nlattr **tb; - int res; - - /* will be zeroed in nlmsg_parse() */ - tb = kmalloc(sizeof(*tb) * (NL80211_ATTR_MAX + 1), GFP_KERNEL); - if (!tb) - return -ENOMEM; - mutex_lock(&cfg80211_mutex); - res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - tb, NL80211_ATTR_MAX, nl80211_policy); - if (res == 0) { - split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; - if (tb[NL80211_ATTR_WIPHY]) - filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); - if (tb[NL80211_ATTR_WDEV]) - filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; - if (tb[NL80211_ATTR_IFINDEX]) { - struct net_device *netdev; - int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); - - netdev = dev_get_by_index(sock_net(skb->sk), ifidx); - if (!netdev) { - mutex_unlock(&cfg80211_mutex); - kfree(tb); - return -ENODEV; - } - if (netdev->ieee80211_ptr) { - dev = wiphy_to_dev( - netdev->ieee80211_ptr->wiphy); - filter_wiphy = dev->wiphy_idx; - } - dev_put(netdev); + rtnl_lock(); + if (!state) { + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) { + rtnl_unlock(); + return -ENOMEM; } + state->filter_wiphy = -1; + ret = nl80211_dump_wiphy_parse(skb, cb, state); + if (ret) { + kfree(state); + rtnl_unlock(); + return ret; + } + cb->args[0] = (long)state; } - kfree(tb); list_for_each_entry(dev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) continue; - if (++idx <= start) + if (++idx <= state->start) continue; - if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy) + if (state->filter_wiphy != -1 && + state->filter_wiphy != dev->wiphy_idx) continue; /* attempt to fit multiple wiphy data chunks into the skb */ do { ret = nl80211_send_wiphy(dev, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI, - split, &cb->args[1], - &cb->args[2], - &cb->args[3]); + NLM_F_MULTI, state); if (ret < 0) { /* * If sending the wiphy data didn't fit (ENOBUFS @@ -1635,33 +1614,40 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) !skb->len && cb->min_dump_alloc < 4096) { cb->min_dump_alloc = 4096; - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); return 1; } idx--; break; } - } while (cb->args[1] > 0); + } while (state->split_start > 0); break; } - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); - cb->args[0] = idx; + state->start = idx; return skb->len; } +static int nl80211_dump_wiphy_done(struct netlink_callback *cb) +{ + kfree((void *)cb->args[0]); + return 0; +} + static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct cfg80211_registered_device *dev = info->user_ptr[0]; + struct nl80211_dump_wiphy_state state = {}; msg = nlmsg_new(4096, GFP_KERNEL); if (!msg) return -ENOMEM; if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0, - false, NULL, NULL, NULL) < 0) { + &state) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -1778,6 +1764,11 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, IEEE80211_CHAN_DISABLED)) return -EINVAL; + if ((chandef->width == NL80211_CHAN_WIDTH_5 || + chandef->width == NL80211_CHAN_WIDTH_10) && + !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ)) + return -EINVAL; + return 0; } @@ -1799,7 +1790,6 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, if (result) return result; - mutex_lock(&rdev->devlist_mtx); switch (iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: @@ -1823,7 +1813,6 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, default: result = -EINVAL; } - mutex_unlock(&rdev->devlist_mtx); return result; } @@ -1872,6 +1861,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u32 frag_threshold = 0, rts_threshold = 0; u8 coverage_class = 0; + ASSERT_RTNL(); + /* * Try to find the wiphy and netdev. Normally this * function shouldn't need the netdev, but this is @@ -1881,31 +1872,25 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) * also passed a netdev to set_wiphy, so that it is * possible to let that go to the right netdev! */ - mutex_lock(&cfg80211_mutex); if (info->attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); netdev = dev_get_by_index(genl_info_net(info), ifindex); - if (netdev && netdev->ieee80211_ptr) { + if (netdev && netdev->ieee80211_ptr) rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy); - mutex_lock(&rdev->mtx); - } else + else netdev = NULL; } if (!netdev) { rdev = __cfg80211_rdev_from_attrs(genl_info_net(info), info->attrs); - if (IS_ERR(rdev)) { - mutex_unlock(&cfg80211_mutex); + if (IS_ERR(rdev)) return PTR_ERR(rdev); - } wdev = NULL; netdev = NULL; result = 0; - - mutex_lock(&rdev->mtx); } else wdev = netdev->ieee80211_ptr; @@ -1918,8 +1903,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = cfg80211_dev_rename( rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); - mutex_unlock(&cfg80211_mutex); - if (result) goto bad_res; @@ -2126,7 +2109,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } bad_res: - mutex_unlock(&rdev->mtx); if (netdev) dev_put(netdev); return result; @@ -2224,7 +2206,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; @@ -2234,7 +2216,6 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * } if_idx = 0; - mutex_lock(&rdev->devlist_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (if_idx < if_start) { if_idx++; @@ -2243,17 +2224,15 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev) < 0) { - mutex_unlock(&rdev->devlist_mtx); goto out; } if_idx++; } - mutex_unlock(&rdev->devlist_mtx); wp_idx++; } out: - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); cb->args[0] = wp_idx; cb->args[1] = if_idx; @@ -2286,6 +2265,7 @@ static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = { [NL80211_MNTR_FLAG_CONTROL] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG }, + [NL80211_MNTR_FLAG_ACTIVE] = { .type = NLA_FLAG }, }; static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) @@ -2397,6 +2377,10 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) change = true; } + if (flags && (*flags & NL80211_MNTR_FLAG_ACTIVE) && + !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) + return -EOPNOTSUPP; + if (change) err = cfg80211_change_iface(rdev, dev, ntype, flags, ¶ms); else @@ -2454,6 +2438,11 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); + + if (!err && (flags & NL80211_MNTR_FLAG_ACTIVE) && + !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) + return -EOPNOTSUPP; + wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, ¶ms); @@ -2486,11 +2475,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) INIT_LIST_HEAD(&wdev->mgmt_registrations); spin_lock_init(&wdev->mgmt_registrations_lock); - mutex_lock(&rdev->devlist_mtx); wdev->identifier = ++rdev->wdev_id; list_add_rcu(&wdev->list, &rdev->wdev_list); rdev->devlist_generation++; - mutex_unlock(&rdev->devlist_mtx); break; default: break; @@ -2933,61 +2920,58 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) return err; } -static int nl80211_parse_beacon(struct genl_info *info, +static int nl80211_parse_beacon(struct nlattr *attrs[], struct cfg80211_beacon_data *bcn) { bool haveinfo = false; - if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL]) || - !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]) || - !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE_PROBE_RESP]) || - !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE_ASSOC_RESP])) + if (!is_valid_ie_attr(attrs[NL80211_ATTR_BEACON_TAIL]) || + !is_valid_ie_attr(attrs[NL80211_ATTR_IE]) || + !is_valid_ie_attr(attrs[NL80211_ATTR_IE_PROBE_RESP]) || + !is_valid_ie_attr(attrs[NL80211_ATTR_IE_ASSOC_RESP])) return -EINVAL; memset(bcn, 0, sizeof(*bcn)); - if (info->attrs[NL80211_ATTR_BEACON_HEAD]) { - bcn->head = nla_data(info->attrs[NL80211_ATTR_BEACON_HEAD]); - bcn->head_len = nla_len(info->attrs[NL80211_ATTR_BEACON_HEAD]); + if (attrs[NL80211_ATTR_BEACON_HEAD]) { + bcn->head = nla_data(attrs[NL80211_ATTR_BEACON_HEAD]); + bcn->head_len = nla_len(attrs[NL80211_ATTR_BEACON_HEAD]); if (!bcn->head_len) return -EINVAL; haveinfo = true; } - if (info->attrs[NL80211_ATTR_BEACON_TAIL]) { - bcn->tail = nla_data(info->attrs[NL80211_ATTR_BEACON_TAIL]); - bcn->tail_len = - nla_len(info->attrs[NL80211_ATTR_BEACON_TAIL]); + if (attrs[NL80211_ATTR_BEACON_TAIL]) { + bcn->tail = nla_data(attrs[NL80211_ATTR_BEACON_TAIL]); + bcn->tail_len = nla_len(attrs[NL80211_ATTR_BEACON_TAIL]); haveinfo = true; } if (!haveinfo) return -EINVAL; - if (info->attrs[NL80211_ATTR_IE]) { - bcn->beacon_ies = nla_data(info->attrs[NL80211_ATTR_IE]); - bcn->beacon_ies_len = nla_len(info->attrs[NL80211_ATTR_IE]); + if (attrs[NL80211_ATTR_IE]) { + bcn->beacon_ies = nla_data(attrs[NL80211_ATTR_IE]); + bcn->beacon_ies_len = nla_len(attrs[NL80211_ATTR_IE]); } - if (info->attrs[NL80211_ATTR_IE_PROBE_RESP]) { + if (attrs[NL80211_ATTR_IE_PROBE_RESP]) { bcn->proberesp_ies = - nla_data(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); + nla_data(attrs[NL80211_ATTR_IE_PROBE_RESP]); bcn->proberesp_ies_len = - nla_len(info->attrs[NL80211_ATTR_IE_PROBE_RESP]); + nla_len(attrs[NL80211_ATTR_IE_PROBE_RESP]); } - if (info->attrs[NL80211_ATTR_IE_ASSOC_RESP]) { + if (attrs[NL80211_ATTR_IE_ASSOC_RESP]) { bcn->assocresp_ies = - nla_data(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); + nla_data(attrs[NL80211_ATTR_IE_ASSOC_RESP]); bcn->assocresp_ies_len = - nla_len(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]); + nla_len(attrs[NL80211_ATTR_IE_ASSOC_RESP]); } - if (info->attrs[NL80211_ATTR_PROBE_RESP]) { - bcn->probe_resp = - nla_data(info->attrs[NL80211_ATTR_PROBE_RESP]); - bcn->probe_resp_len = - nla_len(info->attrs[NL80211_ATTR_PROBE_RESP]); + if (attrs[NL80211_ATTR_PROBE_RESP]) { + bcn->probe_resp = nla_data(attrs[NL80211_ATTR_PROBE_RESP]); + bcn->probe_resp_len = nla_len(attrs[NL80211_ATTR_PROBE_RESP]); } return 0; @@ -2999,8 +2983,6 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev; bool ret = false; - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) @@ -3014,8 +2996,6 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, break; } - mutex_unlock(&rdev->devlist_mtx); - return ret; } @@ -3070,7 +3050,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) !info->attrs[NL80211_ATTR_BEACON_HEAD]) return -EINVAL; - err = nl80211_parse_beacon(info, ¶ms.beacon); + err = nl80211_parse_beacon(info->attrs, ¶ms.beacon); if (err) return err; @@ -3177,13 +3157,10 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.radar_required = true; } - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, params.chandef.chan, CHAN_MODE_SHARED, radar_detect_width); - mutex_unlock(&rdev->devlist_mtx); - if (err) return err; @@ -3225,7 +3202,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (!wdev->beacon_interval) return -EINVAL; - err = nl80211_parse_beacon(info, ¶ms); + err = nl80211_parse_beacon(info->attrs, ¶ms); if (err) return err; @@ -3383,6 +3360,32 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, return true; } +static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, + int id) +{ + void *attr; + int i = 0; + + if (!mask) + return true; + + attr = nla_nest_start(msg, id); + if (!attr) + return false; + + for (i = 0; i < IEEE80211_MAX_CHAINS; i++) { + if (!(mask & BIT(i))) + continue; + + if (nla_put_u8(msg, i, signal[i])) + return false; + } + + nla_nest_end(msg, attr); + + return true; +} + static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, @@ -3454,6 +3457,18 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, default: break; } + if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL) { + if (!nl80211_put_signal(msg, sinfo->chains, + sinfo->chain_signal, + NL80211_STA_INFO_CHAIN_SIGNAL)) + goto nla_put_failure; + } + if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL_AVG) { + if (!nl80211_put_signal(msg, sinfo->chains, + sinfo->chain_signal_avg, + NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) + goto nla_put_failure; + } if (sinfo->filled & STATION_INFO_TX_BITRATE) { if (!nl80211_put_sta_rate(msg, &sinfo->txrate, NL80211_STA_INFO_TX_BITRATE)) @@ -3841,6 +3856,8 @@ static int nl80211_set_station_tdls(struct genl_info *info, struct station_parameters *params) { /* Dummy STA entry gets updated once the peer capabilities are known */ + if (info->attrs[NL80211_ATTR_PEER_AID]) + params->aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params->ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); @@ -3981,7 +3998,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_AID]) + if (!info->attrs[NL80211_ATTR_STA_AID] && + !info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -3992,7 +4010,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (info->attrs[NL80211_ATTR_PEER_AID]) + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); + else + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); if (!params.aid || params.aid > IEEE80211_MAX_AID) return -EINVAL; @@ -4044,7 +4065,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; /* TDLS peers cannot be added */ - if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) || + info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; /* but don't bother the driver with it */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); @@ -4070,7 +4092,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) return -EINVAL; /* TDLS peers cannot be added */ - if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) + if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) || + info->attrs[NL80211_ATTR_PEER_AID]) return -EINVAL; break; case NL80211_IFTYPE_STATION: @@ -4592,7 +4615,9 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, cur_params.power_mode) || nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, - cur_params.dot11MeshAwakeWindowDuration)) + cur_params.dot11MeshAwakeWindowDuration) || + nla_put_u32(msg, NL80211_MESHCONF_PLINK_TIMEOUT, + cur_params.plink_timeout)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); @@ -4633,6 +4658,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = { .type = NLA_U16 }, [NL80211_MESHCONF_POWER_MODE] = { .type = NLA_U32 }, [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, + [NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 }, }; static const struct nla_policy @@ -4641,6 +4667,7 @@ static const struct nla_policy [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, + [NL80211_MESH_SETUP_AUTH_PROTOCOL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, @@ -4743,9 +4770,9 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1, mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, 1, 255, + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0, mask, NL80211_MESHCONF_RSSI_THRESHOLD, - nla_get_u32); + nla_get_s32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16, mask, NL80211_MESHCONF_HT_OPMODE, nla_get_u16); @@ -4769,6 +4796,9 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, 0, 65535, mask, NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 1, 0xffffffff, + mask, NL80211_MESHCONF_PLINK_TIMEOUT, + nla_get_u32); if (mask_out) *mask_out = mask; @@ -4826,6 +4856,13 @@ static int nl80211_parse_mesh_setup(struct genl_info *info, if (setup->is_secure) setup->user_mpm = true; + if (tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]) { + if (!setup->user_mpm) + return -EINVAL; + setup->auth_id = + nla_get_u8(tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]); + } + return 0; } @@ -4868,18 +4905,13 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) void *hdr = NULL; struct nlattr *nl_reg_rules; unsigned int i; - int err = -EINVAL; - - mutex_lock(&cfg80211_mutex); if (!cfg80211_regdomain) - goto out; + return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - err = -ENOBUFS; - goto out; - } + if (!msg) + return -ENOBUFS; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_REG); @@ -4938,8 +4970,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nla_nest_end(msg, nl_reg_rules); genlmsg_end(msg, hdr); - err = genlmsg_reply(msg, info); - goto out; + return genlmsg_reply(msg, info); nla_put_failure_rcu: rcu_read_unlock(); @@ -4947,10 +4978,7 @@ nla_put_failure: genlmsg_cancel(msg, hdr); put_failure: nlmsg_free(msg); - err = -EMSGSIZE; -out: - mutex_unlock(&cfg80211_mutex); - return err; + return -EMSGSIZE; } static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) @@ -5016,12 +5044,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) } } - mutex_lock(&cfg80211_mutex); - r = set_regdom(rd); /* set_regdom took ownership */ rd = NULL; - mutex_unlock(&cfg80211_mutex); bad_reg: kfree(rd); @@ -5071,7 +5096,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->scan) return -EOPNOTSUPP; - mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto unlock; @@ -5257,7 +5281,6 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } unlock: - mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -5329,8 +5352,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (ie_len > wiphy->max_sched_scan_ie_len) return -EINVAL; - mutex_lock(&rdev->sched_scan_mtx); - if (rdev->sched_scan_req) { err = -EINPROGRESS; goto out; @@ -5498,7 +5519,6 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, out_free: kfree(request); out: - mutex_unlock(&rdev->sched_scan_mtx); return err; } @@ -5506,17 +5526,12 @@ static int nl80211_stop_sched_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int err; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || !rdev->ops->sched_scan_stop) return -EOPNOTSUPP; - mutex_lock(&rdev->sched_scan_mtx); - err = __cfg80211_stop_sched_scan(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); - - return err; + return __cfg80211_stop_sched_scan(rdev, false); } static int nl80211_start_radar_detection(struct sk_buff *skb, @@ -5548,12 +5563,11 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (!rdev->ops->start_radar_detection) return -EOPNOTSUPP; - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_use_iftype_chan(rdev, wdev, wdev->iftype, chandef.chan, CHAN_MODE_SHARED, BIT(chandef.width)); if (err) - goto err_locked; + return err; err = rdev->ops->start_radar_detection(&rdev->wiphy, dev, &chandef); if (!err) { @@ -5561,9 +5575,6 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, wdev->cac_started = true; wdev->cac_start_time = jiffies; } -err_locked: - mutex_unlock(&rdev->devlist_mtx); - return err; } @@ -5946,10 +5957,13 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (local_state_change) return 0; - return cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len, - key.p.key, key.p.key_len, key.idx, - sae_data, sae_data_len); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, + ssid, ssid_len, ie, ie_len, + key.p.key, key.p.key_len, key.idx, + sae_data, sae_data_len); + wdev_unlock(dev->ieee80211_ptr); + return err; } static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, @@ -6116,9 +6130,12 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) } err = nl80211_crypto_settings(rdev, info, &req.crypto, 1); - if (!err) + if (!err) { + wdev_lock(dev->ieee80211_ptr); err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, ssid, ssid_len, &req); + wdev_unlock(dev->ieee80211_ptr); + } return err; } @@ -6128,7 +6145,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0; + int ie_len = 0, err; u16 reason_code; bool local_state_change; @@ -6163,8 +6180,11 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); + wdev_unlock(dev->ieee80211_ptr); + return err; } static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) @@ -6172,7 +6192,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *ie = NULL, *bssid; - int ie_len = 0; + int ie_len = 0, err; u16 reason_code; bool local_state_change; @@ -6207,8 +6227,11 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; - return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, - local_state_change); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, + local_state_change); + wdev_unlock(dev->ieee80211_ptr); + return err; } static bool @@ -6295,11 +6318,16 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef)) return -EINVAL; - if (ibss.chandef.width > NL80211_CHAN_WIDTH_40) - return -EINVAL; - if (ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && - !(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)) + switch (ibss.chandef.width) { + case NL80211_CHAN_WIDTH_20_NOHT: + break; + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + if (rdev->wiphy.features & NL80211_FEATURE_HT_IBSS) + break; + default: return -EINVAL; + } ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; @@ -6426,6 +6454,8 @@ static int nl80211_testmode_dump(struct sk_buff *skb, void *data = NULL; int data_len = 0; + rtnl_lock(); + if (cb->args[0]) { /* * 0 is a valid index, but not valid for args[0], @@ -6437,18 +6467,16 @@ static int nl80211_testmode_dump(struct sk_buff *skb, nl80211_fam.attrbuf, nl80211_fam.maxattr, nl80211_policy); if (err) - return err; + goto out_err; - mutex_lock(&cfg80211_mutex); rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), nl80211_fam.attrbuf); if (IS_ERR(rdev)) { - mutex_unlock(&cfg80211_mutex); - return PTR_ERR(rdev); + err = PTR_ERR(rdev); + goto out_err; } phy_idx = rdev->wiphy_idx; rdev = NULL; - mutex_unlock(&cfg80211_mutex); if (nl80211_fam.attrbuf[NL80211_ATTR_TESTDATA]) cb->args[1] = @@ -6460,14 +6488,11 @@ static int nl80211_testmode_dump(struct sk_buff *skb, data_len = nla_len((void *)cb->args[1]); } - mutex_lock(&cfg80211_mutex); rdev = cfg80211_rdev_by_wiphy_idx(phy_idx); if (!rdev) { - mutex_unlock(&cfg80211_mutex); - return -ENOENT; + err = -ENOENT; + goto out_err; } - cfg80211_lock_rdev(rdev); - mutex_unlock(&cfg80211_mutex); if (!rdev->ops->testmode_dump) { err = -EOPNOTSUPP; @@ -6508,7 +6533,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, /* see above */ cb->args[0] = phy_idx + 1; out_err: - cfg80211_unlock_rdev(rdev); + rtnl_unlock(); return err; } @@ -6588,12 +6613,14 @@ EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb); void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) { + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; void *hdr = ((void **)skb->cb)[1]; struct nlattr *data = ((void **)skb->cb)[2]; nla_nest_end(skb, data); genlmsg_end(skb, hdr); - genlmsg_multicast(skb, 0, nl80211_testmode_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0, + nl80211_testmode_mcgrp.id, gfp); } EXPORT_SYMBOL(cfg80211_testmode_event); #endif @@ -6716,7 +6743,9 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) sizeof(connect.vht_capa)); } - err = cfg80211_connect(rdev, dev, &connect, connkeys); + wdev_lock(dev->ieee80211_ptr); + err = cfg80211_connect(rdev, dev, &connect, connkeys, NULL); + wdev_unlock(dev->ieee80211_ptr); if (err) kfree(connkeys); return err; @@ -6727,6 +6756,7 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u16 reason; + int ret; if (!info->attrs[NL80211_ATTR_REASON_CODE]) reason = WLAN_REASON_DEAUTH_LEAVING; @@ -6740,7 +6770,10 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; - return cfg80211_disconnect(rdev, dev, reason, true); + wdev_lock(dev->ieee80211_ptr); + ret = cfg80211_disconnect(rdev, dev, reason, true); + wdev_unlock(dev->ieee80211_ptr); + return ret; } static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) @@ -7159,6 +7192,9 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; switch (wdev->iftype) { + case NL80211_IFTYPE_P2P_DEVICE: + if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) + return -EINVAL; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_CLIENT: @@ -7166,7 +7202,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_P2P_DEVICE: break; default: return -EOPNOTSUPP; @@ -7194,9 +7229,18 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); - err = nl80211_parse_chandef(rdev, info, &chandef); - if (err) - return err; + /* get the channel if any has been specified, otherwise pass NULL to + * the driver. The latter will use the current one + */ + chandef.chan = NULL; + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + err = nl80211_parse_chandef(rdev, info, &chandef); + if (err) + return err; + } + + if (!chandef.chan && offchan) + return -EINVAL; if (!dont_wait_for_ack) { msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); @@ -7501,6 +7545,23 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) setup.chandef.chan = NULL; } + if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { + u8 *rates = nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + int n_rates = + nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); + struct ieee80211_supported_band *sband; + + if (!setup.chandef.chan) + return -EINVAL; + + sband = rdev->wiphy.bands[setup.chandef.chan->band]; + + err = ieee80211_get_ratemask(sband, rates, n_rates, + &setup.basic_rates); + if (err) + return err; + } + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); } @@ -7516,28 +7577,29 @@ static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) static int nl80211_send_wowlan_patterns(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { + struct cfg80211_wowlan *wowlan = rdev->wiphy.wowlan_config; struct nlattr *nl_pats, *nl_pat; int i, pat_len; - if (!rdev->wowlan->n_patterns) + if (!wowlan->n_patterns) return 0; nl_pats = nla_nest_start(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN); if (!nl_pats) return -ENOBUFS; - for (i = 0; i < rdev->wowlan->n_patterns; i++) { + for (i = 0; i < wowlan->n_patterns; i++) { nl_pat = nla_nest_start(msg, i + 1); if (!nl_pat) return -ENOBUFS; - pat_len = rdev->wowlan->patterns[i].pattern_len; + pat_len = wowlan->patterns[i].pattern_len; if (nla_put(msg, NL80211_WOWLAN_PKTPAT_MASK, DIV_ROUND_UP(pat_len, 8), - rdev->wowlan->patterns[i].mask) || + wowlan->patterns[i].mask) || nla_put(msg, NL80211_WOWLAN_PKTPAT_PATTERN, - pat_len, rdev->wowlan->patterns[i].pattern) || + pat_len, wowlan->patterns[i].pattern) || nla_put_u32(msg, NL80211_WOWLAN_PKTPAT_OFFSET, - rdev->wowlan->patterns[i].pkt_offset)) + wowlan->patterns[i].pkt_offset)) return -ENOBUFS; nla_nest_end(msg, nl_pat); } @@ -7596,16 +7658,15 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) void *hdr; u32 size = NLMSG_DEFAULT_SIZE; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && - !rdev->wiphy.wowlan.tcp) + if (!rdev->wiphy.wowlan) return -EOPNOTSUPP; - if (rdev->wowlan && rdev->wowlan->tcp) { + if (rdev->wiphy.wowlan_config && rdev->wiphy.wowlan_config->tcp) { /* adjust size to have room for all the data */ - size += rdev->wowlan->tcp->tokens_size + - rdev->wowlan->tcp->payload_len + - rdev->wowlan->tcp->wake_len + - rdev->wowlan->tcp->wake_len / 8; + size += rdev->wiphy.wowlan_config->tcp->tokens_size + + rdev->wiphy.wowlan_config->tcp->payload_len + + rdev->wiphy.wowlan_config->tcp->wake_len + + rdev->wiphy.wowlan_config->tcp->wake_len / 8; } msg = nlmsg_new(size, GFP_KERNEL); @@ -7617,33 +7678,34 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) if (!hdr) goto nla_put_failure; - if (rdev->wowlan) { + if (rdev->wiphy.wowlan_config) { struct nlattr *nl_wowlan; nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); if (!nl_wowlan) goto nla_put_failure; - if ((rdev->wowlan->any && + if ((rdev->wiphy.wowlan_config->any && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || - (rdev->wowlan->disconnect && + (rdev->wiphy.wowlan_config->disconnect && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || - (rdev->wowlan->magic_pkt && + (rdev->wiphy.wowlan_config->magic_pkt && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || - (rdev->wowlan->gtk_rekey_failure && + (rdev->wiphy.wowlan_config->gtk_rekey_failure && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || - (rdev->wowlan->eap_identity_req && + (rdev->wiphy.wowlan_config->eap_identity_req && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || - (rdev->wowlan->four_way_handshake && + (rdev->wiphy.wowlan_config->four_way_handshake && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || - (rdev->wowlan->rfkill_release && + (rdev->wiphy.wowlan_config->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; if (nl80211_send_wowlan_patterns(msg, rdev)) goto nla_put_failure; - if (nl80211_send_wowlan_tcp(msg, rdev->wowlan->tcp)) + if (nl80211_send_wowlan_tcp(msg, + rdev->wiphy.wowlan_config->tcp)) goto nla_put_failure; nla_nest_end(msg, nl_wowlan); @@ -7669,7 +7731,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, u32 data_size, wake_size, tokens_size = 0, wake_mask_size; int err, port; - if (!rdev->wiphy.wowlan.tcp) + if (!rdev->wiphy.wowlan->tcp) return -EINVAL; err = nla_parse(tb, MAX_NL80211_WOWLAN_TCP, @@ -7689,16 +7751,16 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, return -EINVAL; data_size = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]); - if (data_size > rdev->wiphy.wowlan.tcp->data_payload_max) + if (data_size > rdev->wiphy.wowlan->tcp->data_payload_max) return -EINVAL; if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > - rdev->wiphy.wowlan.tcp->data_interval_max || + rdev->wiphy.wowlan->tcp->data_interval_max || nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0) return -EINVAL; wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); - if (wake_size > rdev->wiphy.wowlan.tcp->wake_payload_max) + if (wake_size > rdev->wiphy.wowlan->tcp->wake_payload_max) return -EINVAL; wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]); @@ -7713,13 +7775,13 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, if (!tok->len || tokens_size % tok->len) return -EINVAL; - if (!rdev->wiphy.wowlan.tcp->tok) + if (!rdev->wiphy.wowlan->tcp->tok) return -EINVAL; - if (tok->len > rdev->wiphy.wowlan.tcp->tok->max_len) + if (tok->len > rdev->wiphy.wowlan->tcp->tok->max_len) return -EINVAL; - if (tok->len < rdev->wiphy.wowlan.tcp->tok->min_len) + if (tok->len < rdev->wiphy.wowlan->tcp->tok->min_len) return -EINVAL; - if (tokens_size > rdev->wiphy.wowlan.tcp->tok->bufsize) + if (tokens_size > rdev->wiphy.wowlan->tcp->tok->bufsize) return -EINVAL; if (tok->offset + tok->len > data_size) return -EINVAL; @@ -7727,7 +7789,7 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) { seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]); - if (!rdev->wiphy.wowlan.tcp->seq) + if (!rdev->wiphy.wowlan->tcp->seq) return -EINVAL; if (seq->len == 0 || seq->len > 4) return -EINVAL; @@ -7808,17 +7870,16 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) struct nlattr *tb[NUM_NL80211_WOWLAN_TRIG]; struct cfg80211_wowlan new_triggers = {}; struct cfg80211_wowlan *ntrig; - struct wiphy_wowlan_support *wowlan = &rdev->wiphy.wowlan; + const struct wiphy_wowlan_support *wowlan = rdev->wiphy.wowlan; int err, i; - bool prev_enabled = rdev->wowlan; + bool prev_enabled = rdev->wiphy.wowlan_config; - if (!rdev->wiphy.wowlan.flags && !rdev->wiphy.wowlan.n_patterns && - !rdev->wiphy.wowlan.tcp) + if (!wowlan) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) { cfg80211_rdev_free_wowlan(rdev); - rdev->wowlan = NULL; + rdev->wiphy.wowlan_config = NULL; goto set_wakeup; } @@ -7954,11 +8015,12 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) goto error; } cfg80211_rdev_free_wowlan(rdev); - rdev->wowlan = ntrig; + rdev->wiphy.wowlan_config = ntrig; set_wakeup: - if (rdev->ops->set_wakeup && prev_enabled != !!rdev->wowlan) - rdev_set_wakeup(rdev, rdev->wowlan); + if (rdev->ops->set_wakeup && + prev_enabled != !!rdev->wiphy.wowlan_config) + rdev_set_wakeup(rdev, rdev->wiphy.wowlan_config); return 0; error: @@ -8143,9 +8205,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) if (wdev->p2p_started) return 0; - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_add_interface(rdev, wdev->iftype); - mutex_unlock(&rdev->devlist_mtx); if (err) return err; @@ -8154,9 +8214,7 @@ static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info) return err; wdev->p2p_started = true; - mutex_lock(&rdev->devlist_mtx); rdev->opencount++; - mutex_unlock(&rdev->devlist_mtx); return 0; } @@ -8172,11 +8230,7 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->stop_p2p_device) return -EOPNOTSUPP; - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); cfg80211_stop_p2p_device(rdev, wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); return 0; } @@ -8319,11 +8373,11 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, info->user_ptr[0] = rdev; } else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV || ops->internal_flags & NL80211_FLAG_NEED_WDEV) { - mutex_lock(&cfg80211_mutex); + ASSERT_RTNL(); + wdev = __cfg80211_wdev_from_attrs(genl_info_net(info), info->attrs); if (IS_ERR(wdev)) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return PTR_ERR(wdev); @@ -8334,7 +8388,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -EINVAL; @@ -8348,7 +8401,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, if (dev) { if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && !netif_running(dev)) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -ENETDOWN; @@ -8357,17 +8409,12 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, dev_hold(dev); } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { if (!wdev->p2p_started) { - mutex_unlock(&cfg80211_mutex); if (rtnl) rtnl_unlock(); return -ENETDOWN; } } - cfg80211_lock_rdev(rdev); - - mutex_unlock(&cfg80211_mutex); - info->user_ptr[0] = rdev; } @@ -8377,8 +8424,6 @@ static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb, static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { - if (info->user_ptr[0]) - cfg80211_unlock_rdev(info->user_ptr[0]); if (info->user_ptr[1]) { if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) { struct wireless_dev *wdev = info->user_ptr[1]; @@ -8398,9 +8443,11 @@ static struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_WIPHY, .doit = nl80211_get_wiphy, .dumpit = nl80211_dump_wiphy, + .done = nl80211_dump_wiphy_done, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WIPHY, + .internal_flags = NL80211_FLAG_NEED_WIPHY | + NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_WIPHY, @@ -8415,7 +8462,8 @@ static struct genl_ops nl80211_ops[] = { .dumpit = nl80211_dump_interface, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ - .internal_flags = NL80211_FLAG_NEED_WDEV, + .internal_flags = NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_SET_INTERFACE, @@ -8574,6 +8622,7 @@ static struct genl_ops nl80211_ops[] = { .cmd = NL80211_CMD_GET_REG, .doit = nl80211_get_reg, .policy = nl80211_policy, + .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ }, { @@ -8581,6 +8630,7 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_set_reg, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_RTNL, }, { .cmd = NL80211_CMD_REQ_SET_REG, @@ -9014,13 +9064,13 @@ static struct genl_multicast_group nl80211_regulatory_mcgrp = { void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) { struct sk_buff *msg; + struct nl80211_dump_wiphy_state state = {}; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; - if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, - false, NULL, NULL, NULL) < 0) { + if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, &state) < 0) { nlmsg_free(msg); return; } @@ -9036,8 +9086,6 @@ static int nl80211_add_scan_req(struct sk_buff *msg, struct nlattr *nest; int i; - lockdep_assert_held(&rdev->sched_scan_mtx); - if (WARN_ON(!req)) return 0; @@ -9344,31 +9392,27 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, NL80211_CMD_DISASSOCIATE, gfp); } -void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, - size_t len) +void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, + size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + const struct ieee80211_mgmt *mgmt = (void *)buf; + u32 cmd; - trace_cfg80211_send_unprot_deauth(dev); - nl80211_send_mlme_event(rdev, dev, buf, len, - NL80211_CMD_UNPROT_DEAUTHENTICATE, GFP_ATOMIC); -} -EXPORT_SYMBOL(cfg80211_send_unprot_deauth); + if (WARN_ON(len < 2)) + return; -void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, - size_t len) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + if (ieee80211_is_deauth(mgmt->frame_control)) + cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE; + else + cmd = NL80211_CMD_UNPROT_DISASSOCIATE; - trace_cfg80211_send_unprot_disassoc(dev); - nl80211_send_mlme_event(rdev, dev, buf, len, - NL80211_CMD_UNPROT_DISASSOCIATE, GFP_ATOMIC); + trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len); + nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC); } -EXPORT_SYMBOL(cfg80211_send_unprot_disassoc); +EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt); static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, int cmd, @@ -9879,7 +9923,6 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err; u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); if (!nlportid) @@ -9900,12 +9943,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr)) goto nla_put_failure; - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return true; - } - + genlmsg_end(msg, hdr); genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return true; @@ -10028,7 +10066,8 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, genlmsg_end(msg, hdr); - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -10348,10 +10387,7 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, if (nl80211_send_chandef(msg, chandef)) goto nla_put_failure; - if (genlmsg_end(msg, hdr) < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10417,7 +10453,6 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err; trace_cfg80211_probe_status(dev, addr, cookie, acked); @@ -10439,11 +10474,7 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr, (acked && nla_put_flag(msg, NL80211_ATTR_ACK))) goto nla_put_failure; - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10509,7 +10540,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err, size = 200; + int size = 200; trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup); @@ -10595,9 +10626,7 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, nla_nest_end(msg, reasons); } - err = genlmsg_end(msg, hdr); - if (err < 0) - goto free_msg; + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10617,7 +10646,6 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct sk_buff *msg; void *hdr; - int err; trace_cfg80211_tdls_oper_request(wdev->wiphy, dev, peer, oper, reason_code); @@ -10640,11 +10668,7 @@ void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason_code))) goto nla_put_failure; - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, gfp); @@ -10702,7 +10726,6 @@ void cfg80211_ft_event(struct net_device *netdev, struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; - int err; trace_cfg80211_ft_event(wiphy, netdev, ft_event); @@ -10728,11 +10751,7 @@ void cfg80211_ft_event(struct net_device *netdev, nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, ft_event->ric_ies); - err = genlmsg_end(msg, hdr); - if (err < 0) { - nlmsg_free(msg); - return; - } + genlmsg_end(msg, hdr); genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index cc35fbaa4578..de06d5d1287f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -81,7 +81,10 @@ static struct regulatory_request core_request_world = { .country_ie_env = ENVIRON_ANY, }; -/* Receipt of information from last regulatory request */ +/* + * Receipt of information from last regulatory request, + * protected by RTNL (and can be accessed with RCU protection) + */ static struct regulatory_request __rcu *last_request = (void __rcu *)&core_request_world; @@ -96,39 +99,25 @@ static struct device_type reg_device_type = { * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no * information to give us an alpha2. + * (protected by RTNL, can be read under RCU) */ const struct ieee80211_regdomain __rcu *cfg80211_regdomain; /* - * Protects static reg.c components: - * - cfg80211_regdomain (if not used with RCU) - * - cfg80211_world_regdom - * - last_request (if not used with RCU) - * - reg_num_devs_support_basehint - */ -static DEFINE_MUTEX(reg_mutex); - -/* * Number of devices that registered to the core * that support cellular base station regulatory hints + * (protected by RTNL) */ static int reg_num_devs_support_basehint; -static inline void assert_reg_lock(void) -{ - lockdep_assert_held(®_mutex); -} - static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { - return rcu_dereference_protected(cfg80211_regdomain, - lockdep_is_held(®_mutex)); + return rtnl_dereference(cfg80211_regdomain); } static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) { - return rcu_dereference_protected(wiphy->regd, - lockdep_is_held(®_mutex)); + return rtnl_dereference(wiphy->regd); } static void rcu_free_regdom(const struct ieee80211_regdomain *r) @@ -140,8 +129,7 @@ static void rcu_free_regdom(const struct ieee80211_regdomain *r) static struct regulatory_request *get_last_request(void) { - return rcu_dereference_check(last_request, - lockdep_is_held(®_mutex)); + return rcu_dereference_rtnl(last_request); } /* Used to queue up regulatory hints */ @@ -200,6 +188,7 @@ static const struct ieee80211_regdomain world_regdom = { } }; +/* protected by RTNL */ static const struct ieee80211_regdomain *cfg80211_world_regdom = &world_regdom; @@ -215,7 +204,7 @@ static void reset_regdomains(bool full_reset, const struct ieee80211_regdomain *r; struct regulatory_request *lr; - assert_reg_lock(); + ASSERT_RTNL(); r = get_cfg80211_regdom(); @@ -377,7 +366,7 @@ static void reg_regdb_search(struct work_struct *work) const struct ieee80211_regdomain *curdom, *regdom = NULL; int i; - mutex_lock(&cfg80211_mutex); + rtnl_lock(); mutex_lock(®_regdb_search_mutex); while (!list_empty(®_regdb_search_list)) { @@ -402,7 +391,7 @@ static void reg_regdb_search(struct work_struct *work) if (!IS_ERR_OR_NULL(regdom)) set_regdom(regdom); - mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); } static DECLARE_WORK(reg_regdb_work, reg_regdb_search); @@ -936,13 +925,7 @@ static bool reg_request_cell_base(struct regulatory_request *request) bool reg_last_request_cell_base(void) { - bool val; - - mutex_lock(®_mutex); - val = reg_request_cell_base(get_last_request()); - mutex_unlock(®_mutex); - - return val; + return reg_request_cell_base(get_last_request()); } #ifdef CONFIG_CFG80211_CERTIFICATION_ONUS @@ -1225,7 +1208,7 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) struct cfg80211_registered_device *rdev; struct wiphy *wiphy; - assert_cfg80211_lock(); + ASSERT_RTNL(); list_for_each_entry(rdev, &cfg80211_rdev_list, list) { wiphy = &rdev->wiphy; @@ -1362,7 +1345,7 @@ get_reg_request_treatment(struct wiphy *wiphy, return REG_REQ_OK; return REG_REQ_ALREADY_SET; } - return 0; + return REG_REQ_OK; case NL80211_REGDOM_SET_BY_DRIVER: if (lr->initiator == NL80211_REGDOM_SET_BY_CORE) { if (regdom_changes(pending_request->alpha2)) @@ -1444,8 +1427,6 @@ static void reg_set_request_processed(void) * what it believes should be the current regulatory domain. * * Returns one of the different reg request treatment values. - * - * Caller must hold ®_mutex */ static enum reg_request_treatment __regulatory_hint(struct wiphy *wiphy, @@ -1570,21 +1551,19 @@ static void reg_process_pending_hints(void) { struct regulatory_request *reg_request, *lr; - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); lr = get_last_request(); /* When last_request->processed becomes true this will be rescheduled */ if (lr && !lr->processed) { REG_DBG_PRINT("Pending regulatory request, waiting for it to be processed...\n"); - goto out; + return; } spin_lock(®_requests_lock); if (list_empty(®_requests_list)) { spin_unlock(®_requests_lock); - goto out; + return; } reg_request = list_first_entry(®_requests_list, @@ -1595,10 +1574,6 @@ static void reg_process_pending_hints(void) spin_unlock(®_requests_lock); reg_process_hint(reg_request, reg_request->initiator); - -out: - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); } /* Processes beacon hints -- this has nothing to do with country IEs */ @@ -1607,9 +1582,6 @@ static void reg_process_pending_beacon_hints(void) struct cfg80211_registered_device *rdev; struct reg_beacon *pending_beacon, *tmp; - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); - /* This goes through the _pending_ beacon list */ spin_lock_bh(®_pending_beacons_lock); @@ -1626,14 +1598,14 @@ static void reg_process_pending_beacon_hints(void) } spin_unlock_bh(®_pending_beacons_lock); - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); } static void reg_todo(struct work_struct *work) { + rtnl_lock(); reg_process_pending_hints(); reg_process_pending_beacon_hints(); + rtnl_unlock(); } static void queue_regulatory_request(struct regulatory_request *request) @@ -1717,29 +1689,23 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) } EXPORT_SYMBOL(regulatory_hint); -/* - * We hold wdev_lock() here so we cannot hold cfg80211_mutex() and - * therefore cannot iterate over the rdev list here. - */ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, const u8 *country_ie, u8 country_ie_len) { char alpha2[2]; enum environment_cap env = ENVIRON_ANY; - struct regulatory_request *request, *lr; - - mutex_lock(®_mutex); - lr = get_last_request(); - - if (unlikely(!lr)) - goto out; + struct regulatory_request *request = NULL, *lr; /* IE len must be evenly divisible by 2 */ if (country_ie_len & 0x01) - goto out; + return; if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) - goto out; + return; + + request = kzalloc(sizeof(*request), GFP_KERNEL); + if (!request) + return; alpha2[0] = country_ie[0]; alpha2[1] = country_ie[1]; @@ -1749,19 +1715,21 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, else if (country_ie[2] == 'O') env = ENVIRON_OUTDOOR; + rcu_read_lock(); + lr = get_last_request(); + + if (unlikely(!lr)) + goto out; + /* * We will run this only upon a successful connection on cfg80211. * We leave conflict resolution to the workqueue, where can hold - * cfg80211_mutex. + * the RTNL. */ if (lr->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE && lr->wiphy_idx != WIPHY_IDX_INVALID) goto out; - request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); - if (!request) - goto out; - request->wiphy_idx = get_wiphy_idx(wiphy); request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; @@ -1769,8 +1737,10 @@ void regulatory_hint_11d(struct wiphy *wiphy, enum ieee80211_band band, request->country_ie_env = env; queue_regulatory_request(request); + request = NULL; out: - mutex_unlock(®_mutex); + kfree(request); + rcu_read_unlock(); } static void restore_alpha2(char *alpha2, bool reset_user) @@ -1858,8 +1828,7 @@ static void restore_regulatory_settings(bool reset_user) LIST_HEAD(tmp_reg_req_list); struct cfg80211_registered_device *rdev; - mutex_lock(&cfg80211_mutex); - mutex_lock(®_mutex); + ASSERT_RTNL(); reset_regdomains(true, &world_regdom); restore_alpha2(alpha2, reset_user); @@ -1914,9 +1883,6 @@ static void restore_regulatory_settings(bool reset_user) list_splice_tail_init(&tmp_reg_req_list, ®_requests_list); spin_unlock(®_requests_lock); - mutex_unlock(®_mutex); - mutex_unlock(&cfg80211_mutex); - REG_DBG_PRINT("Kicking the queue\n"); schedule_work(®_work); @@ -2231,7 +2197,6 @@ int set_regdom(const struct ieee80211_regdomain *rd) struct regulatory_request *lr; int r; - mutex_lock(®_mutex); lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ @@ -2241,14 +2206,12 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); kfree(rd); - goto out; + return r; } /* This would make this whole thing pointless */ - if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) { - r = -EINVAL; - goto out; - } + if (WARN_ON(!lr->intersect && rd != get_cfg80211_regdom())) + return -EINVAL; /* update all wiphys now with the new established regulatory domain */ update_all_wiphy_regulatory(lr->initiator); @@ -2259,10 +2222,7 @@ int set_regdom(const struct ieee80211_regdomain *rd) reg_set_request_processed(); - out: - mutex_unlock(®_mutex); - - return r; + return 0; } int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) @@ -2287,23 +2247,20 @@ int reg_device_uevent(struct device *dev, struct kobj_uevent_env *env) void wiphy_regulatory_register(struct wiphy *wiphy) { - mutex_lock(®_mutex); + struct regulatory_request *lr; if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint++; - wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); - - mutex_unlock(®_mutex); + lr = get_last_request(); + wiphy_update_regulatory(wiphy, lr->initiator); } -/* Caller must hold cfg80211_mutex */ void wiphy_regulatory_deregister(struct wiphy *wiphy) { struct wiphy *request_wiphy = NULL; struct regulatory_request *lr; - mutex_lock(®_mutex); lr = get_last_request(); if (!reg_dev_ignore_cell_hint(wiphy)) @@ -2316,18 +2273,18 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) request_wiphy = wiphy_idx_to_wiphy(lr->wiphy_idx); if (!request_wiphy || request_wiphy != wiphy) - goto out; + return; lr->wiphy_idx = WIPHY_IDX_INVALID; lr->country_ie_env = ENVIRON_ANY; -out: - mutex_unlock(®_mutex); } static void reg_timeout_work(struct work_struct *work) { REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); + rtnl_lock(); restore_regulatory_settings(true); + rtnl_unlock(); } int __init regulatory_init(void) @@ -2385,9 +2342,9 @@ void regulatory_exit(void) cancel_delayed_work_sync(®_timeout); /* Lock to suppress warnings */ - mutex_lock(®_mutex); + rtnl_lock(); reset_regdomains(true, NULL); - mutex_unlock(®_mutex); + rtnl_unlock(); dev_set_uevent_suppress(®_pdev->dev, true); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index fd99ea495b7e..ae8c186b50d6 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -169,7 +169,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) union iwreq_data wrqu; #endif - lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); request = rdev->scan_req; @@ -230,9 +230,9 @@ void __cfg80211_scan_done(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, scan_done_wk); - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); ___cfg80211_scan_done(rdev, false); - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) @@ -241,6 +241,7 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); request->aborted = aborted; + request->notified = true; queue_work(cfg80211_wq, &wiphy_to_dev(request->wiphy)->scan_done_wk); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -255,7 +256,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) request = rdev->sched_scan_req; - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); /* we don't have sched_scan_req anymore if the scan is stopping */ if (request) { @@ -270,7 +271,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) nl80211_send_sched_scan_results(rdev, request->dev); } - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } void cfg80211_sched_scan_results(struct wiphy *wiphy) @@ -289,9 +290,9 @@ void cfg80211_sched_scan_stopped(struct wiphy *wiphy) trace_cfg80211_sched_scan_stopped(wiphy); - mutex_lock(&rdev->sched_scan_mtx); + rtnl_lock(); __cfg80211_stop_sched_scan(rdev, true); - mutex_unlock(&rdev->sched_scan_mtx); + rtnl_unlock(); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); @@ -300,7 +301,7 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, { struct net_device *dev; - lockdep_assert_held(&rdev->sched_scan_mtx); + ASSERT_RTNL(); if (!rdev->sched_scan_req) return -ENOENT; @@ -522,6 +523,7 @@ static int cmp_bss(struct cfg80211_bss *a, } } +/* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, @@ -677,6 +679,7 @@ static bool cfg80211_combine_bsses(struct cfg80211_registered_device *dev, return true; } +/* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *dev, struct cfg80211_internal_bss *tmp) @@ -865,6 +868,7 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen, return channel; } +/* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_bss* cfg80211_inform_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, @@ -922,6 +926,7 @@ cfg80211_inform_bss(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_inform_bss); +/* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_bss * cfg80211_inform_bss_frame(struct wiphy *wiphy, struct ieee80211_channel *channel, @@ -1040,6 +1045,25 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) EXPORT_SYMBOL(cfg80211_unlink_bss); #ifdef CONFIG_CFG80211_WEXT +static struct cfg80211_registered_device * +cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) +{ + struct cfg80211_registered_device *rdev; + struct net_device *dev; + + ASSERT_RTNL(); + + dev = dev_get_by_index(net, ifindex); + if (!dev) + return ERR_PTR(-ENODEV); + if (dev->ieee80211_ptr) + rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + else + rdev = ERR_PTR(-ENODEV); + dev_put(dev); + return rdev; +} + int cfg80211_wext_siwscan(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) @@ -1062,7 +1086,6 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - mutex_lock(&rdev->sched_scan_mtx); if (rdev->scan_req) { err = -EBUSY; goto out; @@ -1169,9 +1192,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, dev_hold(dev); } out: - mutex_unlock(&rdev->sched_scan_mtx); kfree(creq); - cfg80211_unlock_rdev(rdev); return err; } EXPORT_SYMBOL_GPL(cfg80211_wext_siwscan); @@ -1470,10 +1491,8 @@ int cfg80211_wext_giwscan(struct net_device *dev, if (IS_ERR(rdev)) return PTR_ERR(rdev); - if (rdev->scan_req) { - res = -EAGAIN; - goto out; - } + if (rdev->scan_req) + return -EAGAIN; res = ieee80211_scan_results(rdev, info, extra, data->length); data->length = 0; @@ -1482,8 +1501,6 @@ int cfg80211_wext_giwscan(struct net_device *dev, res = 0; } - out: - cfg80211_unlock_rdev(rdev); return res; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwscan); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 3ed35c345cae..81c8a10d743c 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1,5 +1,7 @@ /* - * SME code for cfg80211's connect emulation. + * SME code for cfg80211 + * both driver SME event handling and the SME implementation + * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright (C) 2009 Intel Corporation. All rights reserved. @@ -18,18 +20,26 @@ #include "reg.h" #include "rdev-ops.h" +/* + * Software SME in cfg80211, using auth/assoc/deauth calls to the + * driver. This is is for implementing nl80211's connect/disconnect + * and wireless extensions (if configured.) + */ + struct cfg80211_conn { struct cfg80211_connect_params params; /* these are sub-states of the _CONNECTING sme_state */ enum { - CFG80211_CONN_IDLE, CFG80211_CONN_SCANNING, CFG80211_CONN_SCAN_AGAIN, CFG80211_CONN_AUTHENTICATE_NEXT, CFG80211_CONN_AUTHENTICATING, + CFG80211_CONN_AUTH_FAILED, CFG80211_CONN_ASSOCIATE_NEXT, CFG80211_CONN_ASSOCIATING, - CFG80211_CONN_DEAUTH_ASSOC_FAIL, + CFG80211_CONN_ASSOC_FAILED, + CFG80211_CONN_DEAUTH, + CFG80211_CONN_CONNECTED, } state; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 *ie; @@ -37,45 +47,16 @@ struct cfg80211_conn { bool auto_auth, prev_bssid_valid; }; -static bool cfg80211_is_all_idle(void) -{ - struct cfg80211_registered_device *rdev; - struct wireless_dev *wdev; - bool is_all_idle = true; - - mutex_lock(&cfg80211_mutex); - - /* - * All devices must be idle as otherwise if you are actively - * scanning some new beacon hints could be learned and would - * count as new regulatory hints. - */ - list_for_each_entry(rdev, &cfg80211_rdev_list, list) { - cfg80211_lock_rdev(rdev); - list_for_each_entry(wdev, &rdev->wdev_list, list) { - wdev_lock(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) - is_all_idle = false; - wdev_unlock(wdev); - } - cfg80211_unlock_rdev(rdev); - } - - mutex_unlock(&cfg80211_mutex); - - return is_all_idle; -} - -static void disconnect_work(struct work_struct *work) +static void cfg80211_sme_free(struct wireless_dev *wdev) { - if (!cfg80211_is_all_idle()) + if (!wdev->conn) return; - regulatory_hint_disconnect(); + kfree(wdev->conn->ie); + kfree(wdev->conn); + wdev->conn = NULL; } -static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); - static int cfg80211_conn_scan(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -85,7 +66,6 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); - lockdep_assert_held(&rdev->sched_scan_mtx); if (rdev->scan_req) return -EBUSY; @@ -171,18 +151,23 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) params = &wdev->conn->params; switch (wdev->conn->state) { + case CFG80211_CONN_SCANNING: + /* didn't find it during scan ... */ + return -ENOENT; case CFG80211_CONN_SCAN_AGAIN: return cfg80211_conn_scan(wdev); case CFG80211_CONN_AUTHENTICATE_NEXT: BUG_ON(!rdev->ops->auth); wdev->conn->state = CFG80211_CONN_AUTHENTICATING; - return __cfg80211_mlme_auth(rdev, wdev->netdev, - params->channel, params->auth_type, - params->bssid, - params->ssid, params->ssid_len, - NULL, 0, - params->key, params->key_len, - params->key_idx, NULL, 0); + return cfg80211_mlme_auth(rdev, wdev->netdev, + params->channel, params->auth_type, + params->bssid, + params->ssid, params->ssid_len, + NULL, 0, + params->key, params->key_len, + params->key_idx, NULL, 0); + case CFG80211_CONN_AUTH_FAILED: + return -ENOTCONN; case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; @@ -198,21 +183,27 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) req.vht_capa = params->vht_capa; req.vht_capa_mask = params->vht_capa_mask; - err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, - params->bssid, params->ssid, - params->ssid_len, &req); + err = cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, + params->bssid, params->ssid, + params->ssid_len, &req); if (err) - __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, - NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, - false); + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, + false); return err; - case CFG80211_CONN_DEAUTH_ASSOC_FAIL: - __cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, - NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, false); - /* return an error so that we call __cfg80211_connect_result() */ - return -EINVAL; + case CFG80211_CONN_ASSOC_FAILED: + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); + return -ENOTCONN; + case CFG80211_CONN_DEAUTH: + cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING, false); + /* free directly, disconnected event already sent */ + cfg80211_sme_free(wdev); + return 0; default: return 0; } @@ -226,9 +217,6 @@ void cfg80211_conn_work(struct work_struct *work) u8 bssid_buf[ETH_ALEN], *bssid = NULL; rtnl_lock(); - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); list_for_each_entry(wdev, &rdev->wdev_list, list) { if (!wdev->netdev) @@ -239,7 +227,8 @@ void cfg80211_conn_work(struct work_struct *work) wdev_unlock(wdev); continue; } - if (wdev->sme_state != CFG80211_SME_CONNECTING || !wdev->conn) { + if (!wdev->conn || + wdev->conn->state == CFG80211_CONN_CONNECTED) { wdev_unlock(wdev); continue; } @@ -247,21 +236,21 @@ void cfg80211_conn_work(struct work_struct *work) memcpy(bssid_buf, wdev->conn->params.bssid, ETH_ALEN); bssid = bssid_buf; } - if (cfg80211_conn_do_work(wdev)) + if (cfg80211_conn_do_work(wdev)) { __cfg80211_connect_result( wdev->netdev, bssid, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, false, NULL); + cfg80211_sme_free(wdev); + } wdev_unlock(wdev); } - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); rtnl_unlock(); } +/* Returned bss is reference counted and must be cleaned up appropriately. */ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -299,9 +288,6 @@ static void __cfg80211_sme_scan_done(struct net_device *dev) ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state != CFG80211_SME_CONNECTING) - return; - if (!wdev->conn) return; @@ -310,20 +296,10 @@ static void __cfg80211_sme_scan_done(struct net_device *dev) return; bss = cfg80211_get_conn_bss(wdev); - if (bss) { + if (bss) cfg80211_put_bss(&rdev->wiphy, bss); - } else { - /* not found */ - if (wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) - schedule_work(&rdev->conn_work); - else - __cfg80211_connect_result( - wdev->netdev, - wdev->conn->params.bssid, - NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - false, NULL); - } + else + schedule_work(&rdev->conn_work); } void cfg80211_sme_scan_done(struct net_device *dev) @@ -335,10 +311,8 @@ void cfg80211_sme_scan_done(struct net_device *dev) wdev_unlock(wdev); } -void cfg80211_sme_rx_auth(struct net_device *dev, - const u8 *buf, size_t len) +void cfg80211_sme_rx_auth(struct wireless_dev *wdev, const u8 *buf, size_t len) { - struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; @@ -346,11 +320,7 @@ void cfg80211_sme_rx_auth(struct net_device *dev, ASSERT_WDEV_LOCK(wdev); - /* should only RX auth frames when connecting */ - if (wdev->sme_state != CFG80211_SME_CONNECTING) - return; - - if (WARN_ON(!wdev->conn)) + if (!wdev->conn || wdev->conn->state == CFG80211_CONN_CONNECTED) return; if (status_code == WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG && @@ -379,46 +349,239 @@ void cfg80211_sme_rx_auth(struct net_device *dev, wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; schedule_work(&rdev->conn_work); } else if (status_code != WLAN_STATUS_SUCCESS) { - __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, + __cfg80211_connect_result(wdev->netdev, mgmt->bssid, + NULL, 0, NULL, 0, status_code, false, NULL); - } else if (wdev->sme_state == CFG80211_SME_CONNECTING && - wdev->conn->state == CFG80211_CONN_AUTHENTICATING) { + } else if (wdev->conn->state == CFG80211_CONN_AUTHENTICATING) { wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; schedule_work(&rdev->conn_work); } } -bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev) +bool cfg80211_sme_rx_assoc_resp(struct wireless_dev *wdev, u16 status) { - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - if (WARN_ON(!wdev->conn)) + if (!wdev->conn) return false; - if (!wdev->conn->prev_bssid_valid) + if (status == WLAN_STATUS_SUCCESS) { + wdev->conn->state = CFG80211_CONN_CONNECTED; return false; + } - /* - * Some stupid APs don't accept reassoc, so we - * need to fall back to trying regular assoc. - */ - wdev->conn->prev_bssid_valid = false; - wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; + if (wdev->conn->prev_bssid_valid) { + /* + * Some stupid APs don't accept reassoc, so we + * need to fall back to trying regular assoc; + * return true so no event is sent to userspace. + */ + wdev->conn->prev_bssid_valid = false; + wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; + schedule_work(&rdev->conn_work); + return true; + } + + wdev->conn->state = CFG80211_CONN_ASSOC_FAILED; schedule_work(&rdev->conn_work); + return false; +} - return true; +void cfg80211_sme_deauth(struct wireless_dev *wdev) +{ + cfg80211_sme_free(wdev); } -void cfg80211_sme_failed_assoc(struct wireless_dev *wdev) +void cfg80211_sme_auth_timeout(struct wireless_dev *wdev) { - struct wiphy *wiphy = wdev->wiphy; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_AUTH_FAILED; + schedule_work(&rdev->conn_work); +} + +void cfg80211_sme_disassoc(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (!wdev->conn) + return; + + wdev->conn->state = CFG80211_CONN_DEAUTH; + schedule_work(&rdev->conn_work); +} + +void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + if (!wdev->conn) + return; - wdev->conn->state = CFG80211_CONN_DEAUTH_ASSOC_FAIL; + wdev->conn->state = CFG80211_CONN_ASSOC_FAILED; schedule_work(&rdev->conn_work); } +static int cfg80211_sme_connect(struct wireless_dev *wdev, + struct cfg80211_connect_params *connect, + const u8 *prev_bssid) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_bss *bss; + int err; + + if (!rdev->ops->auth || !rdev->ops->assoc) + return -EOPNOTSUPP; + + if (wdev->current_bss) + return -EALREADY; + + if (WARN_ON(wdev->conn)) + return -EINPROGRESS; + + wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); + if (!wdev->conn) + return -ENOMEM; + + /* + * Copy all parameters, and treat explicitly IEs, BSSID, SSID. + */ + memcpy(&wdev->conn->params, connect, sizeof(*connect)); + if (connect->bssid) { + wdev->conn->params.bssid = wdev->conn->bssid; + memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN); + } + + if (connect->ie) { + wdev->conn->ie = kmemdup(connect->ie, connect->ie_len, + GFP_KERNEL); + wdev->conn->params.ie = wdev->conn->ie; + if (!wdev->conn->ie) { + kfree(wdev->conn); + wdev->conn = NULL; + return -ENOMEM; + } + } + + if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) { + wdev->conn->auto_auth = true; + /* start with open system ... should mostly work */ + wdev->conn->params.auth_type = + NL80211_AUTHTYPE_OPEN_SYSTEM; + } else { + wdev->conn->auto_auth = false; + } + + wdev->conn->params.ssid = wdev->ssid; + wdev->conn->params.ssid_len = connect->ssid_len; + + /* see if we have the bss already */ + bss = cfg80211_get_conn_bss(wdev); + + if (prev_bssid) { + memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN); + wdev->conn->prev_bssid_valid = true; + } + + /* we're good if we have a matching bss struct */ + if (bss) { + wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; + err = cfg80211_conn_do_work(wdev); + cfg80211_put_bss(wdev->wiphy, bss); + } else { + /* otherwise we'll need to scan for the AP first */ + err = cfg80211_conn_scan(wdev); + + /* + * If we can't scan right now, then we need to scan again + * after the current scan finished, since the parameters + * changed (unless we find a good AP anyway). + */ + if (err == -EBUSY) { + err = 0; + wdev->conn->state = CFG80211_CONN_SCAN_AGAIN; + } + } + + if (err) + cfg80211_sme_free(wdev); + + return err; +} + +static int cfg80211_sme_disconnect(struct wireless_dev *wdev, u16 reason) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + int err; + + if (!wdev->conn) + return 0; + + if (!rdev->ops->deauth) + return -EOPNOTSUPP; + + if (wdev->conn->state == CFG80211_CONN_SCANNING || + wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) { + err = 0; + goto out; + } + + /* wdev->conn->params.bssid must be set if > SCANNING */ + err = cfg80211_mlme_deauth(rdev, wdev->netdev, + wdev->conn->params.bssid, + NULL, 0, reason, false); + out: + cfg80211_sme_free(wdev); + return err; +} + +/* + * code shared for in-device and software SME + */ + +static bool cfg80211_is_all_idle(void) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + bool is_all_idle = true; + + /* + * All devices must be idle as otherwise if you are actively + * scanning some new beacon hints could be learned and would + * count as new regulatory hints. + */ + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + list_for_each_entry(wdev, &rdev->wdev_list, list) { + wdev_lock(wdev); + if (wdev->conn || wdev->current_bss) + is_all_idle = false; + wdev_unlock(wdev); + } + } + + return is_all_idle; +} + +static void disconnect_work(struct work_struct *work) +{ + rtnl_lock(); + if (cfg80211_is_all_idle()) + regulatory_hint_disconnect(); + rtnl_unlock(); +} + +static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); + + +/* + * API calls for drivers implementing connect/disconnect and + * SME event handling + */ + +/* This method must consume bss one way or another */ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, @@ -434,11 +597,10 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, ASSERT_WDEV_LOCK(wdev); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && - wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) - return; - - if (wdev->sme_state != CFG80211_SME_CONNECTING) + wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) { + cfg80211_put_bss(wdev->wiphy, bss); return; + } nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, @@ -476,38 +638,30 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->current_bss = NULL; } - if (wdev->conn) - wdev->conn->state = CFG80211_CONN_IDLE; - if (status != WLAN_STATUS_SUCCESS) { - wdev->sme_state = CFG80211_SME_IDLE; - if (wdev->conn) - kfree(wdev->conn->ie); - kfree(wdev->conn); - wdev->conn = NULL; kfree(wdev->connect_keys); wdev->connect_keys = NULL; wdev->ssid_len = 0; - cfg80211_put_bss(wdev->wiphy, bss); + if (bss) { + cfg80211_unhold_bss(bss_from_pub(bss)); + cfg80211_put_bss(wdev->wiphy, bss); + } return; } - if (!bss) - bss = cfg80211_get_bss(wdev->wiphy, - wdev->conn ? wdev->conn->params.channel : - NULL, - bssid, + if (!bss) { + WARN_ON_ONCE(!wiphy_to_dev(wdev->wiphy)->ops->connect); + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); + if (WARN_ON(!bss)) + return; + cfg80211_hold_bss(bss_from_pub(bss)); + } - if (WARN_ON(!bss)) - return; - - cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - wdev->sme_state = CFG80211_SME_CONNECTED; cfg80211_upload_connect_keys(wdev); rcu_read_lock(); @@ -543,8 +697,6 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, struct cfg80211_event *ev; unsigned long flags; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING); - ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); if (!ev) return; @@ -571,6 +723,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, } EXPORT_SYMBOL(cfg80211_connect_result); +/* Consumes bss object one way or another */ void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, @@ -585,14 +738,9 @@ void __cfg80211_roamed(struct wireless_dev *wdev, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) goto out; - if (wdev->sme_state != CFG80211_SME_CONNECTED) + if (WARN_ON(!wdev->current_bss)) goto out; - /* internal error -- how did we get to CONNECTED w/o BSS? */ - if (WARN_ON(!wdev->current_bss)) { - goto out; - } - cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; @@ -641,8 +789,6 @@ void cfg80211_roamed(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED); - bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); @@ -654,6 +800,7 @@ void cfg80211_roamed(struct net_device *dev, } EXPORT_SYMBOL(cfg80211_roamed); +/* Consumes bss object one way or another */ void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_bss *bss, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, @@ -664,8 +811,6 @@ void cfg80211_roamed_bss(struct net_device *dev, struct cfg80211_event *ev; unsigned long flags; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED); - if (WARN_ON(!bss)) return; @@ -707,25 +852,14 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; - if (wdev->sme_state != CFG80211_SME_CONNECTED) - return; - if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); } wdev->current_bss = NULL; - wdev->sme_state = CFG80211_SME_IDLE; wdev->ssid_len = 0; - if (wdev->conn) { - kfree(wdev->conn->ie); - wdev->conn->ie = NULL; - kfree(wdev->conn); - wdev->conn = NULL; - } - nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); /* @@ -754,8 +888,6 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, struct cfg80211_event *ev; unsigned long flags; - CFG80211_DEV_WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED); - ev = kzalloc(sizeof(*ev) + ie_len, gfp); if (!ev) return; @@ -773,21 +905,20 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, } EXPORT_SYMBOL(cfg80211_disconnected); -int __cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys, - const u8 *prev_bssid) +/* + * API calls for nl80211/wext compatibility code + */ +int cfg80211_connect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys, + const u8 *prev_bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_bss *bss = NULL; int err; ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) - return -EALREADY; - if (WARN_ON(wdev->connect_keys)) { kfree(wdev->connect_keys); wdev->connect_keys = NULL; @@ -823,219 +954,43 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, } } - if (!rdev->ops->connect) { - if (!rdev->ops->auth || !rdev->ops->assoc) - return -EOPNOTSUPP; - - if (WARN_ON(wdev->conn)) - return -EINPROGRESS; - - wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); - if (!wdev->conn) - return -ENOMEM; - - /* - * Copy all parameters, and treat explicitly IEs, BSSID, SSID. - */ - memcpy(&wdev->conn->params, connect, sizeof(*connect)); - if (connect->bssid) { - wdev->conn->params.bssid = wdev->conn->bssid; - memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN); - } - - if (connect->ie) { - wdev->conn->ie = kmemdup(connect->ie, connect->ie_len, - GFP_KERNEL); - wdev->conn->params.ie = wdev->conn->ie; - if (!wdev->conn->ie) { - kfree(wdev->conn); - wdev->conn = NULL; - return -ENOMEM; - } - } - - if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) { - wdev->conn->auto_auth = true; - /* start with open system ... should mostly work */ - wdev->conn->params.auth_type = - NL80211_AUTHTYPE_OPEN_SYSTEM; - } else { - wdev->conn->auto_auth = false; - } - - memcpy(wdev->ssid, connect->ssid, connect->ssid_len); - wdev->ssid_len = connect->ssid_len; - wdev->conn->params.ssid = wdev->ssid; - wdev->conn->params.ssid_len = connect->ssid_len; - - /* see if we have the bss already */ - bss = cfg80211_get_conn_bss(wdev); - - wdev->sme_state = CFG80211_SME_CONNECTING; - wdev->connect_keys = connkeys; - - if (prev_bssid) { - memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN); - wdev->conn->prev_bssid_valid = true; - } - - /* we're good if we have a matching bss struct */ - if (bss) { - wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; - err = cfg80211_conn_do_work(wdev); - cfg80211_put_bss(wdev->wiphy, bss); - } else { - /* otherwise we'll need to scan for the AP first */ - err = cfg80211_conn_scan(wdev); - /* - * If we can't scan right now, then we need to scan again - * after the current scan finished, since the parameters - * changed (unless we find a good AP anyway). - */ - if (err == -EBUSY) { - err = 0; - wdev->conn->state = CFG80211_CONN_SCAN_AGAIN; - } - } - if (err) { - kfree(wdev->conn->ie); - kfree(wdev->conn); - wdev->conn = NULL; - wdev->sme_state = CFG80211_SME_IDLE; - wdev->connect_keys = NULL; - wdev->ssid_len = 0; - } + wdev->connect_keys = connkeys; + memcpy(wdev->ssid, connect->ssid, connect->ssid_len); + wdev->ssid_len = connect->ssid_len; - return err; - } else { - wdev->sme_state = CFG80211_SME_CONNECTING; - wdev->connect_keys = connkeys; + if (!rdev->ops->connect) + err = cfg80211_sme_connect(wdev, connect, prev_bssid); + else err = rdev_connect(rdev, dev, connect); - if (err) { - wdev->connect_keys = NULL; - wdev->sme_state = CFG80211_SME_IDLE; - return err; - } - memcpy(wdev->ssid, connect->ssid, connect->ssid_len); - wdev->ssid_len = connect->ssid_len; - - return 0; + if (err) { + wdev->connect_keys = NULL; + wdev->ssid_len = 0; + return err; } -} -int cfg80211_connect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys) -{ - int err; - - mutex_lock(&rdev->devlist_mtx); - /* might request scan - scan_mtx -> wdev_mtx dependency */ - mutex_lock(&rdev->sched_scan_mtx); - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); - wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - - return err; + return 0; } -int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, u16 reason, bool wextev) +int cfg80211_disconnect(struct cfg80211_registered_device *rdev, + struct net_device *dev, u16 reason, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; ASSERT_WDEV_LOCK(wdev); - if (wdev->sme_state == CFG80211_SME_IDLE) - return -EINVAL; - kfree(wdev->connect_keys); wdev->connect_keys = NULL; - if (!rdev->ops->disconnect) { - if (!rdev->ops->deauth) - return -EOPNOTSUPP; - - /* was it connected by userspace SME? */ - if (!wdev->conn) { - cfg80211_mlme_down(rdev, dev); - goto disconnect; - } - - if (wdev->sme_state == CFG80211_SME_CONNECTING && - (wdev->conn->state == CFG80211_CONN_SCANNING || - wdev->conn->state == CFG80211_CONN_SCAN_AGAIN)) { - wdev->sme_state = CFG80211_SME_IDLE; - kfree(wdev->conn->ie); - kfree(wdev->conn); - wdev->conn = NULL; - wdev->ssid_len = 0; - return 0; - } - - /* wdev->conn->params.bssid must be set if > SCANNING */ - err = __cfg80211_mlme_deauth(rdev, dev, - wdev->conn->params.bssid, - NULL, 0, reason, false); - if (err) - return err; + if (wdev->conn) { + err = cfg80211_sme_disconnect(wdev, reason); + } else if (!rdev->ops->disconnect) { + cfg80211_mlme_down(rdev, dev); + err = 0; } else { err = rdev_disconnect(rdev, dev, reason); - if (err) - return err; } - disconnect: - if (wdev->sme_state == CFG80211_SME_CONNECTED) - __cfg80211_disconnected(dev, NULL, 0, 0, false); - else if (wdev->sme_state == CFG80211_SME_CONNECTING) - __cfg80211_connect_result(dev, NULL, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - wextev, NULL); - - return 0; -} - -int cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, - u16 reason, bool wextev) -{ - int err; - - wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_disconnect(rdev, dev, reason, wextev); - wdev_unlock(dev->ieee80211_ptr); - return err; } - -void cfg80211_sme_disassoc(struct net_device *dev, - struct cfg80211_internal_bss *bss) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - u8 bssid[ETH_ALEN]; - - ASSERT_WDEV_LOCK(wdev); - - if (!wdev->conn) - return; - - if (wdev->conn->state == CFG80211_CONN_IDLE) - return; - - /* - * Ok, so the association was made by this SME -- we don't - * want it any more so deauthenticate too. - */ - - memcpy(bssid, bss->pub.bssid, ETH_ALEN); - - __cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, - WLAN_REASON_DEAUTH_LEAVING, false); -} diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 8f28b9f798d8..a23253e06358 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -83,6 +83,7 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +#ifdef CONFIG_PM static void cfg80211_leave_all(struct cfg80211_registered_device *rdev) { struct wireless_dev *wdev; @@ -100,10 +101,10 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) rtnl_lock(); if (rdev->wiphy.registered) { - if (!rdev->wowlan) + if (!rdev->wiphy.wowlan_config) cfg80211_leave_all(rdev); if (rdev->ops->suspend) - ret = rdev_suspend(rdev, rdev->wowlan); + ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); if (ret == 1) { /* Driver refuse to configure wowlan */ cfg80211_leave_all(rdev); @@ -132,6 +133,7 @@ static int wiphy_resume(struct device *dev) return ret; } +#endif static const void *wiphy_namespace(struct device *d) { @@ -146,8 +148,10 @@ struct class ieee80211_class = { .dev_release = wiphy_dev_release, .dev_attrs = ieee80211_dev_attrs, .dev_uevent = wiphy_uevent, +#ifdef CONFIG_PM .suspend = wiphy_suspend, .resume = wiphy_resume, +#endif .ns_type = &net_ns_type_operations, .namespace = wiphy_namespace, }; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 5755bc14abbd..e1534baf2ebb 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1911,24 +1911,46 @@ TRACE_EVENT(cfg80211_send_rx_assoc, NETDEV_PR_ARG, MAC_PR_ARG(bssid), CHAN_PR_ARG) ); -DEFINE_EVENT(netdev_evt_only, __cfg80211_send_deauth, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +DECLARE_EVENT_CLASS(netdev_frame_event, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len), + TP_STRUCT__entry( + NETDEV_ENTRY + __dynamic_array(u8, frame, len) + ), + TP_fast_assign( + NETDEV_ASSIGN; + memcpy(__get_dynamic_array(frame), buf, len); + ), + TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x", + NETDEV_PR_ARG, + le16_to_cpup((__le16 *)__get_dynamic_array(frame))) ); -DEFINE_EVENT(netdev_evt_only, __cfg80211_send_disassoc, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +DEFINE_EVENT(netdev_frame_event, cfg80211_rx_unprot_mlme_mgmt, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len) ); -DEFINE_EVENT(netdev_evt_only, cfg80211_send_unprot_deauth, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +DEFINE_EVENT(netdev_frame_event, cfg80211_rx_mlme_mgmt, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len) ); -DEFINE_EVENT(netdev_evt_only, cfg80211_send_unprot_disassoc, - TP_PROTO(struct net_device *netdev), - TP_ARGS(netdev) +TRACE_EVENT(cfg80211_tx_mlme_mgmt, + TP_PROTO(struct net_device *netdev, const u8 *buf, int len), + TP_ARGS(netdev, buf, len), + TP_STRUCT__entry( + NETDEV_ENTRY + __dynamic_array(u8, frame, len) + ), + TP_fast_assign( + NETDEV_ASSIGN; + memcpy(__get_dynamic_array(frame), buf, len); + ), + TP_printk(NETDEV_PR_FMT ", ftype:0x%.2x", + NETDEV_PR_ARG, + le16_to_cpup((__le16 *)__get_dynamic_array(frame))) ); DECLARE_EVENT_CLASS(netdev_mac_evt, diff --git a/net/wireless/util.c b/net/wireless/util.c index f5ad4d94ba88..74458b7f61eb 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -33,6 +33,29 @@ ieee80211_get_response_rate(struct ieee80211_supported_band *sband, } EXPORT_SYMBOL(ieee80211_get_response_rate); +u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband) +{ + struct ieee80211_rate *bitrates; + u32 mandatory_rates = 0; + enum ieee80211_rate_flags mandatory_flag; + int i; + + if (WARN_ON(!sband)) + return 1; + + if (sband->band == IEEE80211_BAND_2GHZ) + mandatory_flag = IEEE80211_RATE_MANDATORY_B; + else + mandatory_flag = IEEE80211_RATE_MANDATORY_A; + + bitrates = sband->bitrates; + for (i = 0; i < sband->n_bitrates; i++) + if (bitrates[i].flags & mandatory_flag) + mandatory_rates |= BIT(i); + return mandatory_rates; +} +EXPORT_SYMBOL(ieee80211_mandatory_rates); + int ieee80211_channel_to_frequency(int chan, enum ieee80211_band band) { /* see 802.11 17.3.8.3.2 and Annex J @@ -785,12 +808,8 @@ void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev) ASSERT_RTNL(); ASSERT_RDEV_LOCK(rdev); - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) cfg80211_process_wdev_events(wdev); - - mutex_unlock(&rdev->devlist_mtx); } int cfg80211_change_iface(struct cfg80211_registered_device *rdev, @@ -822,10 +841,8 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; if (ntype != otype && netif_running(dev)) { - mutex_lock(&rdev->devlist_mtx); err = cfg80211_can_change_interface(rdev, dev->ieee80211_ptr, ntype); - mutex_unlock(&rdev->devlist_mtx); if (err) return err; @@ -841,8 +858,10 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: + wdev_lock(dev->ieee80211_ptr); cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); + wdev_unlock(dev->ieee80211_ptr); break; case NL80211_IFTYPE_MESH_POINT: /* mesh should be handled? */ @@ -1169,6 +1188,9 @@ bool ieee80211_operating_class_to_band(u8 operating_class, case 84: *band = IEEE80211_BAND_2GHZ; return true; + case 180: + *band = IEEE80211_BAND_60GHZ; + return true; } return false; @@ -1184,8 +1206,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, if (!beacon_int) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - list_for_each_entry(wdev, &rdev->wdev_list, list) { if (!wdev->beacon_interval) continue; @@ -1195,8 +1215,6 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, } } - mutex_unlock(&rdev->devlist_mtx); - return res; } @@ -1220,7 +1238,6 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev, int i, j; ASSERT_RTNL(); - lockdep_assert_held(&rdev->devlist_mtx); if (WARN_ON(hweight32(radar_detect) > 1)) return -EINVAL; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index d997d0f0c54a..e7c6e862580d 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -72,7 +72,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, struct cfg80211_registered_device *rdev; struct vif_params vifparams; enum nl80211_iftype type; - int ret; rdev = wiphy_to_dev(wdev->wiphy); @@ -98,11 +97,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, memset(&vifparams, 0, sizeof(vifparams)); - cfg80211_lock_rdev(rdev); - ret = cfg80211_change_iface(rdev, dev, type, NULL, &vifparams); - cfg80211_unlock_rdev(rdev); - - return ret; + return cfg80211_change_iface(rdev, dev, type, NULL, &vifparams); } EXPORT_SYMBOL_GPL(cfg80211_wext_siwmode); @@ -579,13 +574,10 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, { int err; - /* devlist mutex needed for possible IBSS re-join */ - mutex_lock(&rdev->devlist_mtx); wdev_lock(dev->ieee80211_ptr); err = __cfg80211_set_encryption(rdev, dev, pairwise, addr, remove, tx_key, idx, params); wdev_unlock(dev->ieee80211_ptr); - mutex_unlock(&rdev->devlist_mtx); return err; } @@ -787,7 +779,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, struct cfg80211_chan_def chandef = { .width = NL80211_CHAN_WIDTH_20_NOHT, }; - int freq, err; + int freq; switch (wdev->iftype) { case NL80211_IFTYPE_STATION: @@ -804,10 +796,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - err = cfg80211_set_monitor_channel(rdev, &chandef); - mutex_unlock(&rdev->devlist_mtx); - return err; + return cfg80211_set_monitor_channel(rdev, &chandef); case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wdev->wiphy, wextfreq); if (freq < 0) @@ -818,10 +807,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; - mutex_lock(&rdev->devlist_mtx); - err = cfg80211_set_mesh_channel(rdev, wdev, &chandef); - mutex_unlock(&rdev->devlist_mtx); - return err; + return cfg80211_set_mesh_channel(rdev, wdev, &chandef); default: return -EOPNOTSUPP; } diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index e79cb5c0655a..14c9a2583ba0 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -54,8 +54,8 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, if (wdev->wext.prev_bssid_valid) prev_bssid = wdev->wext.prev_bssid; - err = __cfg80211_connect(rdev, wdev->netdev, - &wdev->wext.connect, ck, prev_bssid); + err = cfg80211_connect(rdev, wdev->netdev, + &wdev->wext.connect, ck, prev_bssid); if (err) kfree(ck); @@ -87,12 +87,9 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, return -EINVAL; } - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) { + if (wdev->conn) { bool event = true; if (wdev->wext.connect.channel == chan) { @@ -103,8 +100,8 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, /* if SSID set, we'll try right again, avoid event */ if (wdev->wext.connect.ssid_len) event = false; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, event); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, event); if (err) goto out; } @@ -136,9 +133,6 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } @@ -190,14 +184,11 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); err = 0; - if (wdev->sme_state != CFG80211_SME_IDLE) { + if (wdev->conn) { bool event = true; if (wdev->wext.connect.ssid && len && @@ -208,8 +199,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, /* if SSID set now, we'll try to connect, avoid event */ if (len) event = false; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, event); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, event); if (err) goto out; } @@ -226,9 +217,6 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } @@ -287,12 +275,9 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; - cfg80211_lock_rdev(rdev); - mutex_lock(&rdev->devlist_mtx); - mutex_lock(&rdev->sched_scan_mtx); wdev_lock(wdev); - if (wdev->sme_state != CFG80211_SME_IDLE) { + if (wdev->conn) { err = 0; /* both automatic */ if (!bssid && !wdev->wext.connect.bssid) @@ -303,8 +288,8 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, ether_addr_equal(bssid, wdev->wext.connect.bssid)) goto out; - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, false); + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); if (err) goto out; } @@ -318,9 +303,6 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, err = cfg80211_mgd_wext_connect(rdev, wdev); out: wdev_unlock(wdev); - mutex_unlock(&rdev->sched_scan_mtx); - mutex_unlock(&rdev->devlist_mtx); - cfg80211_unlock_rdev(rdev); return err; } @@ -382,9 +364,9 @@ int cfg80211_wext_siwgenie(struct net_device *dev, wdev->wext.ie = ie; wdev->wext.ie_len = ie_len; - if (wdev->sme_state != CFG80211_SME_IDLE) { - err = __cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, false); + if (wdev->conn) { + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); if (err) goto out; } @@ -420,8 +402,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev, switch (mlme->cmd) { case IW_MLME_DEAUTH: case IW_MLME_DISASSOC: - err = __cfg80211_disconnect(rdev, dev, mlme->reason_code, - true); + err = cfg80211_disconnect(rdev, dev, mlme->reason_code, true); break; default: err = -EOPNOTSUPP; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 37ca9694aabe..45a3ab5612c1 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -224,7 +224,7 @@ static void x25_kill_by_device(struct net_device *dev) static int x25_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct x25_neigh *nb; if (!net_eq(dev_net(dev), &init_net)) @@ -1583,11 +1583,11 @@ out_cud_release: case SIOCX25CALLACCPTAPPRV: { rc = -EINVAL; lock_sock(sk); - if (sk->sk_state != TCP_CLOSE) - break; - clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + if (sk->sk_state == TCP_CLOSE) { + clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + rc = 0; + } release_sock(sk); - rc = 0; break; } @@ -1595,14 +1595,15 @@ out_cud_release: rc = -EINVAL; lock_sock(sk); if (sk->sk_state != TCP_ESTABLISHED) - break; + goto out_sendcallaccpt_release; /* must call accptapprv above */ if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) - break; + goto out_sendcallaccpt_release; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; - release_sock(sk); rc = 0; +out_sendcallaccpt_release: + release_sock(sk); break; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index ab2bb42fe094..88843996f935 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -163,6 +163,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; spin_lock(&x->lock); + if (unlikely(x->km.state == XFRM_STATE_ACQ)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); + goto drop_unlock; + } + if (unlikely(x->km.state != XFRM_STATE_VALID)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); goto drop_unlock; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 0cf003dfa8fc..eb4a84288648 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -89,7 +89,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) err = x->type->output(x, skb); if (err == -EINPROGRESS) - goto out_exit; + goto out; resume: if (err) { @@ -107,15 +107,14 @@ resume: x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); - err = 0; + return 0; -out_exit: - return err; error: spin_unlock_bh(&x->lock); error_nolock: kfree_skb(skb); - goto out_exit; +out: + return err; } int xfrm_output_resume(struct sk_buff *skb, int err) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ea970b8002a2..e52cab3591dd 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2785,7 +2785,7 @@ static void __net_init xfrm_dst_ops_init(struct net *net) static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_DOWN: diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index c721b0d9ab8b..80cd1e55b834 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -44,6 +44,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), SNMP_MIB_ITEM("XfrmOutStateInvalid", LINUX_MIB_XFRMOUTSTATEINVALID), + SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), SNMP_MIB_SENTINEL }; |