diff options
author | David S. Miller <davem@davemloft.net> | 2015-07-31 23:52:20 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-07-31 23:52:20 -0700 |
commit | 5510b3c2a173921374ec847848fb20b98e1c698a (patch) | |
tree | c9e185281ef17280ce0dc30be7923124874736b0 /net | |
parent | 17f901e8915cb922c2ca710835ef34f166f53ee9 (diff) | |
parent | 7c764cec3703583247c4ab837c652975a3d41f4b (diff) | |
download | lwn-5510b3c2a173921374ec847848fb20b98e1c698a.tar.gz lwn-5510b3c2a173921374ec847848fb20b98e1c698a.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Conflicts:
arch/s390/net/bpf_jit_comp.c
drivers/net/ethernet/ti/netcp_ethss.c
net/bridge/br_multicast.c
net/ipv4/ip_fragment.c
All four conflicts were cases of simple overlapping
changes.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
47 files changed, 427 insertions, 242 deletions
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 9dd49ca67dbc..6e70ddb158b4 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -704,6 +704,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) mutex_unlock(&virtio_9p_lock); + vdev->config->reset(vdev); vdev->config->del_vqs(vdev); sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 3d0f7d2a0616..ad82324f710f 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -2312,6 +2312,10 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) return 1; chan = conn->smp; + if (!chan) { + BT_ERR("SMP security requested but not available"); + return 1; + } if (!hci_dev_test_flag(hcon->hdev, HCI_LE_ENABLED)) return 1; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 0ff6e1bbca91..fa7bfced888e 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -37,15 +37,30 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) { - if (!is_skb_forwardable(skb->dev, skb)) { - kfree_skb(skb); - } else { - skb_push(skb, ETH_HLEN); - br_drop_fake_rtable(skb); - skb_sender_cpu_clear(skb); - dev_queue_xmit(skb); + if (!is_skb_forwardable(skb->dev, skb)) + goto drop; + + skb_push(skb, ETH_HLEN); + br_drop_fake_rtable(skb); + skb_sender_cpu_clear(skb); + + if (skb->ip_summed == CHECKSUM_PARTIAL && + (skb->protocol == htons(ETH_P_8021Q) || + skb->protocol == htons(ETH_P_8021AD))) { + int depth; + + if (!__vlan_get_protocol(skb, skb->protocol, &depth)) + goto drop; + + skb_set_network_header(skb, depth); } + dev_queue_xmit(skb); + + return 0; + +drop: + kfree_skb(skb); return 0; } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 5e9d1c5e1194..6a592856da1c 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -519,6 +519,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (p->port->state == BR_STATE_DISABLED) goto unlock; + entry->state = p->state; rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index fd238587e032..0752796fe0ba 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1432,8 +1432,7 @@ br_multicast_leave_group(struct net_bridge *br, spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || - (port && port->state == BR_STATE_DISABLED) || - timer_pending(&other_query->timer)) + (port && port->state == BR_STATE_DISABLED)) goto out; mdb = mlock_dereference(br->mdb, br); @@ -1441,6 +1440,32 @@ br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { + struct net_bridge_port_group __rcu **pp; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port != port) + continue; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + br_mdb_notify(br->dev, port, group, RTM_DELMDB, + p->state); + + if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + } + goto out; + } + + if (timer_pending(&other_query->timer)) + goto out; + if (br->multicast_querier) { __br_multicast_send_query(br, port, &mp->addr); @@ -1466,29 +1491,6 @@ br_multicast_leave_group(struct net_bridge *br, } } - if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { - struct net_bridge_port_group __rcu **pp; - - for (pp = &mp->ports; - (p = mlock_dereference(*pp, br)) != NULL; - pp = &p->next) { - if (p->port != port) - continue; - - rcu_assign_pointer(*pp, p->next); - hlist_del_init(&p->mglist); - del_timer(&p->timer); - br_mdb_notify(br->dev, port, group, RTM_DELMDB, - p->state); - call_rcu_bh(&p->rcu, br_multicast_free_pg); - - if (!mp->ports && !mp->mglist && - netif_running(br->dev)) - mod_timer(&mp->timer, jiffies); - } - goto out; - } - now = jiffies; time = now + br->multicast_last_member_count * br->multicast_last_member_interval; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 793d247ac2ca..91a2e08c2bb8 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -691,9 +691,17 @@ static int br_port_slave_changelink(struct net_device *brdev, struct nlattr *tb[], struct nlattr *data[]) { + struct net_bridge *br = netdev_priv(brdev); + int ret; + if (!data) return 0; - return br_setport(br_port_get_rtnl(dev), data); + + spin_lock_bh(&br->lock); + ret = br_setport(br_port_get_rtnl(dev), data); + spin_unlock_bh(&br->lock); + + return ret; } static int br_port_fill_slave_info(struct sk_buff *skb, diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index b4b6dab9c285..ed74ffaa851f 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -209,8 +209,9 @@ void br_transmit_config(struct net_bridge_port *p) br_send_config_bpdu(p, &bpdu); p->topology_change_ack = 0; p->config_pending = 0; - mod_timer(&p->hold_timer, - round_jiffies(jiffies + BR_HOLD_TIME)); + if (p->br->stp_enabled == BR_KERNEL_STP) + mod_timer(&p->hold_timer, + round_jiffies(jiffies + BR_HOLD_TIME)); } } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index a2730e7196cd..4ca449a16132 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -48,7 +48,8 @@ void br_stp_enable_bridge(struct net_bridge *br) struct net_bridge_port *p; spin_lock_bh(&br->lock); - mod_timer(&br->hello_timer, jiffies + br->hello_time); + if (br->stp_enabled == BR_KERNEL_STP) + mod_timer(&br->hello_timer, jiffies + br->hello_time); mod_timer(&br->gc_timer, jiffies + HZ/10); br_config_bpdu_generation(br); @@ -127,6 +128,7 @@ static void br_stp_start(struct net_bridge *br) int r; char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; char *envp[] = { NULL }; + struct net_bridge_port *p; r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); @@ -140,6 +142,10 @@ static void br_stp_start(struct net_bridge *br) if (r == 0) { br->stp_enabled = BR_USER_STP; br_debug(br, "userspace STP started\n"); + /* Stop hello and hold timers */ + del_timer(&br->hello_timer); + list_for_each_entry(p, &br->port_list, list) + del_timer(&p->hold_timer); } else { br->stp_enabled = BR_KERNEL_STP; br_debug(br, "using kernel STP\n"); @@ -156,12 +162,17 @@ static void br_stp_stop(struct net_bridge *br) int r; char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL }; char *envp[] = { NULL }; + struct net_bridge_port *p; if (br->stp_enabled == BR_USER_STP) { r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); br_info(br, "userspace STP stopped, return code %d\n", r); /* To start timers on any ports left in blocking */ + mod_timer(&br->hello_timer, jiffies + br->hello_time); + list_for_each_entry(p, &br->port_list, list) + mod_timer(&p->hold_timer, + round_jiffies(jiffies + BR_HOLD_TIME)); spin_lock_bh(&br->lock); br_port_state_selection(br); spin_unlock_bh(&br->lock); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 7caf7fae2d5b..5f0f5af0ec35 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -40,7 +40,9 @@ static void br_hello_timer_expired(unsigned long arg) if (br->dev->flags & IFF_UP) { br_config_bpdu_generation(br); - mod_timer(&br->hello_timer, round_jiffies(jiffies + br->hello_time)); + if (br->stp_enabled != BR_USER_STP) + mod_timer(&br->hello_timer, + round_jiffies(jiffies + br->hello_time)); } spin_unlock(&br->lock); } diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 1f2a126f4ffa..6441f47b1a8f 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -23,7 +23,8 @@ static inline struct cgroup_cls_state *css_cls_state(struct cgroup_subsys_state struct cgroup_cls_state *task_cls_state(struct task_struct *p) { - return css_cls_state(task_css(p, net_cls_cgrp_id)); + return css_cls_state(task_css_check(p, net_cls_cgrp_id, + rcu_read_lock_bh_held())); } EXPORT_SYMBOL_GPL(task_cls_state); diff --git a/net/core/sock.c b/net/core/sock.c index 08f16db46070..193901d09757 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1497,7 +1497,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); /* SANITY */ - get_net(sock_net(newsk)); + if (likely(newsk->sk_net_refcnt)) + get_net(sock_net(newsk)); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); @@ -1967,20 +1968,21 @@ static void __release_sock(struct sock *sk) * sk_wait_data - wait for data to arrive at sk_receive_queue * @sk: sock to wait on * @timeo: for how long + * @skb: last skb seen on sk_receive_queue * * Now socket state including sk->sk_err is changed only under lock, * hence we may omit checks after joining wait queue. * We check receive queue before schedule() only as optimization; * it is very likely that release_sock() added new data. */ -int sk_wait_data(struct sock *sk, long *timeo) +int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb) { int rc; DEFINE_WAIT(wait); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); - rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue)); + rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb); clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); finish_wait(sk_sleep(sk), &wait); return rc; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 52a94016526d..b5cf13a28009 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -886,7 +886,7 @@ verify_sock_status: break; } - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); continue; found_ok_skb: if (len > skb->len) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index f46e4d1306f2..214d44aef35b 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -207,7 +207,7 @@ found: } else { fq->q.meat += skb->len; } - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { @@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, clone->data_len = clone->len; head->data_len -= clone->len; head->len -= clone->len; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } WARN_ON(head == NULL); @@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 1d59e50ce8b7..34a308573f4b 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1026,14 +1026,16 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) neigh = neigh_lookup(&arp_tbl, &ip, dev); if (neigh) { - read_lock_bh(&neigh->lock); - memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); - r->arp_flags = arp_state_to_flags(neigh); - read_unlock_bh(&neigh->lock); - r->arp_ha.sa_family = dev->type; - strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); + if (!(neigh->nud_state & NUD_NOARP)) { + read_lock_bh(&neigh->lock); + memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len); + r->arp_flags = arp_state_to_flags(neigh); + read_unlock_bh(&neigh->lock); + r->arp_ha.sa_family = dev->type; + strlcpy(r->arp_dev, dev->name, sizeof(r->arp_dev)); + err = 0; + } neigh_release(neigh); - err = 0; } return err; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e813196c91c7..2d9cb1748f81 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -882,7 +882,6 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0); rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid); - blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); } return 0; } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index c6211ed60b03..9c02920725db 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -13,6 +13,7 @@ struct fib_alias { u8 fa_state; u8 fa_slen; u32 tb_id; + s16 fa_default; struct rcu_head rcu; }; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 65e00399a9a6..558e196bae0f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1297,23 +1297,40 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) } /* Must be invoked inside of an RCU protected region. */ -void fib_select_default(struct fib_result *res) +void fib_select_default(const struct flowi4 *flp, struct fib_result *res) { struct fib_info *fi = NULL, *last_resort = NULL; struct hlist_head *fa_head = res->fa_head; struct fib_table *tb = res->table; + u8 slen = 32 - res->prefixlen; int order = -1, last_idx = -1; - struct fib_alias *fa; + struct fib_alias *fa, *fa1 = NULL; + u32 last_prio = res->fi->fib_priority; + u8 last_tos = 0; hlist_for_each_entry_rcu(fa, fa_head, fa_list) { struct fib_info *next_fi = fa->fa_info; + if (fa->fa_slen != slen) + continue; + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + continue; + if (fa->tb_id != tb->tb_id) + continue; + if (next_fi->fib_priority > last_prio && + fa->fa_tos == last_tos) { + if (last_tos) + continue; + break; + } + if (next_fi->fib_flags & RTNH_F_DEAD) + continue; + last_tos = fa->fa_tos; + last_prio = next_fi->fib_priority; + if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; - - if (next_fi->fib_priority > res->fi->fib_priority) - break; if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) continue; @@ -1323,10 +1340,11 @@ void fib_select_default(struct fib_result *res) if (!fi) { if (next_fi != res->fi) break; + fa1 = fa; } else if (!fib_detect_death(fi, order, &last_resort, - &last_idx, tb->tb_default)) { + &last_idx, fa1->fa_default)) { fib_result_assign(res, fi); - tb->tb_default = order; + fa1->fa_default = order; goto out; } fi = next_fi; @@ -1334,20 +1352,21 @@ void fib_select_default(struct fib_result *res) } if (order <= 0 || !fi) { - tb->tb_default = -1; + if (fa1) + fa1->fa_default = -1; goto out; } if (!fib_detect_death(fi, order, &last_resort, &last_idx, - tb->tb_default)) { + fa1->fa_default)) { fib_result_assign(res, fi); - tb->tb_default = order; + fa1->fa_default = order; goto out; } if (last_idx >= 0) fib_result_assign(res, last_resort); - tb->tb_default = last_idx; + fa1->fa_default = last_idx; out: return; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 15d32612e3c6..37c4bb89a708 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1171,6 +1171,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = state & ~FA_S_ACCESSED; new_fa->fa_slen = fa->fa_slen; new_fa->tb_id = tb->tb_id; + new_fa->fa_default = -1; err = switchdev_fib_ipv4_add(key, plen, fi, new_fa->fa_tos, @@ -1222,6 +1223,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_state = 0; new_fa->fa_slen = slen; new_fa->tb_id = tb->tb_id; + new_fa->fa_default = -1; /* (Optionally) offload fib entry to switch hardware. */ err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, @@ -1791,8 +1793,6 @@ void fib_table_flush_external(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } } @@ -1862,8 +1862,6 @@ int fib_table_flush(struct fib_table *tb) if (hlist_empty(&n->leaf)) { put_child_root(pn, n->key, NULL); node_free(n); - } else { - leaf_pull_suffix(pn, n); } } @@ -1990,7 +1988,6 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias) return NULL; tb->tb_id = id; - tb->tb_default = -1; tb->tb_num_default = 0; tb->tb_data = (alias ? alias->__data : tb->__data); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 5e346a082e5f..d0a7c0319e3d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -131,34 +131,22 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) unsigned int evicted = 0; HLIST_HEAD(expired); -evict_again: spin_lock(&hb->chain_lock); hlist_for_each_entry_safe(fq, n, &hb->chain, list) { if (!inet_fragq_should_evict(fq)) continue; - if (!del_timer(&fq->timer)) { - /* q expiring right now thus increment its refcount so - * it won't be freed under us and wait until the timer - * has finished executing then destroy it - */ - atomic_inc(&fq->refcnt); - spin_unlock(&hb->chain_lock); - del_timer_sync(&fq->timer); - inet_frag_put(fq, f); - goto evict_again; - } + if (!del_timer(&fq->timer)) + continue; - fq->flags |= INET_FRAG_EVICTED; - hlist_del(&fq->list); - hlist_add_head(&fq->list, &expired); + hlist_add_head(&fq->list_evictor, &expired); ++evicted; } spin_unlock(&hb->chain_lock); - hlist_for_each_entry_safe(fq, n, &expired, list) + hlist_for_each_entry_safe(fq, n, &expired, list_evictor) f->frag_expire((unsigned long) fq); return evicted; @@ -240,18 +228,20 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) int i; nf->low_thresh = 0; - local_bh_disable(); evict_again: + local_bh_disable(); seq = read_seqbegin(&f->rnd_seqlock); for (i = 0; i < INETFRAGS_HASHSZ ; i++) inet_evict_bucket(f, &f->hash[i]); - if (read_seqretry(&f->rnd_seqlock, seq)) - goto evict_again; - local_bh_enable(); + cond_resched(); + + if (read_seqretry(&f->rnd_seqlock, seq) || + percpu_counter_sum(&nf->mem)) + goto evict_again; percpu_counter_destroy(&nf->mem); } @@ -284,8 +274,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) struct inet_frag_bucket *hb; hb = get_frag_bucket_locked(fq, f); - if (!(fq->flags & INET_FRAG_EVICTED)) - hlist_del(&fq->list); + hlist_del(&fq->list); + fq->flags |= INET_FRAG_COMPLETE; spin_unlock(&hb->chain_lock); } @@ -297,7 +287,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) if (!(fq->flags & INET_FRAG_COMPLETE)) { fq_unlink(fq, f); atomic_dec(&fq->refcnt); - fq->flags |= INET_FRAG_COMPLETE; } } EXPORT_SYMBOL(inet_frag_kill); @@ -330,11 +319,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) fp = xp; } sum = sum_truesize + f->qsize; - sub_frag_mem_limit(q, sum); if (f->destructor) f->destructor(q); kmem_cache_free(f->frags_cachep, q); + + sub_frag_mem_limit(nf, sum); } EXPORT_SYMBOL(inet_frag_destroy); @@ -390,7 +380,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, q->net = nf; f->constructor(q, arg); - add_frag_mem_limit(q, f->qsize); + add_frag_mem_limit(nf, f->qsize); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index f44bccc42494..d96722ae8979 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -202,7 +202,7 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); - if (!(qp->q.flags & INET_FRAG_EVICTED)) { + if (!inet_frag_evicting(&qp->q)) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; @@ -309,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp) kfree_skb(fp); fp = xp; } while (fp); - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(qp->q.net, sum_truesize); qp->q.flags = 0; qp->q.len = 0; @@ -455,7 +455,7 @@ found: qp->q.fragments = next; qp->q.meat -= free_it->len; - sub_frag_mem_limit(&qp->q, free_it->truesize); + sub_frag_mem_limit(qp->q.net, free_it->truesize); kfree_skb(free_it); } } @@ -479,7 +479,7 @@ found: qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; - add_frag_mem_limit(&qp->q, skb->truesize); + add_frag_mem_limit(qp->q.net, skb->truesize); if (offset == 0) qp->q.flags |= INET_FRAG_FIRST_IN; @@ -586,7 +586,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&qp->q, clone->truesize); + add_frag_mem_limit(qp->q.net, clone->truesize); } skb_shinfo(head)->frag_list = head->next; @@ -601,7 +601,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - sub_frag_mem_limit(&qp->q, head->truesize); + sub_frag_mem_limit(qp->q.net, head->truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 11096396ef4a..908f7ef2f12a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2194,7 +2194,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (!res.prefixlen && res.table->tb_num_default > 1 && res.type == RTN_UNICAST && !fl4->flowi4_oif) - fib_select_default(&res); + fib_select_default(fl4, &res); if (!fl4->saddr) fl4->saddr = FIB_RES_PREFSRC(net, res); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7f4056785acc..45534a5ab430 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -780,7 +780,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, ret = -EAGAIN; break; } - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if (signal_pending(current)) { ret = sock_intr_errno(timeo); break; @@ -1575,7 +1575,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int target; /* Read at least this many bytes */ long timeo; struct task_struct *user_recv = NULL; - struct sk_buff *skb; + struct sk_buff *skb, *last; u32 urg_hole = 0; if (unlikely(flags & MSG_ERRQUEUE)) @@ -1635,7 +1635,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Next get a buffer. */ + last = skb_peek_tail(&sk->sk_receive_queue); skb_queue_walk(&sk->sk_receive_queue, skb) { + last = skb; /* Now that we have two receive queues this * shouldn't happen. */ @@ -1754,8 +1756,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, /* Do not sleep, just process backlog. */ release_sock(sk); lock_sock(sk); - } else - sk_wait_data(sk, &timeo); + } else { + sk_wait_data(sk, &timeo, last); + } if (user_recv) { int chunk; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6e184e02fd3c..b3054611f88a 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1648,6 +1648,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; @@ -1662,6 +1663,11 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, ndisc_send_unsol_na(dev); in6_dev_put(idev); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&nd_tbl, dev); + break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); fib6_run_gc(0, net, false); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f187c8d8a1b..6d02498172c1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -348,7 +348,7 @@ found: fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - sub_frag_mem_limit(&fq->q, head->truesize); + sub_frag_mem_limit(fq->q.net, head->truesize); head->ignore_df = 1; head->next = NULL; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 8ffa2c8cce77..f1159bb76e0a 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - if (fq->q.flags & INET_FRAG_EVICTED) + if (inet_frag_evicting(&fq->q)) goto out_rcu_unlock; IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); @@ -330,7 +330,7 @@ found: fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8fd9febaa5ba..8dab4e569571 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -613,7 +613,7 @@ static int llc_wait_data(struct sock *sk, long timeo) if (signal_pending(current)) break; rc = 0; - if (sk_wait_data(sk, &timeo)) + if (sk_wait_data(sk, &timeo, NULL)) break; } return rc; @@ -802,7 +802,7 @@ static int llc_ui_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, release_sock(sk); lock_sock(sk); } else - sk_wait_data(sk, &timeo); + sk_wait_data(sk, &timeo, NULL); if ((flags & MSG_PEEK) && peek_seq != llc->copied_seq) { net_dbg_ratelimited("LLC(%s:%d): Application bug, race in MSG_PEEK\n", diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5d2b806a862e..38fbc194b9cb 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -319,7 +319,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * return *ignored=0 i.e. ICMP and NF_DROP */ sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); @@ -467,7 +473,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, } sched = rcu_dereference(svc->scheduler); - dest = sched->schedule(svc, skb, iph); + if (sched) { + /* read svc->sched_data after svc->scheduler */ + smp_rmb(); + dest = sched->schedule(svc, skb, iph); + } else { + dest = NULL; + } if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 285eae3a1454..24c554201a76 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -842,15 +842,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, __ip_vs_dst_cache_reset(dest); spin_unlock_bh(&dest->dst_lock); - sched = rcu_dereference_protected(svc->scheduler, 1); if (add) { ip_vs_start_estimator(svc->net, &dest->stats); list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; - if (sched->add_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->add_dest) sched->add_dest(svc, dest); } else { - if (sched->upd_dest) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (sched && sched->upd_dest) sched->upd_dest(svc, dest); } } @@ -1084,7 +1085,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, struct ip_vs_scheduler *sched; sched = rcu_dereference_protected(svc->scheduler, 1); - if (sched->del_dest) + if (sched && sched->del_dest) sched->del_dest(svc, dest); } } @@ -1175,11 +1176,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ip_vs_use_count_inc(); /* Lookup the scheduler by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - ret = -ENOENT; - goto out_err; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + ret = -ENOENT; + goto out_err; + } } if (u->pe_name && *u->pe_name) { @@ -1240,10 +1244,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, spin_lock_init(&svc->stats.lock); /* Bind the scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) - goto out_err; - sched = NULL; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) + goto out_err; + sched = NULL; + } /* Bind the ct retriever */ RCU_INIT_POINTER(svc->pe, pe); @@ -1291,17 +1297,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, static int ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { - struct ip_vs_scheduler *sched, *old_sched; + struct ip_vs_scheduler *sched = NULL, *old_sched; struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; /* * Lookup the scheduler, by 'u->sched_name' */ - sched = ip_vs_scheduler_get(u->sched_name); - if (sched == NULL) { - pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); - return -ENOENT; + if (strcmp(u->sched_name, "none")) { + sched = ip_vs_scheduler_get(u->sched_name); + if (!sched) { + pr_info("Scheduler module ip_vs_%s not found\n", + u->sched_name); + return -ENOENT; + } } old_sched = sched; @@ -1329,14 +1338,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = rcu_dereference_protected(svc->scheduler, 1); if (sched != old_sched) { + if (old_sched) { + ip_vs_unbind_scheduler(svc, old_sched); + RCU_INIT_POINTER(svc->scheduler, NULL); + /* Wait all svc->sched_data users */ + synchronize_rcu(); + } /* Bind the new scheduler */ - ret = ip_vs_bind_scheduler(svc, sched); - if (ret) { - old_sched = sched; - goto out; + if (sched) { + ret = ip_vs_bind_scheduler(svc, sched); + if (ret) { + ip_vs_scheduler_put(sched); + goto out; + } } - /* Unbind the old scheduler on success */ - ip_vs_unbind_scheduler(svc, old_sched); } /* @@ -1982,6 +1997,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 @@ -1990,18 +2006,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), - sched->name); + sched_name); else #endif seq_printf(seq, "%s %08X:%04X %s %s ", ip_vs_proto_name(svc->protocol), ntohl(svc->addr.ip), ntohs(svc->port), - sched->name, + sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } else { seq_printf(seq, "FWM %08X %s %s", - svc->fwmark, sched->name, + svc->fwmark, sched_name, (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); } @@ -2427,13 +2443,15 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { struct ip_vs_scheduler *sched; struct ip_vs_kstats kstats; + char *sched_name; sched = rcu_dereference_protected(src->scheduler, 1); + sched_name = sched ? sched->name : "none"; dst->protocol = src->protocol; dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name)); + strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2892,6 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, struct ip_vs_flags flags = { .flags = svc->flags, .mask = ~0 }; struct ip_vs_kstats kstats; + char *sched_name; nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); if (!nl_service) @@ -2910,8 +2929,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, } sched = rcu_dereference_protected(svc->scheduler, 1); + sched_name = sched ? sched->name : "none"; pe = rcu_dereference_protected(svc->pe, 1); - if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || + if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 199760c71f39..7e8141647943 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc, if (sched->done_service) sched->done_service(svc); - /* svc->scheduler can not be set to NULL */ + /* svc->scheduler can be set to NULL only by caller */ } @@ -147,21 +147,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg) { - struct ip_vs_scheduler *sched; + struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); + char *sched_name = sched ? sched->name : "none"; - sched = rcu_dereference(svc->scheduler); if (svc->fwmark) { IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", - sched->name, svc->fwmark, svc->fwmark, msg); + sched_name, svc->fwmark, svc->fwmark, msg); #ifdef CONFIG_IP_VS_IPV6 } else if (svc->af == AF_INET6) { IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.in6, ntohs(svc->port), msg); #endif } else { IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", - sched->name, ip_vs_proto_name(svc->protocol), + sched_name, ip_vs_proto_name(svc->protocol), &svc->addr.ip, ntohs(svc->port), msg); } } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index b08ba9538d12..d99ad93eb855 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -612,7 +612,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp, pkts = atomic_add_return(1, &cp->in_pkts); else pkts = sysctl_sync_threshold(ipvs); - ip_vs_sync_conn(net, cp->control, pkts); + ip_vs_sync_conn(net, cp, pkts); } } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index bf66a8657a5f..258a0b0e82a2 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; - fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? FLOWI_FLAG_KNOWN_NH : 0; @@ -505,6 +504,13 @@ err_put: return -1; err_unreach: + /* The ip6_link_failure function requires the dev field to be set + * in order to get the net (further for the sake of fwmark + * reflection). + */ + if (!skb->dev) + skb->dev = skb_dst(skb)->dev; + dst_link_failure(skb); return -1; } @@ -523,10 +529,27 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, if (ret == NF_ACCEPT) { nf_reset(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); } return ret; } +/* In the event of a remote destination, it's possible that we would have + * matches against an old socket (particularly a TIME-WAIT socket). This + * causes havoc down the line (ip_local_out et. al. expect regular sockets + * and invalid memory accesses will happen) so simply drop the association + * in this case. +*/ +static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb) +{ + /* If dev is set, the packet came from the LOCAL_IN callback and + * not from a local TCP socket. + */ + if (skb->dev) + skb_orphan(skb); +} + /* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, struct ip_vs_conn *cp, int local) @@ -538,12 +561,23 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 1); + + /* Remove the early_demux association unless it's bound for the + * exact same port and address on this host after translation. + */ + if (!local || cp->vport != cp->dport || + !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr)) + ip_vs_drop_early_demux_sk(skb); + if (!local) { skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; + return ret; } @@ -557,7 +591,10 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) ip_vs_notrack(skb); if (!local) { + ip_vs_drop_early_demux_sk(skb); skb_forward_csum(skb); + if (!skb->sk) + skb_sender_cpu_clear(skb); NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, NULL, skb_dst(skb)->dev, dst_output_sk); } else @@ -845,6 +882,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, struct ipv6hdr *old_ipv6h = NULL; #endif + ip_vs_drop_early_demux_sk(skb); + if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) { new_skb = skb_realloc_headroom(skb, max_headroom); if (!new_skb) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 13fad8668f83..651039ad1681 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -287,6 +287,46 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct) spin_unlock(&pcpu->lock); } +/* Released via destroy_conntrack() */ +struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags) +{ + struct nf_conn *tmpl; + + tmpl = kzalloc(sizeof(struct nf_conn), GFP_KERNEL); + if (tmpl == NULL) + return NULL; + + tmpl->status = IPS_TEMPLATE; + write_pnet(&tmpl->ct_net, net); + +#ifdef CONFIG_NF_CONNTRACK_ZONES + if (zone) { + struct nf_conntrack_zone *nf_ct_zone; + + nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, GFP_ATOMIC); + if (!nf_ct_zone) + goto out_free; + nf_ct_zone->id = zone; + } +#endif + atomic_set(&tmpl->ct_general.use, 0); + + return tmpl; +#ifdef CONFIG_NF_CONNTRACK_ZONES +out_free: + kfree(tmpl); + return NULL; +#endif +} +EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc); + +static void nf_ct_tmpl_free(struct nf_conn *tmpl) +{ + nf_ct_ext_destroy(tmpl); + nf_ct_ext_free(tmpl); + kfree(tmpl); +} + static void destroy_conntrack(struct nf_conntrack *nfct) { @@ -298,6 +338,10 @@ destroy_conntrack(struct nf_conntrack *nfct) NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(!timer_pending(&ct->timeout)); + if (unlikely(nf_ct_is_template(ct))) { + nf_ct_tmpl_free(ct); + return; + } rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto && l4proto->destroy) @@ -540,28 +584,6 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); -/* deletion from this larval template list happens via nf_ct_put() */ -void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl) -{ - struct ct_pcpu *pcpu; - - __set_bit(IPS_TEMPLATE_BIT, &tmpl->status); - __set_bit(IPS_CONFIRMED_BIT, &tmpl->status); - nf_conntrack_get(&tmpl->ct_general); - - /* add this conntrack to the (per cpu) tmpl list */ - local_bh_disable(); - tmpl->cpu = smp_processor_id(); - pcpu = per_cpu_ptr(nf_ct_net(tmpl)->ct.pcpu_lists, tmpl->cpu); - - spin_lock(&pcpu->lock); - /* Overload tuple linked list to put us in template list. */ - hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, - &pcpu->tmpl); - spin_unlock_bh(&pcpu->lock); -} -EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert); - /* Confirm a connection given skb; places it in hash table */ int __nf_conntrack_confirm(struct sk_buff *skb) @@ -1751,7 +1773,6 @@ int nf_conntrack_init_net(struct net *net) spin_lock_init(&pcpu->lock); INIT_HLIST_NULLS_HEAD(&pcpu->unconfirmed, UNCONFIRMED_NULLS_VAL); INIT_HLIST_NULLS_HEAD(&pcpu->dying, DYING_NULLS_VAL); - INIT_HLIST_NULLS_HEAD(&pcpu->tmpl, TEMPLATE_NULLS_VAL); } net->ct.stat = alloc_percpu(struct ip_conntrack_stat); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 7a17070c5dab..b45a4223cb05 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -219,7 +219,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a, a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; } - return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); + return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) && + nf_ct_zone(a->master) == nf_ct_zone(b->master); } static inline int expect_matches(const struct nf_conntrack_expect *a, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d1c23940a86a..6b8b0abbfab4 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2995,11 +2995,6 @@ ctnetlink_create_expect(struct net *net, u16 zone, } err = nf_ct_expect_related_report(exp, portid, report); - if (err < 0) - goto err_exp; - - return 0; -err_exp: nf_ct_expect_put(exp); err_ct: nf_ct_put(ct); diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index 789feeae6c44..71f1e9fdfa18 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -349,12 +349,10 @@ static void __net_exit synproxy_proc_exit(struct net *net) static int __net_init synproxy_net_init(struct net *net) { struct synproxy_net *snet = synproxy_pernet(net); - struct nf_conntrack_tuple t; struct nf_conn *ct; int err = -ENOMEM; - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(net, 0, &t, &t, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL); if (IS_ERR(ct)) { err = PTR_ERR(ct); goto err1; @@ -365,7 +363,8 @@ static int __net_init synproxy_net_init(struct net *net) if (!nfct_synproxy_ext_add(ct)) goto err2; - nf_conntrack_tmpl_insert(net, ct); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); snet->tmpl = ct; snet->stats = alloc_percpu(struct synproxy_stats); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 75747aecdebe..c6630030c912 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -184,7 +184,6 @@ out: static int xt_ct_tg_check(const struct xt_tgchk_param *par, struct xt_ct_target_info_v1 *info) { - struct nf_conntrack_tuple t; struct nf_conn *ct; int ret = -EOPNOTSUPP; @@ -202,8 +201,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err1; - memset(&t, 0, sizeof(t)); - ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL); + ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL); ret = PTR_ERR(ct); if (IS_ERR(ct)) goto err2; @@ -227,8 +225,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par, if (ret < 0) goto err3; } - - nf_conntrack_tmpl_insert(par->net, ct); + __set_bit(IPS_CONFIRMED_BIT, &ct->status); + nf_conntrack_get(&ct->ct_general); out: info->ct = ct; return 0; diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index f407ebc13481..29d2c31f406c 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -126,6 +126,7 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) goto out; } + sysfs_attr_init(&info->timer->attr.attr); info->timer->attr.attr.name = kstrdup(info->label, GFP_KERNEL); if (!info->timer->attr.attr.name) { ret = -ENOMEM; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2af8590d9e0f..b5afe538bb88 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2401,7 +2401,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); - if (tp_len > dev->mtu + dev->hard_header_len) { + if (likely(tp_len >= 0) && + tp_len > dev->mtu + dev->hard_header_len) { struct ethhdr *ehdr; /* Earlier code assumed this would be a VLAN pkt, * double-check this now that we have the actual @@ -2782,7 +2783,7 @@ static int packet_release(struct socket *sock) static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) { struct packet_sock *po = pkt_sk(sk); - const struct net_device *dev_curr; + struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; @@ -2806,15 +2807,13 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) po->num = proto; po->prot_hook.type = proto; - - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; packet_cached_dev_assign(po, dev); } + if (dev_curr) + dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 074a32f466f8..b087087ccfa9 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -54,7 +54,7 @@ void tcf_hash_destroy(struct tc_action *a) } EXPORT_SYMBOL(tcf_hash_destroy); -int tcf_hash_release(struct tc_action *a, int bind) +int __tcf_hash_release(struct tc_action *a, bool bind, bool strict) { struct tcf_common *p = a->priv; int ret = 0; @@ -62,7 +62,7 @@ int tcf_hash_release(struct tc_action *a, int bind) if (p) { if (bind) p->tcfc_bindcnt--; - else if (p->tcfc_bindcnt > 0) + else if (strict && p->tcfc_bindcnt > 0) return -EPERM; p->tcfc_refcnt--; @@ -73,9 +73,10 @@ int tcf_hash_release(struct tc_action *a, int bind) ret = 1; } } + return ret; } -EXPORT_SYMBOL(tcf_hash_release); +EXPORT_SYMBOL(__tcf_hash_release); static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, struct tc_action *a) @@ -145,7 +146,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a) head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; hlist_for_each_entry_safe(p, n, head, tcfc_head) { a->priv = p; - ret = tcf_hash_release(a, 0); + ret = __tcf_hash_release(a, false, true); if (ret == ACT_P_DELETED) { module_put(a->ops->owner); n_i++; @@ -432,7 +433,7 @@ int tcf_action_destroy(struct list_head *actions, int bind) int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = tcf_hash_release(a, bind); + ret = __tcf_hash_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(a->ops->owner); else if (ret < 0) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index e9e923a8c747..aaae8e83bf18 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -27,9 +27,10 @@ struct tcf_bpf_cfg { struct bpf_prog *filter; struct sock_filter *bpf_ops; - char *bpf_name; + const char *bpf_name; u32 bpf_fd; u16 bpf_num_ops; + bool is_ebpf; }; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, @@ -207,6 +208,7 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_ops = bpf_ops; cfg->bpf_num_ops = bpf_num_ops; cfg->filter = fp; + cfg->is_ebpf = false; return 0; } @@ -241,18 +243,40 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg) cfg->bpf_fd = bpf_fd; cfg->bpf_name = name; cfg->filter = fp; + cfg->is_ebpf = true; return 0; } +static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg) +{ + if (cfg->is_ebpf) + bpf_prog_put(cfg->filter); + else + bpf_prog_destroy(cfg->filter); + + kfree(cfg->bpf_ops); + kfree(cfg->bpf_name); +} + +static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, + struct tcf_bpf_cfg *cfg) +{ + cfg->is_ebpf = tcf_bpf_is_ebpf(prog); + cfg->filter = prog->filter; + + cfg->bpf_ops = prog->bpf_ops; + cfg->bpf_name = prog->bpf_name; +} + static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *act, int replace, int bind) { struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; - struct tcf_bpf_cfg cfg; bool is_bpf, is_ebpf; int ret; @@ -301,6 +325,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, prog = to_bpf(act); spin_lock_bh(&prog->tcf_lock); + if (ret != ACT_P_CREATED) + tcf_bpf_prog_fill_cfg(prog, &old); + prog->bpf_ops = cfg.bpf_ops; prog->bpf_name = cfg.bpf_name; @@ -316,32 +343,22 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (ret == ACT_P_CREATED) tcf_hash_insert(act); + else + tcf_bpf_cfg_cleanup(&old); return ret; destroy_fp: - if (is_ebpf) - bpf_prog_put(cfg.filter); - else - bpf_prog_destroy(cfg.filter); - - kfree(cfg.bpf_ops); - kfree(cfg.bpf_name); - + tcf_bpf_cfg_cleanup(&cfg); return ret; } static void tcf_bpf_cleanup(struct tc_action *act, int bind) { - const struct tcf_bpf *prog = act->priv; - - if (tcf_bpf_is_ebpf(prog)) - bpf_prog_put(prog->filter); - else - bpf_prog_destroy(prog->filter); + struct tcf_bpf_cfg tmp; - kfree(prog->bpf_ops); - kfree(prog->bpf_name); + tcf_bpf_prog_fill_cfg(act->priv, &tmp); + tcf_bpf_cfg_cleanup(&tmp); } static struct tc_action_ops act_bpf_ops __read_mostly = { diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index ce8676ad892f..e38a7701f154 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -69,13 +69,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; } else { - p = to_pedit(a); - tcf_hash_release(a, bind); if (bind) return 0; + tcf_hash_release(a, bind); if (!ovr) return -EEXIST; - + p = to_pedit(a); if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 93d5742dc7e0..6a783afe4960 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -385,6 +385,19 @@ static void choke_reset(struct Qdisc *sch) { struct choke_sched_data *q = qdisc_priv(sch); + while (q->head != q->tail) { + struct sk_buff *skb = q->tab[q->head]; + + q->head = (q->head + 1) & q->tab_mask; + if (!skb) + continue; + qdisc_qstats_backlog_dec(sch, skb); + --sch->q.qlen; + qdisc_drop(skb, sch); + } + + memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); + q->head = q->tail = 0; red_restart(&q->vars); } diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c index 89f8fcf73f18..ade9445a55ab 100644 --- a/net/sched/sch_plug.c +++ b/net/sched/sch_plug.c @@ -216,6 +216,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = { .peek = qdisc_peek_head, .init = plug_init, .change = plug_change, + .reset = qdisc_reset_queue, .owner = THIS_MODULE, }; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1425ec2bbd5a..17bef01b9aa3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2200,12 +2200,6 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval, if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen)) return -EFAULT; - if (sctp_sk(sk)->subscribe.sctp_data_io_event) - pr_warn_ratelimited(DEPRECATED "%s (pid %d) " - "Requested SCTP_SNDRCVINFO event.\n" - "Use SCTP_RCVINFO through SCTP_RECVRCVINFO option instead.\n", - current->comm, task_pid_nr(current)); - /* At the time when a user app subscribes to SCTP_SENDER_DRY_EVENT, * if there is no data to be sent or retransmit, the stack will * immediately send up this notification. diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 9825ff0f91d6..6255d141133b 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -240,8 +240,8 @@ static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) req = xprt_alloc_bc_req(xprt, GFP_ATOMIC); if (!req) goto not_found; - /* Note: this 'free' request adds it to xprt->bc_pa_list */ - xprt_free_bc_request(req); + list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list); + xprt->bc_alloc_count++; } req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); @@ -336,7 +336,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied) spin_lock(&xprt->bc_pa_lock); list_del(&req->rq_bc_pa_list); - xprt->bc_alloc_count--; + xprt_dec_alloc_count(xprt, 1); spin_unlock(&xprt->bc_pa_lock); req->rq_private_buf.len = copied; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index cbc6af923dd1..23608eb0ded2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1902,6 +1902,7 @@ call_transmit_status(struct rpc_task *task) switch (task->tk_status) { case -EAGAIN: + case -ENOBUFS: break; default: dprint_status(task); @@ -1928,7 +1929,6 @@ call_transmit_status(struct rpc_task *task) case -ECONNABORTED: case -EADDRINUSE: case -ENOTCONN: - case -ENOBUFS: case -EPIPE: rpc_task_force_reencode(task); } @@ -2057,12 +2057,13 @@ call_status(struct rpc_task *task) case -ECONNABORTED: rpc_force_rebind(clnt); case -EADDRINUSE: - case -ENOBUFS: rpc_delay(task, 3*HZ); case -EPIPE: case -ENOTCONN: task->tk_action = call_bind; break; + case -ENOBUFS: + rpc_delay(task, HZ>>2); case -EAGAIN: task->tk_action = call_transmit; break; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e193c2b5476b..0030376327b7 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -527,6 +527,10 @@ static int xs_local_send_request(struct rpc_task *task) true, &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - req->rq_bytes_sent, status); + + if (status == -EAGAIN && sock_writeable(transport->inet)) + status = -ENOBUFS; + if (likely(sent > 0) || status == 0) { req->rq_bytes_sent += sent; req->rq_xmit_bytes_sent += sent; @@ -539,6 +543,7 @@ static int xs_local_send_request(struct rpc_task *task) switch (status) { case -ENOBUFS: + break; case -EAGAIN: status = xs_nospace(task); break; @@ -589,6 +594,9 @@ static int xs_udp_send_request(struct rpc_task *task) if (status == -EPERM) goto process_status; + if (status == -EAGAIN && sock_writeable(transport->inet)) + status = -ENOBUFS; + if (sent > 0 || status == 0) { req->rq_xmit_bytes_sent += sent; if (sent >= req->rq_slen) @@ -669,9 +677,6 @@ static int xs_tcp_send_request(struct rpc_task *task) dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); - if (unlikely(sent == 0 && status < 0)) - break; - /* If we've sent the entire packet, immediately * reset the count of bytes sent. */ req->rq_bytes_sent += sent; @@ -681,18 +686,21 @@ static int xs_tcp_send_request(struct rpc_task *task) return 0; } - if (sent != 0) - continue; - status = -EAGAIN; - break; + if (status < 0) + break; + if (sent == 0) { + status = -EAGAIN; + break; + } } + if (status == -EAGAIN && sk_stream_is_writeable(transport->inet)) + status = -ENOBUFS; switch (status) { case -ENOTSOCK: status = -ENOTCONN; /* Should we call xs_close() here? */ break; - case -ENOBUFS: case -EAGAIN: status = xs_nospace(task); break; @@ -703,6 +711,7 @@ static int xs_tcp_send_request(struct rpc_task *task) case -ECONNREFUSED: case -ENOTCONN: case -EADDRINUSE: + case -ENOBUFS: case -EPIPE: clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); } |