From 8975ae88e137ea02a71b7a86af2f8eb790c2f1e7 Mon Sep 17 00:00:00 2001 From: Liad Kaufman Date: Sun, 14 Sep 2014 21:48:28 +0300 Subject: mac80211: fix warning on htmldocs for last_tdls_pkt_time Forgot to add an entry to the struct description of sta_info. Signed-off-by: Liad Kaufman Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 42f68cb8957e..bcda2ac7d844 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -336,6 +336,7 @@ struct ieee80211_tx_latency_stat { * @known_smps_mode: the smps_mode the client thinks we are in. Relevant for * AP only. * @cipher_scheme: optional cipher scheme for this station + * @last_tdls_pkt_time: holds the time in jiffies of last TDLS pkt ACKed */ struct sta_info { /* General information, mostly static */ -- cgit v1.2.3 From c12bc4885f4b3bab0ed779c69d5d7e3223fa5003 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 30 Sep 2014 07:08:02 +0300 Subject: mac80211: return the vif's chandef in ieee80211_cfg_get_channel() The chandef of the channel context a vif is using may be different than the chandef of the vif itself. For instance, the bandwidth used by the vif may be narrower than the one configured in the channel context. To avoid confusion, return the vif's chandef in ieee80211_cfg_get_channel() instead of the chandef of the channel context. Signed-off-by: Luciano Coelho Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb6a1502b6df..343da1e35025 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3458,7 +3458,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (chanctx_conf) { - *chandef = chanctx_conf->def; + *chandef = sdata->vif.bss_conf.chandef; ret = 0; } else if (local->open_count > 0 && local->open_count == local->monitors && -- cgit v1.2.3 From bc37b16870a382e8b71d881444c19a16de1c1a7f Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 7 Oct 2014 22:20:23 +0200 Subject: net: rfkill: kernel-doc warning fixes Correct the kernel-doc, the parameter is called "blocked" not "state". Signed-off-by: Fabian Frederick Signed-off-by: Johannes Berg --- net/rfkill/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index b3b16c070a7f..fa7cd792791c 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -329,7 +329,7 @@ static atomic_t rfkill_input_disabled = ATOMIC_INIT(0); /** * __rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected - * @state: the new state + * @blocked: the new state * * This function sets the state of all switches of given type, * unless a specific switch is claimed by userspace (in which case, @@ -353,7 +353,7 @@ static void __rfkill_switch_all(const enum rfkill_type type, bool blocked) /** * rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected - * @state: the new state + * @blocked: the new state * * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state). * Please refer to __rfkill_switch_all() for details. -- cgit v1.2.3 From 252e07ca5f64dd31fdfca8027287e7d75fefdab1 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Wed, 8 Oct 2014 09:48:34 +0300 Subject: nl80211: sanity check the channel switch counter value The nl80211 channel switch count attribute (NL80211_ATTR_CH_SWITCH_COUNT) is specified as u32, but the specification uses u8 for the counter. To make sure strange things don't happen without informing the user, sanity check the value and return -EINVAL if it doesn't fit in u8. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index cb9f5a44ffad..5839c85075f1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5927,6 +5927,7 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) int err; bool need_new_beacon = false; int len, i; + u32 cs_count; if (!rdev->ops->channel_switch || !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) @@ -5963,7 +5964,14 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) if (need_new_beacon && !info->attrs[NL80211_ATTR_CSA_IES]) return -EINVAL; - params.count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]); + /* Even though the attribute is u32, the specification says + * u8, so let's make sure we don't overflow. + */ + cs_count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]); + if (cs_count > 255) + return -EINVAL; + + params.count = cs_count; if (!need_new_beacon) goto skip_beacons; -- cgit v1.2.3 From c7abf25af0f41be4b50d44c5b185d52eea360cb8 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 13 Oct 2014 14:34:41 +0200 Subject: mac80211: fix typo in starting baserate for rts_cts_rate_idx It affects non-(V)HT rates and can lead to selecting an rts_cts rate that is not a basic rate or way superior to the reference rate (ATM rates[0] used for the 1st attempt of the protected frame data). E.g, assuming drivers register growing (bitrate) sorted tables of ieee80211_rate-s, having : - rates[0].idx == d'2 and basic_rates == b'10100 will select rts_cts idx b'10011 & ~d'(BIT(2)-1), i.e. 1, likewise - rates[0].idx == d'2 and basic_rates == b'10001 will select rts_cts idx b'10000 The first is not a basic rate and the second is > rates[0]. Also, wrt severity of the addressed misbehavior, ATM we only have one rts_cts_rate_idx rather than one per rate table entry, so this idx might still point to bitrates > rates[1..MAX_RATES]. Fixes: 5253ffb8c9e1 ("mac80211: always pick a basic rate to tx RTS/CTS for pre-HT rates") Cc: stable@vger.kernel.org Signed-off-by: Karl Beldan Signed-off-by: Johannes Berg --- net/mac80211/rate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 8fdadfd94ba8..6081329784dd 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -448,7 +448,7 @@ static void rate_fixup_ratelist(struct ieee80211_vif *vif, */ if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) { u32 basic_rates = vif->bss_conf.basic_rates; - s8 baserate = basic_rates ? ffs(basic_rates - 1) : 0; + s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0; rate = &sband->bitrates[rates[0].idx]; -- cgit v1.2.3 From 11b2357d5dbce999803e9055f8c09829a8a87db4 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Mon, 20 Oct 2014 10:54:36 +0200 Subject: mac80211: minstrels: fix buffer overflow in HT debugfs rc_stats ATM an HT rc_stats line is 106 chars. Times 8(MCS_GROUP_RATES)*3(SS)*2(GI)*2(BW) + CCK(4), i.e. x100, this is well above the current 8192 - sizeof(*ms) currently allocated. Fix this by squeezing the output as follows (not that we're short on memory but this also improves readability and range, the new format adds one more digit to *ok/*cum and ok/cum): - Before (HT) (106 ch): type rate throughput ewma prob this prob retry this succ/attempt success attempts CCK/LP 5.5M 0.0 0.0 0.0 0 0( 0) 0 0 HT20/LGI ABCDP MCS0 0.0 0.0 0.0 1 0( 0) 0 0 - After (75 ch): type rate tpt eprob *prob ret *ok(*cum) ok( cum) CCK/LP 5.5M 0.0 0.0 0.0 0 0( 0) 0( 0) HT20/LGI ABCDP MCS0 0.0 0.0 0.0 1 0( 0) 0( 0) - Align non-HT format Before (non-HT) (83 ch): rate throughput ewma prob this prob this succ/attempt success attempts ABCDP 6 0.0 0.0 0.0 0( 0) 0 0 54 0.0 0.0 0.0 0( 0) 0 0 - After (61 ch): rate tpt eprob *prob *ok(*cum) ok( cum) ABCDP 1 0.0 0.0 0.0 0( 0) 0( 0) 54 0.0 0.0 0.0 0( 0) 0( 0) *This also adds dynamic checks for overflow, lowers the size of the non-HT request (allowing > 30 entries) and replaces the buddy-rounded allocations (s/sizeof(*ms) + 8192/8192). Signed-off-by: Karl Beldan Acked-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_debugfs.c | 12 +++++++----- net/mac80211/rc80211_minstrel_ht_debugfs.c | 13 ++++++++----- 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index edde723f9f00..2acab1bcaa4b 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -62,14 +62,14 @@ minstrel_stats_open(struct inode *inode, struct file *file) unsigned int i, tp, prob, eprob; char *p; - ms = kmalloc(sizeof(*ms) + 4096, GFP_KERNEL); + ms = kmalloc(2048, GFP_KERNEL); if (!ms) return -ENOMEM; file->private_data = ms; p = ms->buf; - p += sprintf(p, "rate throughput ewma prob this prob " - "this succ/attempt success attempts\n"); + p += sprintf(p, "rate tpt eprob *prob" + " *ok(*cum) ok( cum)\n"); for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; struct minstrel_rate_stats *mrs = &mi->r[i].stats; @@ -86,8 +86,8 @@ minstrel_stats_open(struct inode *inode, struct file *file) prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); eprob = MINSTREL_TRUNC(mrs->probability * 1000); - p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " - " %3u(%3u) %8llu %8llu\n", + p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u" + " %4u(%4u) %9llu(%9llu)\n", tp / 10, tp % 10, eprob / 10, eprob % 10, prob / 10, prob % 10, @@ -102,6 +102,8 @@ minstrel_stats_open(struct inode *inode, struct file *file) mi->sample_packets); ms->len = p - ms->buf; + WARN_ON(ms->len + sizeof(*ms) > 2048); + return 0; } diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index a72ad46f2a04..d537bec93754 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -63,8 +63,8 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) prob = MINSTREL_TRUNC(mr->cur_prob * 1000); eprob = MINSTREL_TRUNC(mr->probability * 1000); - p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " - "%3u %3u(%3u) %8llu %8llu\n", + p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u " + "%3u %4u(%4u) %9llu(%9llu)\n", tp / 10, tp % 10, eprob / 10, eprob % 10, prob / 10, prob % 10, @@ -96,14 +96,15 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) return ret; } - ms = kmalloc(sizeof(*ms) + 8192, GFP_KERNEL); + ms = kmalloc(8192, GFP_KERNEL); if (!ms) return -ENOMEM; file->private_data = ms; p = ms->buf; - p += sprintf(p, "type rate throughput ewma prob " - "this prob retry this succ/attempt success attempts\n"); + p += sprintf(p, "type rate tpt eprob *prob " + "ret *ok(*cum) ok( cum)\n"); + p = minstrel_ht_stats_dump(mi, max_mcs, p); for (i = 0; i < max_mcs; i++) @@ -118,6 +119,8 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); ms->len = p - ms->buf; + WARN_ON(ms->len + sizeof(*ms) > 8192); + return nonseekable_open(inode, file); } -- cgit v1.2.3 From e37ad9fd636071e45368d1d9cc3b7b421281ce7f Mon Sep 17 00:00:00 2001 From: Marcelo Leitner Date: Mon, 13 Oct 2014 13:09:28 -0300 Subject: netfilter: nf_conntrack: allow server to become a client in TW handling When a port that was used to listen for inbound connections gets closed and reused for outgoing connections (like rsh ends up doing for stderr flow), current we may reject the SYN/ACK packet for the new connection because tcp_conntracks states forbirds a port to become a client while there is still a TIME_WAIT entry in there for it. As TCP may expire the TIME_WAIT socket in 60s and conntrack's timeout for it is 120s, there is a ~60s window that the application can end up opening a port that conntrack will end up blocking. This patch fixes this by simply allowing such state transition: if we see a SYN, in TIME_WAIT state, on REPLY direction, move it to sSS. Note that the rest of the code already handles this situation, more specificly in tcp_packet(), first switch clause. Signed-off-by: Marcelo Ricardo Leitner Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 44d1ea32570a..d87b6423ffb2 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -213,7 +213,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* REPLY */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 }, +/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 }, /* * sNO -> sIV Never reached. * sSS -> sS2 Simultaneous open @@ -223,7 +223,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sFW -> sIV * sCW -> sIV * sLA -> sIV - * sTW -> sIV Reopened connection, but server may not do it. + * sTW -> sSS Reopened connection, but server may have switched role * sCL -> sIV */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ -- cgit v1.2.3 From 0f9f5e1b83abd2b37c67658e02a6fc9001831fa5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 21 Oct 2014 11:28:12 +0300 Subject: netfilter: ipset: off by one in ip_set_nfnl_get_byindex() The ->ip_set_list[] array is initialized in ip_set_net_init() and it has ->ip_set_max elements so this check should be >= instead of > otherwise we are off by one. Signed-off-by: Dan Carpenter Acked-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 912e5a05b79d..86f9d76b1464 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -659,7 +659,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); - if (index > inst->ip_set_max) + if (index >= inst->ip_set_max) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); -- cgit v1.2.3 From c123bb7163043bb8f33858cf8e45b01c17dbd171 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 21 Oct 2014 11:08:21 +0200 Subject: netfilter: nf_tables: check for NULL in nf_tables_newchain pcpu stats allocation alloc_percpu returns NULL on failure, not a negative error code. Fixes: ff3cd7b3c922 ("netfilter: nf_tables: refactor chain statistic routines") Signed-off-by: Sabrina Dubroca Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 65eb2a1160d5..11ab4b078f3b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1328,10 +1328,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb, basechain->stats = stats; } else { stats = netdev_alloc_pcpu_stats(struct nft_stats); - if (IS_ERR(stats)) { + if (stats == NULL) { module_put(type->owner); kfree(basechain); - return PTR_ERR(stats); + return -ENOMEM; } rcu_assign_pointer(basechain->stats, stats); } -- cgit v1.2.3 From 7677e86843e2136a9b05549a9ca47d4f744565b6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 4 Oct 2014 22:18:02 +0800 Subject: bridge: Do not compile options in br_parse_ip_options Commit 462fb2af9788a82a534f8184abfde31574e1cfa0 bridge : Sanitize skb before it enters the IP stack broke when IP options are actually used because it mangles the skb as if it entered the IP stack which is wrong because the bridge is supposed to operate below the IP stack. Since nobody has actually requested for parsing of IP options this patch fixes it by simply reverting to the previous approach of ignoring all IP options, i.e., zeroing the IPCB. If and when somebody who uses IP options and actually needs them to be parsed by the bridge complains then we can revisit this. Reported-by: David Newall Signed-off-by: Herbert Xu Tested-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 1bada53bb195..1a4f32c09ad5 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -192,7 +192,6 @@ static inline void nf_bridge_save_header(struct sk_buff *skb) static int br_parse_ip_options(struct sk_buff *skb) { - struct ip_options *opt; const struct iphdr *iph; struct net_device *dev = skb->dev; u32 len; @@ -201,7 +200,6 @@ static int br_parse_ip_options(struct sk_buff *skb) goto inhdr_error; iph = ip_hdr(skb); - opt = &(IPCB(skb)->opt); /* Basic sanity checks */ if (iph->ihl < 5 || iph->version != 4) @@ -227,23 +225,11 @@ static int br_parse_ip_options(struct sk_buff *skb) } memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - if (iph->ihl == 5) - return 0; - - opt->optlen = iph->ihl*4 - sizeof(struct iphdr); - if (ip_options_compile(dev_net(dev), opt, skb)) - goto inhdr_error; - - /* Check correct handling of SRR option */ - if (unlikely(opt->srr)) { - struct in_device *in_dev = __in_dev_get_rcu(dev); - if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev)) - goto drop; - - if (ip_options_rcv_srr(skb)) - goto drop; - } - + /* We should really parse IP options here but until + * somebody who actually uses IP options complains to + * us we'll just silently ignore the options because + * we're lazy! + */ return 0; inhdr_error: -- cgit v1.2.3 From 9dfa1dfe4d5e5e66a991321ab08afe69759d797a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Oct 2014 10:36:06 +0200 Subject: netfilter: nf_log: account for size of NLMSG_DONE attribute We currently neither account for the nlattr size, nor do we consider the size of the trailing NLMSG_DONE when allocating nlmsg skb. This can result in nflog to stop working, as __nfulnl_send() re-tries sending forever if it failed to append NLMSG_DONE (which will never work if buffer is not large enough). Reported-by: Houcheng Lin Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b1e3a0579416..8117fba8e661 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -649,7 +649,8 @@ nfulnl_log_packet(struct net *net, + nla_total_size(sizeof(u_int32_t)) /* gid */ + nla_total_size(plen) /* prefix */ + nla_total_size(sizeof(struct nfulnl_msg_packet_hw)) - + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)); + + nla_total_size(sizeof(struct nfulnl_msg_packet_timestamp)) + + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { size += nla_total_size(skb->dev->hard_header_len) @@ -692,8 +693,7 @@ nfulnl_log_packet(struct net *net, goto unlock_and_release; } - if (inst->skb && - size > skb_tailroom(inst->skb) - sizeof(struct nfgenmsg)) { + if (inst->skb && size > skb_tailroom(inst->skb)) { /* either the queue len is too high or we don't have * enough room in the skb left. flush to userspace. */ __nfulnl_flush(inst); -- cgit v1.2.3 From c1e7dc91eed0ed1a51c9b814d648db18bf8fc6e9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Oct 2014 10:36:07 +0200 Subject: netfilter: nfnetlink_log: fix maximum packet length logged to userspace don't try to queue payloads > 0xffff - NLA_HDRLEN, it does not work. The nla length includes the size of the nla struct, so anything larger results in u16 integer overflow. This patch is similar to 9cefbbc9c8f9abe (netfilter: nfnetlink_queue: cleanup copy_range usage). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 8117fba8e661..2d02eac35415 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -43,7 +43,8 @@ #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ -#define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */ +/* max packet size is limited by 16-bit struct nfattr nfa_len field */ +#define NFULNL_COPY_RANGE_MAX (0xFFFF - NLA_HDRLEN) #define PRINTR(x, args...) do { if (net_ratelimit()) \ printk(x, ## args); } while (0); @@ -252,6 +253,8 @@ nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, case NFULNL_COPY_PACKET: inst->copy_mode = mode; + if (range == 0) + range = NFULNL_COPY_RANGE_MAX; inst->copy_range = min_t(unsigned int, range, NFULNL_COPY_RANGE_MAX); break; @@ -679,8 +682,7 @@ nfulnl_log_packet(struct net *net, break; case NFULNL_COPY_PACKET: - if (inst->copy_range == 0 - || inst->copy_range > skb->len) + if (inst->copy_range > skb->len) data_len = skb->len; else data_len = inst->copy_range; -- cgit v1.2.3 From b51d3fa364885a2c1e1668f88776c67c95291820 Mon Sep 17 00:00:00 2001 From: Houcheng Lin Date: Thu, 23 Oct 2014 10:36:08 +0200 Subject: netfilter: nf_log: release skbuff on nlmsg put failure The kernel should reserve enough room in the skb so that the DONE message can always be appended. However, in case of e.g. new attribute erronously not being size-accounted for, __nfulnl_send() will still try to put next nlmsg into this full skbuf, causing the skb to be stuck forever and blocking delivery of further messages. Fix issue by releasing skb immediately after nlmsg_put error and WARN() so we can track down the cause of such size mismatch. [ fw@strlen.de: add tailroom/len info to WARN ] Signed-off-by: Houcheng Lin Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_log.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 2d02eac35415..5f1be5ba3559 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -346,26 +346,25 @@ nfulnl_alloc_skb(struct net *net, u32 peer_portid, unsigned int inst_size, return skb; } -static int +static void __nfulnl_send(struct nfulnl_instance *inst) { - int status = -1; - if (inst->qlen > 1) { struct nlmsghdr *nlh = nlmsg_put(inst->skb, 0, 0, NLMSG_DONE, sizeof(struct nfgenmsg), 0); - if (!nlh) + if (WARN_ONCE(!nlh, "bad nlskb size: %u, tailroom %d\n", + inst->skb->len, skb_tailroom(inst->skb))) { + kfree_skb(inst->skb); goto out; + } } - status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, - MSG_DONTWAIT); - + nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid, + MSG_DONTWAIT); +out: inst->qlen = 0; inst->skb = NULL; -out: - return status; } static void -- cgit v1.2.3 From 349ce993ac706869d553a1816426d3a4bfda02b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Oct 2014 12:58:58 -0700 Subject: tcp: md5: do not use alloc_percpu() percpu tcp_md5sig_pool contains memory blobs that ultimately go through sg_set_buf(). -> sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); This requires that whole area is in a physically contiguous portion of memory. And that @buf is not backed by vmalloc(). Given that alloc_percpu() can use vmalloc() areas, this does not fit the requirements. Replace alloc_percpu() by a static DEFINE_PER_CPU() as tcp_md5sig_pool is small anyway, there is no gain to dynamically allocate it. Signed-off-by: Eric Dumazet Fixes: 765cf9976e93 ("tcp: md5: remove one indirection level in tcp_md5sig_pool") Reported-by: Crestez Dan Leonard Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 59 ++++++++++++++++++++-------------------------------------- 1 file changed, 20 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1bec4e76d88c..39ec0c379545 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2868,61 +2868,42 @@ EXPORT_SYMBOL(compat_tcp_getsockopt); #endif #ifdef CONFIG_TCP_MD5SIG -static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly; +static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool); static DEFINE_MUTEX(tcp_md5sig_mutex); - -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); - - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - } - free_percpu(pool); -} +static bool tcp_md5sig_pool_populated = false; static void __tcp_alloc_md5sig_pool(void) { int cpu; - struct tcp_md5sig_pool __percpu *pool; - - pool = alloc_percpu(struct tcp_md5sig_pool); - if (!pool) - return; for_each_possible_cpu(cpu) { - struct crypto_hash *hash; - - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR_OR_NULL(hash)) - goto out_free; + if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) { + struct crypto_hash *hash; - per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; + hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR_OR_NULL(hash)) + return; + per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash; + } } - /* before setting tcp_md5sig_pool, we must commit all writes - * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool() + /* before setting tcp_md5sig_pool_populated, we must commit all writes + * to memory. See smp_rmb() in tcp_get_md5sig_pool() */ smp_wmb(); - tcp_md5sig_pool = pool; - return; -out_free: - __tcp_free_md5sig_pool(pool); + tcp_md5sig_pool_populated = true; } bool tcp_alloc_md5sig_pool(void) { - if (unlikely(!tcp_md5sig_pool)) { + if (unlikely(!tcp_md5sig_pool_populated)) { mutex_lock(&tcp_md5sig_mutex); - if (!tcp_md5sig_pool) + if (!tcp_md5sig_pool_populated) __tcp_alloc_md5sig_pool(); mutex_unlock(&tcp_md5sig_mutex); } - return tcp_md5sig_pool != NULL; + return tcp_md5sig_pool_populated; } EXPORT_SYMBOL(tcp_alloc_md5sig_pool); @@ -2936,13 +2917,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool __percpu *p; - local_bh_disable(); - p = ACCESS_ONCE(tcp_md5sig_pool); - if (p) - return raw_cpu_ptr(p); + if (tcp_md5sig_pool_populated) { + /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */ + smp_rmb(); + return this_cpu_ptr(&tcp_md5sig_pool); + } local_bh_enable(); return NULL; } -- cgit v1.2.3 From 93a35f59f1b13a02674877e3efdf07ae47e34052 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Oct 2014 06:30:30 -0700 Subject: net: napi_reuse_skb() should check pfmemalloc Do not reuse skb if it was pfmemalloc tainted, otherwise future frame might be dropped anyway. Signed-off-by: Eric Dumazet Signed-off-by: Roman Gushchin Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b793e3521a36..945bbd001359 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4157,6 +4157,10 @@ EXPORT_SYMBOL(napi_gro_receive); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { + if (unlikely(skb->pfmemalloc)) { + consume_skb(skb); + return; + } __skb_pull(skb, skb_headlen(skb)); /* restore the reserve we had after netdev_alloc_skb_ip_align() */ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); -- cgit v1.2.3 From 7965ee93719921ea5978f331da653dfa2d7b99f5 Mon Sep 17 00:00:00 2001 From: Arturo Borrero Date: Sun, 26 Oct 2014 12:22:40 +0100 Subject: netfilter: nft_compat: fix wrong target lookup in nft_target_select_ops() The code looks for an already loaded target, and the correct list to search is nft_target_list, not nft_match_list. Signed-off-by: Arturo Borrero Gonzalez Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 0480f57a4eb6..9d6d6f60a80f 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -672,7 +672,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, family = ctx->afi->family; /* Re-use the existing target if it's already loaded. */ - list_for_each_entry(nft_target, &nft_match_list, head) { + list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; if (strcmp(target->name, tg_name) == 0 && -- cgit v1.2.3 From f89b7755f517cdbb755d7543eef986ee9d54e654 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 23 Oct 2014 18:41:08 -0700 Subject: bpf: split eBPF out of NET introduce two configs: - hidden CONFIG_BPF to select eBPF interpreter that classic socket filters depend on - visible CONFIG_BPF_SYSCALL (default off) that tracing and sockets can use that solves several problems: - tracing and others that wish to use eBPF don't need to depend on NET. They can use BPF_SYSCALL to allow loading from userspace or select BPF to use it directly from kernel in NET-less configs. - in 3.18 programs cannot be attached to events yet, so don't force it on - when the rest of eBPF infra is there in 3.19+, it's still useful to switch it off to minimize kernel size bloat-o-meter on x64 shows: add/remove: 0/60 grow/shrink: 0/2 up/down: 0/-15601 (-15601) tested with many different config combinations. Hopefully didn't miss anything. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- init/Kconfig | 14 ++++++++++++++ kernel/Makefile | 2 +- kernel/bpf/Makefile | 6 +++--- kernel/bpf/core.c | 9 +++++++++ net/Kconfig | 2 +- 5 files changed, 28 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/init/Kconfig b/init/Kconfig index 3ee28ae02cc8..2081a4d3d917 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1341,6 +1341,10 @@ config SYSCTL_ARCH_UNALIGN_ALLOW config HAVE_PCSPKR_PLATFORM bool +# interpreter that classic socket filters depend on +config BPF + bool + menuconfig EXPERT bool "Configure standard kernel features (expert users)" # Unhide debug options, to make the on-by-default options visible @@ -1521,6 +1525,16 @@ config EVENTFD If unsure, say Y. +# syscall, maps, verifier +config BPF_SYSCALL + bool "Enable bpf() system call" if EXPERT + select ANON_INODES + select BPF + default n + help + Enable the bpf() system call that allows to manipulate eBPF + programs and maps via file descriptors. + config SHMEM bool "Use full shmem filesystem" if EXPERT default y diff --git a/kernel/Makefile b/kernel/Makefile index dc5c77544fd6..17ea6d4a9a24 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -86,7 +86,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o -obj-$(CONFIG_NET) += bpf/ +obj-$(CONFIG_BPF) += bpf/ obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index 45427239f375..0daf7f6ae7df 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -1,5 +1,5 @@ -obj-y := core.o syscall.o verifier.o - +obj-y := core.o +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o ifdef CONFIG_TEST_BPF -obj-y += test_stub.o +obj-$(CONFIG_BPF_SYSCALL) += test_stub.o endif diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f0c30c59b317..d6594e457a25 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -655,3 +655,12 @@ void bpf_prog_free(struct bpf_prog *fp) schedule_work(&aux->work); } EXPORT_SYMBOL_GPL(bpf_prog_free); + +/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call + * skb_copy_bits(), so provide a weak definition of it for NET-less config. + */ +int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, + int len) +{ + return -EFAULT; +} diff --git a/net/Kconfig b/net/Kconfig index 6272420a721b..99815b5454bf 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -6,7 +6,7 @@ menuconfig NET bool "Networking support" select NLATTR select GENERIC_NET_UTILS - select ANON_INODES + select BPF ---help--- Unless you really know what you are doing, you should say Y here. The reason is that some programs need kernel networking support even -- cgit v1.2.3 From 3d53666b40007b55204ee8890618da79a20c9940 Mon Sep 17 00:00:00 2001 From: Alex Gartrell Date: Mon, 6 Oct 2014 08:46:19 -0700 Subject: ipvs: Avoid null-pointer deref in debug code Use daddr instead of reaching into dest. Reported-by: Dan Carpenter Signed-off-by: Alex Gartrell Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_xmit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 91f17c1eb8a2..437a3663ad03 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -316,7 +316,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, local))) { IP_VS_DBG_RL("We are crossing local and non-local addresses" - " daddr=%pI4\n", &dest->addr.ip); + " daddr=%pI4\n", &daddr); goto err_put; } @@ -458,7 +458,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest, if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode, local))) { IP_VS_DBG_RL("We are crossing local and non-local addresses" - " daddr=%pI6\n", &dest->addr.in6); + " daddr=%pI6\n", daddr); goto err_put; } -- cgit v1.2.3 From ae439286a0dec99cc8029868243689b5b5f3ff75 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 24 Oct 2014 23:44:04 +0200 Subject: net: dsa: Error out on tagging protocol mismatches If there is a mismatch between enabled tagging protocols and the protocol the switch supports, error out, rather than continue with a situation which is unlikely to work. Signed-off-by: Andrew Lunn cc: alexander.h.duyck@intel.com Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- net/dsa/dsa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 22f34cf4cb27..6317b41c99b0 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -174,8 +174,11 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, dst->rcv = brcm_netdev_ops.rcv; break; #endif - default: + case DSA_TAG_PROTO_NONE: break; + default: + ret = -ENOPROTOOPT; + goto out; } dst->tag_protocol = drv->tag_protocol; -- cgit v1.2.3 From d56109020d93337545dd257a790cb429a70acfad Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 24 Oct 2014 16:55:58 -0700 Subject: sch_pie: schedule the timer after all init succeed Cc: Vijay Subramanian Cc: David S. Miller Signed-off-by: Cong Wang Acked-by: Eric Dumazet --- net/sched/sch_pie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 33d7a98a7a97..b783a446d884 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -445,7 +445,6 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = q->params.limit; setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); - mod_timer(&q->adapt_timer, jiffies + HZ / 2); if (opt) { int err = pie_change(sch, opt); @@ -454,6 +453,7 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) return err; } + mod_timer(&q->adapt_timer, jiffies + HZ / 2); return 0; } -- cgit v1.2.3 From b2ed64a97430a26a63c6ea91c9b50e639a98dfbc Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Mon, 27 Oct 2014 17:39:16 +0100 Subject: ipv6: notify userspace when we added or changed an ipv6 token NetworkManager might want to know that it changed when the router advertisement arrives. Signed-off-by: Lubomir Rintel Cc: Hannes Frederic Sowa Cc: Daniel Borkmann Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 725c763270a0..0169ccf5aa4f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4531,6 +4531,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); + inet6_ifinfo_notify(RTM_NEWLINK, idev); addrconf_verify_rtnl(); return 0; } -- cgit v1.2.3 From 65ba1f1ec0eff1c25933468e1d238201c0c2cb29 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Oct 2014 10:30:34 +0100 Subject: inet: frags: fix a race between inet_evict_bucket and inet_frag_kill When the evictor is running it adds some chosen frags to a local list to be evicted once the chain lock has been released but at the same time the *frag_queue can be running for some of the same queues and it may call inet_frag_kill which will wait on the chain lock and will then delete the queue from the wrong list since it was added in the eviction one. The fix is simple - check if the queue has the evict flag set under the chain lock before deleting it, this is safe because the evict flag is set only under that lock and having the flag set also means that the queue has been detached from the chain list, so no need to delete it again. An important note to make is that we're safe w.r.t refcnt because inet_frag_kill and inet_evict_bucket will sync on the del_timer operation where only one of the two can succeed (or if the timer is executing - none of them), the cases are: 1. inet_frag_kill succeeds in del_timer - then the timer ref is removed, but inet_evict_bucket will not add this queue to its expire list but will restart eviction in that chain 2. inet_evict_bucket succeeds in del_timer - then the timer ref is kept until the evictor "expires" the queue, but inet_frag_kill will remove the initial ref and will set INET_FRAG_COMPLETE which will make the frag_expire fn just to remove its ref. In the end all of the queue users will do an inet_frag_put and the one that reaches 0 will free it. The refcount balance should be okay. CC: Florian Westphal CC: Eric Dumazet CC: Patrick McLean Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue") Suggested-by: Eric Dumazet Reported-by: Patrick McLean Tested-by: Patrick McLean Signed-off-by: Nikolay Aleksandrov Reviewed-by: Florian Westphal Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 9eb89f3f0ee4..894ec30c5896 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -285,7 +285,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) struct inet_frag_bucket *hb; hb = get_frag_bucket_locked(fq, f); - hlist_del(&fq->list); + if (!(fq->flags & INET_FRAG_EVICTED)) + hlist_del(&fq->list); spin_unlock(&hb->chain_lock); } -- cgit v1.2.3 From d70127e8a942364de8dd140fe73893efda363293 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 28 Oct 2014 10:44:01 +0100 Subject: inet: frags: remove the WARN_ON from inet_evict_bucket The WARN_ON in inet_evict_bucket can be triggered by a valid case: inet_frag_kill and inet_evict_bucket can be running in parallel on the same queue which means that there has been at least one more ref added by a previous inet_frag_find call, but inet_frag_kill can delete the timer before inet_evict_bucket which will cause the WARN_ON() there to trigger since we'll have refcnt!=1. Now, this case is valid because the queue is being "killed" for some reason (removed from the chain list and its timer deleted) so it will get destroyed in the end by one of the inet_frag_put() calls which reaches 0 i.e. refcnt is still valid. CC: Florian Westphal CC: Eric Dumazet CC: Patrick McLean Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue") Reported-by: Patrick McLean Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 894ec30c5896..19419b60cb37 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -146,7 +146,6 @@ evict_again: atomic_inc(&fq->refcnt); spin_unlock(&hb->chain_lock); del_timer_sync(&fq->timer); - WARN_ON(atomic_read(&fq->refcnt) != 1); inet_frag_put(fq, f); goto evict_again; } -- cgit v1.2.3 From fa19c2b050ab5254326f5fc07096dd3c6a8d5d58 Mon Sep 17 00:00:00 2001 From: Nicolas Cavallari Date: Thu, 30 Oct 2014 10:09:53 +0100 Subject: ipv4: Do not cache routing failures due to disabled forwarding. If we cache them, the kernel will reuse them, independently of whether forwarding is enabled or not. Which means that if forwarding is disabled on the input interface where the first routing request comes from, then that unreachable result will be cached and reused for other interfaces, even if forwarding is enabled on them. The opposite is also true. This can be verified with two interfaces A and B and an output interface C, where B has forwarding enabled, but not A and trying ip route get $dst iif A from $src && ip route get $dst iif B from $src Signed-off-by: Nicolas Cavallari Reviewed-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 2d4ae469b471..6a2155b02602 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1798,6 +1798,7 @@ local_input: no_route: RT_CACHE_STAT_INC(in_no_route); res.type = RTN_UNREACHABLE; + res.fi = NULL; goto local_input; /* -- cgit v1.2.3 From 14051f0452a2c26a3f4791e6ad6a435e8f1945ff Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 30 Oct 2014 08:40:56 -0700 Subject: gre: Use inner mac length when computing tunnel length Currently, skb_inner_network_header is used but this does not account for Ethernet header for ETH_P_TEB. Use skb_inner_mac_header which handles TEB and also should work with IP encapsulation in which case inner mac and inner network headers are the same. Tested: Ran TCP_STREAM over GRE, worked as expected. Signed-off-by: Tom Herbert Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- net/ipv4/gre_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index f6e345c0bc23..bb5947b0ce2d 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -47,7 +47,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); - ghl = skb_inner_network_header(skb) - skb_transport_header(skb); + ghl = skb_inner_mac_header(skb) - skb_transport_header(skb); if (unlikely(ghl < sizeof(*greh))) goto out; -- cgit v1.2.3 From 39bb5e62867de82b269b07df900165029b928359 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Oct 2014 10:32:34 -0700 Subject: net: skb_fclone_busy() needs to detect orphaned skb Some drivers are unable to perform TX completions in a bound time. They instead call skb_orphan() Problem is skb_fclone_busy() has to detect this case, otherwise we block TCP retransmits and can freeze unlucky tcp sessions on mostly idle hosts. Signed-off-by: Eric Dumazet Fixes: 1f3279ae0c13 ("tcp: avoid retransmits of TCP packets hanging in host queues") Signed-off-by: David S. Miller --- include/linux/skbuff.h | 8 ++++++-- net/ipv4/tcp_output.c | 2 +- net/xfrm/xfrm_policy.c | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5884f95ff0e9..6c8b6f604e76 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -799,15 +799,19 @@ struct sk_buff_fclones { * @skb: buffer * * Returns true is skb is a fast clone, and its clone is not freed. + * Some drivers call skb_orphan() in their ndo_start_xmit(), + * so we also check that this didnt happen. */ -static inline bool skb_fclone_busy(const struct sk_buff *skb) +static inline bool skb_fclone_busy(const struct sock *sk, + const struct sk_buff *skb) { const struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); return skb->fclone == SKB_FCLONE_ORIG && - fclones->skb2.fclone == SKB_FCLONE_CLONE; + fclones->skb2.fclone == SKB_FCLONE_CLONE && + fclones->skb2.sk == sk; } static inline struct sk_buff *alloc_skb_fclone(unsigned int size, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3af21296d967..a3d453b94747 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2126,7 +2126,7 @@ bool tcp_schedule_loss_probe(struct sock *sk) static bool skb_still_in_host_queue(const struct sock *sk, const struct sk_buff *skb) { - if (unlikely(skb_fclone_busy(skb))) { + if (unlikely(skb_fclone_busy(sk, skb))) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); return true; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4c4e457e7888..88bf289abdc9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1962,7 +1962,7 @@ static int xdst_queue_output(struct sock *sk, struct sk_buff *skb) struct xfrm_policy *pol = xdst->pols[0]; struct xfrm_policy_queue *pq = &pol->polq; - if (unlikely(skb_fclone_busy(skb))) { + if (unlikely(skb_fclone_busy(sk, skb))) { kfree_skb(skb); return 0; } -- cgit v1.2.3 From 5188cd44c55db3e92cd9e77a40b5baa7ed4340f7 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 30 Oct 2014 18:27:17 +0000 Subject: drivers/net, ipv6: Select IPv6 fragment idents for virtio UFO packets UFO is now disabled on all drivers that work with virtio net headers, but userland may try to send UFO/IPv6 packets anyway. Instead of sending with ID=0, we should select identifiers on their behalf (as we used to). Signed-off-by: Ben Hutchings Fixes: 916e4cf46d02 ("ipv6: reuse ip6_frag_id from ip6_ufo_append_data") Signed-off-by: David S. Miller --- drivers/net/macvtap.c | 3 +++ drivers/net/tun.c | 6 +++++- include/net/ipv6.h | 2 ++ net/ipv6/output_core.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 2aeaa61ece09..6f226de655a4 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -572,6 +573,8 @@ static int macvtap_skb_from_vnet_hdr(struct sk_buff *skb, pr_warn_once("macvtap: %s: using disabled UFO feature; please fix this program\n", current->comm); gso_type = SKB_GSO_UDP; + if (skb->protocol == htons(ETH_P_IPV6)) + ipv6_proxy_select_ident(skb); break; default: return -EINVAL; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 280d3d2a9792..7302398f0b1f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -1139,6 +1140,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, break; } + skb_reset_network_header(skb); + if (gso.gso_type != VIRTIO_NET_HDR_GSO_NONE) { pr_debug("GSO!\n"); switch (gso.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -1159,6 +1162,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, current->comm); } skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + if (skb->protocol == htons(ETH_P_IPV6)) + ipv6_proxy_select_ident(skb); break; } default: @@ -1189,7 +1194,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG; } - skb_reset_network_header(skb); skb_probe_transport_header(skb, 0); rxhash = skb_get_hash(skb); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 97f472012438..4292929392b0 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,6 +671,8 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } +void ipv6_proxy_select_ident(struct sk_buff *skb); + int ip6_dst_hoplimit(struct dst_entry *dst); static inline int ip6_sk_dst_hoplimit(struct ipv6_pinfo *np, struct flowi6 *fl6, diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index fc24c390af05..97f41a3e68d9 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -3,11 +3,45 @@ * not configured or static. These functions are needed by GSO/GRO implementation. */ #include +#include #include #include #include #include +/* This function exists only for tap drivers that must support broken + * clients requesting UFO without specifying an IPv6 fragment ID. + * + * This is similar to ipv6_select_ident() but we use an independent hash + * seed to limit information leakage. + * + * The network header must be set before calling this. + */ +void ipv6_proxy_select_ident(struct sk_buff *skb) +{ + static u32 ip6_proxy_idents_hashrnd __read_mostly; + struct in6_addr buf[2]; + struct in6_addr *addrs; + u32 hash, id; + + addrs = skb_header_pointer(skb, + skb_network_offset(skb) + + offsetof(struct ipv6hdr, saddr), + sizeof(buf), buf); + if (!addrs) + return; + + net_get_random_once(&ip6_proxy_idents_hashrnd, + sizeof(ip6_proxy_idents_hashrnd)); + + hash = __ipv6_addr_jhash(&addrs[1], ip6_proxy_idents_hashrnd); + hash = __ipv6_addr_jhash(&addrs[0], hash); + + id = ip_idents_reserve(hash, 1); + skb_shinfo(skb)->ip6_frag_id = htonl(id); +} +EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); + int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { u16 offset = sizeof(struct ipv6hdr); -- cgit v1.2.3 From 4d87716cd057bde3f90e304289c1fec88d45a1cc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Oct 2014 12:25:06 +0200 Subject: netfilter: nf_tables_bridge: update hook_mask to allow {pre,post}routing Fixes: 36d2af5 ("netfilter: nf_tables: allow to filter from prerouting and postrouting") Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nf_tables_bridge.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index da17a5eab8b4..074c557ab505 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -75,9 +75,11 @@ static const struct nf_chain_type filter_bridge = { .type = NFT_CHAIN_T_DEFAULT, .family = NFPROTO_BRIDGE, .owner = THIS_MODULE, - .hook_mask = (1 << NF_BR_LOCAL_IN) | + .hook_mask = (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | - (1 << NF_BR_LOCAL_OUT), + (1 << NF_BR_LOCAL_OUT) | + (1 << NF_BR_POST_ROUTING), }; static int __init nf_tables_bridge_init(void) -- cgit v1.2.3 From 052b9498eea532deb5de75277a53f6e0623215dc Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Oct 2014 18:24:57 +0200 Subject: netfilter: nf_reject_ipv4: split nf_send_reset() in smaller functions That can be reused by the reject bridge expression to build the reject packet. The new functions are: * nf_reject_ip_tcphdr_get(): to sanitize and to obtain the TCP header. * nf_reject_iphdr_put(): to build the IPv4 header. * nf_reject_ip_tcphdr_put(): to build the TCP header. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv4/nf_reject.h | 10 ++++ net/ipv4/netfilter/nf_reject_ipv4.c | 88 ++++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h index e8427193c777..03e928a55229 100644 --- a/include/net/netfilter/ipv4/nf_reject.h +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -1,6 +1,8 @@ #ifndef _IPV4_NF_REJECT_H #define _IPV4_NF_REJECT_H +#include +#include #include static inline void nf_send_unreach(struct sk_buff *skb_in, int code) @@ -10,4 +12,12 @@ static inline void nf_send_unreach(struct sk_buff *skb_in, int code) void nf_send_reset(struct sk_buff *oldskb, int hook); +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook); +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int ttl); +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth); + #endif /* _IPV4_NF_REJECT_H */ diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index 92b303dbd5fc..1baaa83dfe5c 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -12,43 +12,39 @@ #include #include #include +#include -/* Send RST reply */ -void nf_send_reset(struct sk_buff *oldskb, int hook) +const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *_oth, int hook) { - struct sk_buff *nskb; - const struct iphdr *oiph; - struct iphdr *niph; const struct tcphdr *oth; - struct tcphdr _otcph, *tcph; /* IP header checks: fragment. */ if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) - return; + return NULL; oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), - sizeof(_otcph), &_otcph); + sizeof(struct tcphdr), _oth); if (oth == NULL) - return; + return NULL; /* No RST for RST. */ if (oth->rst) - return; - - if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) - return; + return NULL; /* Check checksum */ if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) - return; - oiph = ip_hdr(oldskb); + return NULL; - nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + - LL_MAX_HEADER, GFP_ATOMIC); - if (!nskb) - return; + return oth; +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_get); - skb_reserve(nskb, LL_MAX_HEADER); +struct iphdr *nf_reject_iphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int ttl) +{ + struct iphdr *niph, *oiph = ip_hdr(oldskb); skb_reset_network_header(nskb); niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); @@ -57,10 +53,23 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) niph->tos = 0; niph->id = 0; niph->frag_off = htons(IP_DF); - niph->protocol = IPPROTO_TCP; + niph->protocol = protocol; niph->check = 0; niph->saddr = oiph->daddr; niph->daddr = oiph->saddr; + niph->ttl = ttl; + + nskb->protocol = htons(ETH_P_IP); + + return niph; +} +EXPORT_SYMBOL_GPL(nf_reject_iphdr_put); + +void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, + const struct tcphdr *oth) +{ + struct iphdr *niph = ip_hdr(nskb); + struct tcphdr *tcph; skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); @@ -69,9 +78,9 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) tcph->dest = oth->source; tcph->doff = sizeof(struct tcphdr) / 4; - if (oth->ack) + if (oth->ack) { tcph->seq = oth->ack_seq; - else { + } else { tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + oldskb->len - ip_hdrlen(oldskb) - (oth->doff << 2)); @@ -84,16 +93,43 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)tcph - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); +} +EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put); + +/* Send RST reply */ +void nf_send_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct iphdr *oiph; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + + oiph = ip_hdr(oldskb); + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); - nskb->protocol = htons(ETH_P_IP); + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + ip4_dst_hoplimit(skb_dst(nskb))); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) goto free_nskb; - niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); - /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) goto free_nskb; -- cgit v1.2.3 From 8bfcdf6671b1c8006c52c3eaf9fd1b5dfcf41c3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 26 Oct 2014 12:35:54 +0100 Subject: netfilter: nf_reject_ipv6: split nf_send_reset6() in smaller functions That can be reused by the reject bridge expression to build the reject packet. The new functions are: * nf_reject_ip6_tcphdr_get(): to sanitize and to obtain the TCP header. * nf_reject_ip6hdr_put(): to build the IPv6 header. * nf_reject_ip6_tcphdr_put(): to build the TCP header. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/ipv6/nf_reject.h | 10 ++ net/ipv6/netfilter/nf_reject_ipv6.c | 175 ++++++++++++++++++++------------- 2 files changed, 119 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h index 48e18810a9be..23216d48abf9 100644 --- a/include/net/netfilter/ipv6/nf_reject.h +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -15,4 +15,14 @@ nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code, void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook); +const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook); +struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int hoplimit); +void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen); + #endif /* _IPV6_NF_REJECT_H */ diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 20d9defc6c59..015eb8a80766 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -12,116 +12,102 @@ #include #include #include +#include -void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, + struct tcphdr *otcph, + unsigned int *otcplen, int hook) { - struct sk_buff *nskb; - struct tcphdr otcph, *tcph; - unsigned int otcplen, hh_len; - int tcphoff, needs_ack; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; -#define DEFAULT_TOS_VALUE 0x0U - const __u8 tclass = DEFAULT_TOS_VALUE; - struct dst_entry *dst = NULL; u8 proto; __be16 frag_off; - struct flowi6 fl6; - - if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || - (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - pr_debug("addr is not unicast.\n"); - return; - } + int tcphoff; proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), + &proto, &frag_off); if ((tcphoff < 0) || (tcphoff > oldskb->len)) { pr_debug("Cannot get TCP header.\n"); - return; + return NULL; } - otcplen = oldskb->len - tcphoff; + *otcplen = oldskb->len - tcphoff; /* IP header checks: fragment, too short. */ - if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - pr_debug("proto(%d) != IPPROTO_TCP, " - "or too short. otcplen = %d\n", - proto, otcplen); - return; + if (proto != IPPROTO_TCP || *otcplen < sizeof(struct tcphdr)) { + pr_debug("proto(%d) != IPPROTO_TCP or too short (len = %d)\n", + proto, *otcplen); + return NULL; } - if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) - BUG(); + otcph = skb_header_pointer(oldskb, tcphoff, sizeof(struct tcphdr), + otcph); + if (otcph == NULL) + return NULL; /* No RST for RST. */ - if (otcph.rst) { + if (otcph->rst) { pr_debug("RST is set\n"); - return; + return NULL; } /* Check checksum. */ if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { pr_debug("TCP checksum is invalid\n"); - return; - } - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.saddr = oip6h->daddr; - fl6.daddr = oip6h->saddr; - fl6.fl6_sport = otcph.dest; - fl6.fl6_dport = otcph.source; - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); - dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { - dst_release(dst); - return; - } - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(dst)) - return; - - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, - GFP_ATOMIC); - - if (!nskb) { - net_dbg_ratelimited("cannot alloc skb\n"); - dst_release(dst); - return; + return NULL; } - skb_dst_set(nskb, dst); + return otcph; +} +EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_get); - skb_reserve(nskb, hh_len + dst->header_len); +struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + __be16 protocol, int hoplimit) +{ + struct ipv6hdr *ip6h; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; skb_put(nskb, sizeof(struct ipv6hdr)); skb_reset_network_header(nskb); ip6h = ipv6_hdr(nskb); ip6_flow_hdr(ip6h, tclass, 0); - ip6h->hop_limit = ip6_dst_hoplimit(dst); - ip6h->nexthdr = IPPROTO_TCP; + ip6h->hop_limit = hoplimit; + ip6h->nexthdr = protocol; ip6h->saddr = oip6h->daddr; ip6h->daddr = oip6h->saddr; + nskb->protocol = htons(ETH_P_IPV6); + + return ip6h; +} +EXPORT_SYMBOL_GPL(nf_reject_ip6hdr_put); + +void nf_reject_ip6_tcphdr_put(struct sk_buff *nskb, + const struct sk_buff *oldskb, + const struct tcphdr *oth, unsigned int otcplen) +{ + struct tcphdr *tcph; + int needs_ack; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; - tcph->source = otcph.dest; - tcph->dest = otcph.source; + tcph->source = oth->dest; + tcph->dest = oth->source; - if (otcph.ack) { + if (oth->ack) { needs_ack = 0; - tcph->seq = otcph.ack_seq; + tcph->seq = oth->ack_seq; tcph->ack_seq = 0; } else { needs_ack = 1; - tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin - + otcplen - (otcph.doff<<2)); + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + + otcplen - (oth->doff<<2)); tcph->seq = 0; } @@ -139,6 +125,63 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) sizeof(struct tcphdr), IPPROTO_TCP, csum_partial(tcph, sizeof(struct tcphdr), 0)); +} +EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_put); + +void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct tcphdr _otcph; + const struct tcphdr *otcph; + unsigned int otcplen, hh_len; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); + struct ipv6hdr *ip6h; + struct dst_entry *dst = NULL; + struct flowi6 fl6; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + pr_debug("addr is not unicast.\n"); + return; + } + + otcph = nf_reject_ip6_tcphdr_get(oldskb, &_otcph, &otcplen, hook); + if (!otcph) + return; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.saddr = oip6h->daddr; + fl6.daddr = oip6h->saddr; + fl6.fl6_sport = otcph->dest; + fl6.fl6_dport = otcph->source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); + if (dst == NULL || dst->error) { + dst_release(dst); + return; + } + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) + return; + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + net_dbg_ratelimited("cannot alloc skb\n"); + dst_release(dst); + return; + } + + skb_dst_set(nskb, dst); + + skb_reserve(nskb, hh_len + dst->header_len); + ip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + ip6_dst_hoplimit(dst)); + nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen); nf_ct_attach(nskb, oldskb); -- cgit v1.2.3 From 523b929d5446c023e1219aa81455a8c766cac883 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 25 Oct 2014 18:40:26 +0200 Subject: netfilter: nft_reject_bridge: don't use IP stack to reject traffic If the packet is received via the bridge stack, this cannot reject packets from the IP stack. This adds functions to build the reject packet and send it from the bridge stack. Comments and assumptions on this patch: 1) Validate the IPv4 and IPv6 headers before further processing, given that the packet comes from the bridge stack, we cannot assume they are clean. Truncated packets are dropped, we follow similar approach in the existing iptables match/target extensions that need to inspect layer 4 headers that is not available. This also includes packets that are directed to multicast and broadcast ethernet addresses. 2) br_deliver() is exported to inject the reject packet via bridge localout -> postrouting. So the approach is similar to what we already do in the iptables reject target. The reject packet is sent to the bridge port from which we have received the original packet. 3) The reject packet is forged based on the original packet. The TTL is set based on sysctl_ip_default_ttl for IPv4 and per-net ipv6.devconf_all hoplimit for IPv6. Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_forward.c | 1 + net/bridge/netfilter/nft_reject_bridge.c | 263 +++++++++++++++++++++++++++++-- 2 files changed, 254 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 992ec49a96aa..44cb786b925a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -112,6 +112,7 @@ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) kfree_skb(skb); } +EXPORT_SYMBOL_GPL(br_deliver); /* called with rcu_read_lock */ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0) diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index a76479535df2..31b27e1bab9f 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -16,6 +16,237 @@ #include #include #include +#include +#include +#include "../br_private.h" + +static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, + struct sk_buff *nskb) +{ + struct ethhdr *eth; + + eth = (struct ethhdr *)skb_push(nskb, ETH_HLEN); + skb_reset_mac_header(nskb); + ether_addr_copy(eth->h_source, eth_hdr(oldskb)->h_dest); + ether_addr_copy(eth->h_dest, eth_hdr(oldskb)->h_source); + eth->h_proto = eth_hdr(oldskb)->h_proto; + skb_pull(nskb, ETH_HLEN); +} + +static int nft_reject_iphdr_validate(struct sk_buff *oldskb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(oldskb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(oldskb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (oldskb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(oldskb, iph->ihl*4)) + return 0; + + return 1; +} + +static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _oth; + + if (!nft_reject_iphdr_validate(oldskb)) + return; + + oth = nf_reject_ip_tcphdr_get(oldskb, &_oth, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, + sysctl_ip_default_ttl); + nf_reject_ip_tcphdr_put(nskb, oldskb, oth); + niph->ttl = sysctl_ip_default_ttl; + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, int hook, + u8 code) +{ + struct sk_buff *nskb; + struct iphdr *niph; + struct icmphdr *icmph; + unsigned int len; + void *payload; + __wsum csum; + + if (!nft_reject_iphdr_validate(oldskb)) + return; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return; + + /* RFC says return as much as we can without exceeding 576 bytes. */ + len = min_t(unsigned int, 536, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), 0)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmphdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, + sysctl_ip_default_ttl); + + skb_reset_transport_header(nskb); + icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr)); + memset(icmph, 0, sizeof(*icmph)); + icmph->type = ICMP_DEST_UNREACH; + icmph->code = code; + + payload = skb_put(nskb, len); + memcpy(payload, skb_network_header(oldskb), len); + + csum = csum_partial((void *)icmph, len + sizeof(struct icmphdr), 0); + icmph->checksum = csum_fold(csum); + + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static int nft_reject_ip6hdr_validate(struct sk_buff *oldskb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(oldskb, sizeof(struct ipv6hdr))) + return 0; + + hdr = ipv6_hdr(oldskb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > oldskb->len) + return 0; + + return 1; +} + +static void nft_reject_br_send_v6_tcp_reset(struct net *net, + struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct tcphdr *oth; + struct tcphdr _oth; + unsigned int otcplen; + struct ipv6hdr *nip6h; + + if (!nft_reject_ip6hdr_validate(oldskb)) + return; + + oth = nf_reject_ip6_tcphdr_get(oldskb, &_oth, &otcplen, hook); + if (!oth) + return; + + nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP, + net->ipv6.devconf_all->hop_limit); + nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} + +static void nft_reject_br_send_v6_unreach(struct net *net, + struct sk_buff *oldskb, int hook, + u8 code) +{ + struct sk_buff *nskb; + struct ipv6hdr *nip6h; + struct icmp6hdr *icmp6h; + unsigned int len; + void *payload; + + if (!nft_reject_ip6hdr_validate(oldskb)) + return; + + /* Include "As much of invoking packet as possible without the ICMPv6 + * packet exceeding the minimum IPv6 MTU" in the ICMP payload. + */ + len = min_t(unsigned int, 1220, oldskb->len); + + if (!pskb_may_pull(oldskb, len)) + return; + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct icmp6hdr) + + LL_MAX_HEADER + len, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6, + net->ipv6.devconf_all->hop_limit); + + skb_reset_transport_header(nskb); + icmp6h = (struct icmp6hdr *)skb_put(nskb, sizeof(struct icmp6hdr)); + memset(icmp6h, 0, sizeof(*icmp6h)); + icmp6h->icmp6_type = ICMPV6_DEST_UNREACH; + icmp6h->icmp6_code = code; + + payload = skb_put(nskb, len); + memcpy(payload, skb_network_header(oldskb), len); + nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr)); + + icmp6h->icmp6_cksum = + csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, + nskb->len - sizeof(struct ipv6hdr), + IPPROTO_ICMPV6, + csum_partial(icmp6h, + nskb->len - sizeof(struct ipv6hdr), + 0)); + + nft_reject_br_push_etherhdr(oldskb, nskb); + + br_deliver(br_port_get_rcu(oldskb->dev), nskb); +} static void nft_reject_bridge_eval(const struct nft_expr *expr, struct nft_data data[NFT_REG_MAX + 1], @@ -23,35 +254,46 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + const unsigned char *dest = eth_hdr(pkt->skb)->h_dest; + + if (is_broadcast_ether_addr(dest) || + is_multicast_ether_addr(dest)) + goto out; switch (eth_hdr(pkt->skb)->h_proto) { case htons(ETH_P_IP): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach(pkt->skb, priv->icmp_code); + nft_reject_br_send_v4_unreach(pkt->skb, + pkt->ops->hooknum, + priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nf_send_reset(pkt->skb, pkt->ops->hooknum); + nft_reject_br_send_v4_tcp_reset(pkt->skb, + pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach(pkt->skb, - nft_reject_icmp_code(priv->icmp_code)); + nft_reject_br_send_v4_unreach(pkt->skb, + pkt->ops->hooknum, + nft_reject_icmp_code(priv->icmp_code)); break; } break; case htons(ETH_P_IPV6): switch (priv->type) { case NFT_REJECT_ICMP_UNREACH: - nf_send_unreach6(net, pkt->skb, priv->icmp_code, - pkt->ops->hooknum); + nft_reject_br_send_v6_unreach(net, pkt->skb, + pkt->ops->hooknum, + priv->icmp_code); break; case NFT_REJECT_TCP_RST: - nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + nft_reject_br_send_v6_tcp_reset(net, pkt->skb, + pkt->ops->hooknum); break; case NFT_REJECT_ICMPX_UNREACH: - nf_send_unreach6(net, pkt->skb, - nft_reject_icmpv6_code(priv->icmp_code), - pkt->ops->hooknum); + nft_reject_br_send_v6_unreach(net, pkt->skb, + pkt->ops->hooknum, + nft_reject_icmpv6_code(priv->icmp_code)); break; } break; @@ -59,6 +301,7 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, /* No explicit way to reject this protocol, drop it. */ break; } +out: data[NFT_REG_VERDICT].verdict = NF_DROP; } -- cgit v1.2.3 From 127917c29a432c3b798e014a1714e9c1af0f87fe Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 27 Oct 2014 14:08:17 +0100 Subject: netfilter: nft_reject_bridge: restrict reject to prerouting and input Restrict the reject expression to the prerouting and input bridge hooks. If we allow this to be used from forward or any other later bridge hook, if the frame is flooded to several ports, we'll end up sending several reject packets, one per cloned packet. Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nft_reject_bridge.c | 33 +++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 31b27e1bab9f..654c9018e3e7 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "../br_private.h" static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, @@ -305,12 +306,34 @@ out: data[NFT_REG_VERDICT].verdict = NF_DROP; } +static int nft_reject_bridge_validate_hooks(const struct nft_chain *chain) +{ + struct nft_base_chain *basechain; + + if (chain->flags & NFT_BASE_CHAIN) { + basechain = nft_base_chain(chain); + + switch (basechain->ops[0].hooknum) { + case NF_BR_PRE_ROUTING: + case NF_BR_LOCAL_IN: + break; + default: + return -EOPNOTSUPP; + } + } + return 0; +} + static int nft_reject_bridge_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_reject *priv = nft_expr_priv(expr); - int icmp_code; + int icmp_code, err; + + err = nft_reject_bridge_validate_hooks(ctx->chain); + if (err < 0) + return err; if (tb[NFTA_REJECT_TYPE] == NULL) return -EINVAL; @@ -359,6 +382,13 @@ nla_put_failure: return -1; } +static int nft_reject_bridge_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_reject_bridge_validate_hooks(ctx->chain); +} + static struct nft_expr_type nft_reject_bridge_type; static const struct nft_expr_ops nft_reject_bridge_ops = { .type = &nft_reject_bridge_type, @@ -366,6 +396,7 @@ static const struct nft_expr_ops nft_reject_bridge_ops = { .eval = nft_reject_bridge_eval, .init = nft_reject_bridge_init, .dump = nft_reject_bridge_dump, + .validate = nft_reject_bridge_validate, }; static struct nft_expr_type nft_reject_bridge_type __read_mostly = { -- cgit v1.2.3 From f7065f4bd3fe4ad6bf7e49ba7c68baa2c7046146 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 30 Oct 2014 00:49:57 -0700 Subject: mpls: Fix mpls_gso handler. mpls gso handler needs to pull skb after segmenting skb. CC: Simon Horman Signed-off-by: Pravin B Shelar Acked-by: Simon Horman Signed-off-by: David S. Miller --- net/mpls/mpls_gso.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index f0f5309a2d72..e3545f21a099 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -59,8 +59,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, * above pulled. It will be re-pushed after returning * skb_mac_gso_segment(), an indirect caller of this function. */ - __skb_push(skb, skb->data - skb_mac_header(skb)); - + __skb_pull(skb, skb->data - skb_mac_header(skb)); out: return segs; } -- cgit v1.2.3 From de05c400f7dfa566f598140f8604a5de8067cd5f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 30 Oct 2014 00:50:04 -0700 Subject: mpls: Allow mpls_gso to be built as module Kconfig already allows mpls to be built as module. Following patch fixes Makefile to do same. CC: Simon Horman Signed-off-by: Pravin B Shelar Acked-by: Simon Horman Signed-off-by: David S. Miller --- net/mpls/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 0a3c171be537..6dec088c2d0f 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -1,4 +1,4 @@ # # Makefile for MPLS. # -obj-y += mpls_gso.o +obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o -- cgit v1.2.3 From e0fb6fb6d52686134b2ece144060219591d4f8d3 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 30 Oct 2014 20:50:15 -0700 Subject: net: ethtool: Return -EOPNOTSUPP if user space tries to read EEPROM with lengh 0 If a driver supports reading EEPROM but no EEPROM is installed in the system, the driver's get_eeprom_len function returns 0. ethtool will subsequently try to read that zero-length EEPROM anyway. If the driver does not support EEPROM access at all, this operation will return -EOPNOTSUPP. If the driver does support EEPROM access but no EEPROM is installed, the operation will return -EINVAL. Return -EOPNOTSUPP in both cases for consistency. Signed-off-by: Guenter Roeck Tested-by: Andrew Lunn Signed-off-by: David S. Miller --- net/core/ethtool.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 1600aa24d36b..06dfb293e5aa 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1036,7 +1036,8 @@ static int ethtool_get_eeprom(struct net_device *dev, void __user *useraddr) { const struct ethtool_ops *ops = dev->ethtool_ops; - if (!ops->get_eeprom || !ops->get_eeprom_len) + if (!ops->get_eeprom || !ops->get_eeprom_len || + !ops->get_eeprom_len(dev)) return -EOPNOTSUPP; return ethtool_get_any_eeprom(dev, useraddr, ops->get_eeprom, @@ -1052,7 +1053,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) u8 *data; int ret = 0; - if (!ops->set_eeprom || !ops->get_eeprom_len) + if (!ops->set_eeprom || !ops->get_eeprom_len || + !ops->get_eeprom_len(dev)) return -EOPNOTSUPP; if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) -- cgit v1.2.3