summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/ip_forward.c2
-rw-r--r--net/ipv4/ip_gre.c5
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_tunnel.c13
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c15
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c3
-rw-r--r--net/ipv4/route.c18
-rw-r--r--net/ipv4/tcp_illinois.c2
-rw-r--r--net/ipv4/tcp_input.c24
-rw-r--r--net/ipv4/xfrm4_output.c3
10 files changed, 43 insertions, 44 deletions
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 2dd21c3281a1..b54b948b0596 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -55,7 +55,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
if (skb->ignore_df)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 45d97e9b2759..0901de42ed85 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -970,9 +970,6 @@ static void __gre_tunnel_init(struct net_device *dev)
t_hlen = tunnel->hlen + sizeof(struct iphdr);
- dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
- dev->mtu = ETH_DATA_LEN - t_hlen - 4;
-
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
@@ -1290,8 +1287,6 @@ static int erspan_tunnel_init(struct net_device *dev)
erspan_hdr_len(tunnel->erspan_ver);
t_hlen = tunnel->hlen + sizeof(struct iphdr);
- dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
- dev->mtu = ETH_DATA_LEN - t_hlen - 4;
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e8e675be60ec..66340ab750e6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -248,7 +248,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
/* common case: seglen is <= mtu
*/
- if (skb_gso_validate_mtu(skb, mtu))
+ if (skb_gso_validate_network_len(skb, mtu))
return ip_finish_output2(net, sk, skb);
/* Slowpath - GSO segment length exceeds the egress MTU.
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index d786a8441bce..6d21068f9b55 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -710,16 +710,9 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
- if (tunnel->fwmark) {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
- }
- else {
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- skb->mark);
- }
+ init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 4b02ab39ebc5..8a8ae61cea71 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -232,7 +232,6 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
refcount_set(&c->refcount, 1);
- refcount_set(&c->entries, 1);
spin_lock_bh(&cn->lock);
if (__clusterip_config_find(net, ip)) {
@@ -263,8 +262,10 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
c->notifier.notifier_call = clusterip_netdev_event;
err = register_netdevice_notifier(&c->notifier);
- if (!err)
+ if (!err) {
+ refcount_set(&c->entries, 1);
return c;
+ }
#ifdef CONFIG_PROC_FS
proc_remove(c->pde);
@@ -273,7 +274,7 @@ err:
spin_lock_bh(&cn->lock);
list_del_rcu(&c->list);
spin_unlock_bh(&cn->lock);
- kfree(c);
+ clusterip_config_put(c);
return ERR_PTR(err);
}
@@ -496,12 +497,15 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
return PTR_ERR(config);
}
}
- cipinfo->config = config;
ret = nf_ct_netns_get(par->net, par->family);
- if (ret < 0)
+ if (ret < 0) {
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
+ clusterip_config_entry_put(par->net, config);
+ clusterip_config_put(config);
+ return ret;
+ }
if (!par->net->xt.clusterip_deprecated_warning) {
pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
@@ -509,6 +513,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
par->net->xt.clusterip_deprecated_warning = true;
}
+ cipinfo->config = config;
return ret;
}
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index 25d2975da156..0cd46bffa469 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -111,6 +111,7 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
default:
return -1;
}
+ csum_replace4(&iph->check, addr, new_addr);
return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
}
@@ -185,7 +186,7 @@ static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
if ((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0)
return false;
- if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
return false;
return true;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a4f44d815a61..860b3fd2f54b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -128,10 +128,11 @@ static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
-static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
+static u32 ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
+
/*
* Interface to generic destination cache.
*/
@@ -930,14 +931,23 @@ out_put_peer:
static int ip_error(struct sk_buff *skb)
{
- struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
struct rtable *rt = skb_rtable(skb);
+ struct net_device *dev = skb->dev;
+ struct in_device *in_dev;
struct inet_peer *peer;
unsigned long now;
struct net *net;
bool send;
int code;
+ if (netif_is_l3_master(skb->dev)) {
+ dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
+ if (!dev)
+ goto out;
+ }
+
+ in_dev = __in_dev_get_rcu(dev);
+
/* IP on this device is disabled. */
if (!in_dev)
goto out;
@@ -2818,6 +2828,7 @@ void ip_rt_multicast_event(struct in_device *in_dev)
static int ip_rt_gc_interval __read_mostly = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
static int ip_rt_gc_elasticity __read_mostly = 8;
+static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU;
static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
void __user *buffer,
@@ -2933,7 +2944,8 @@ static struct ctl_table ipv4_route_table[] = {
.data = &ip_rt_min_pmtu,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &ip_min_valid_pmtu,
},
{
.procname = "min_adv_mss",
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 7c843578f233..faddf4f9a707 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -6,7 +6,7 @@
* The algorithm is described in:
* "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm
* for High-Speed Networks"
- * http://www.ifp.illinois.edu/~srikant/Papers/liubassri06perf.pdf
+ * http://tamerbasar.csl.illinois.edu/LiuBasarSrikantPerfEvalArtJun2008.pdf
*
* Implemented from description in paper and ns-2 simulation.
* Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org>
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 575d3c1fb6e8..9a1b3c1c1c14 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1971,11 +1971,6 @@ void tcp_enter_loss(struct sock *sk)
/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous
* loss recovery is underway except recurring timeout(s) on
* the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
- *
- * In theory F-RTO can be used repeatedly during loss recovery.
- * In practice this interacts badly with broken middle-boxes that
- * falsely raise the receive window, which results in repeated
- * timeouts and stop-and-go behavior.
*/
tp->frto = net->ipv4.sysctl_tcp_frto &&
(new_recovery || icsk->icsk_retransmits) &&
@@ -2631,18 +2626,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
tcp_try_undo_loss(sk, false))
return;
- /* The ACK (s)acks some never-retransmitted data meaning not all
- * the data packets before the timeout were lost. Therefore we
- * undo the congestion window and state. This is essentially
- * the operation in F-RTO (RFC5682 section 3.1 step 3.b). Since
- * a retransmitted skb is permantly marked, we can apply such an
- * operation even if F-RTO was not used.
- */
- if ((flag & FLAG_ORIG_SACK_ACKED) &&
- tcp_try_undo_loss(sk, tp->undo_marker))
- return;
-
if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
+ /* Step 3.b. A timeout is spurious if not all data are
+ * lost, i.e., never-retransmitted data are (s)acked.
+ */
+ if ((flag & FLAG_ORIG_SACK_ACKED) &&
+ tcp_try_undo_loss(sk, true))
+ return;
+
if (after(tp->snd_nxt, tp->high_seq)) {
if (flag & FLAG_DATA_SACKED || is_dupack)
tp->frto = 0; /* Step 3.a. loss was real */
@@ -4001,6 +3992,7 @@ void tcp_reset(struct sock *sk)
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
+ tcp_write_queue_purge(sk);
tcp_done(sk);
if (!sock_flag(sk, SOCK_DEAD))
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 94b8702603bc..be980c195fc5 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -30,7 +30,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
mtu = dst_mtu(skb_dst(skb));
if ((!skb_is_gso(skb) && skb->len > mtu) ||
- (skb_is_gso(skb) && skb_gso_network_seglen(skb) > ip_skb_dst_mtu(skb->sk, skb))) {
+ (skb_is_gso(skb) &&
+ !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
skb->protocol = htons(ETH_P_IP);
if (skb->sk)