diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-04-21 09:45:37 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-04-28 13:16:35 -0700 |
commit | f6d8bd051c391c1c0458a30b2a7abcd939329259 (patch) | |
tree | 1dc4daecdeb0b42c2c6b59d7d6b41e091c11db5f /net/ipv4/af_inet.c | |
parent | 0a14842f5a3c0e88a1e59fac5c3025db39721f74 (diff) | |
download | lwn-f6d8bd051c391c1c0458a30b2a7abcd939329259.tar.gz lwn-f6d8bd051c391c1c0458a30b2a7abcd939329259.zip |
inet: add RCU protection to inet->opt
We lack proper synchronization to manipulate inet->opt ip_options
Problem is ip_make_skb() calls ip_setup_cork() and
ip_setup_cork() possibly makes a copy of ipc->opt (struct ip_options),
without any protection against another thread manipulating inet->opt.
Another thread can change inet->opt pointer and free old one under us.
Use RCU to protect inet->opt (changed to inet->inet_opt).
Instead of handling atomic refcounts, just copy ip_options when
necessary, to avoid cache line dirtying.
We cant insert an rcu_head in struct ip_options since its included in
skb->cb[], so this patch is large because I had to introduce a new
ip_options_rcu structure.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/af_inet.c')
-rw-r--r-- | net/ipv4/af_inet.c | 17 |
1 files changed, 12 insertions, 5 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0413af3e2285..963a621e75c7 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -153,7 +153,7 @@ void inet_sock_destruct(struct sock *sk) WARN_ON(sk->sk_wmem_queued); WARN_ON(sk->sk_forward_alloc); - kfree(inet->opt); + kfree(rcu_dereference_protected(inet->inet_opt, 1)); dst_release(rcu_dereference_check(sk->sk_dst_cache, 1)); sk_refcnt_debug_dec(sk); } @@ -1106,9 +1106,12 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct flowi4 fl4; struct rtable *rt; __be32 new_saddr; + struct ip_options_rcu *inet_opt; - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; + inet_opt = rcu_dereference_protected(inet->inet_opt, + sock_owned_by_user(sk)); + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; /* Query new route. */ rt = ip_route_connect(&fl4, daddr, 0, RT_CONN_FLAGS(sk), @@ -1148,6 +1151,7 @@ int inet_sk_rebuild_header(struct sock *sk) struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); __be32 daddr; + struct ip_options_rcu *inet_opt; int err; /* Route is OK, nothing to do. */ @@ -1155,9 +1159,12 @@ int inet_sk_rebuild_header(struct sock *sk) return 0; /* Reroute. */ + rcu_read_lock(); + inet_opt = rcu_dereference(inet->inet_opt); daddr = inet->inet_daddr; - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; + if (inet_opt && inet_opt->opt.srr) + daddr = inet_opt->opt.faddr; + rcu_read_unlock(); rt = ip_route_output_ports(sock_net(sk), sk, daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_protocol, RT_CONN_FLAGS(sk), |