summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2014-06-24 10:05:11 -0700
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2014-07-28 08:00:04 -0700
commit86e48c03d774e01ccd71ecba4fc4b5c2bc0b5b41 (patch)
tree65a2d12414c21cbeb61e636e5360399a0c63b18d
parent1b56220b0df8f0963bacbf35637545b550484a64 (diff)
downloadlwn-86e48c03d774e01ccd71ecba4fc4b5c2bc0b5b41.tar.gz
lwn-86e48c03d774e01ccd71ecba4fc4b5c2bc0b5b41.zip
ipv4: fix dst race in sk_dst_get()
[ Upstream commit f88649721268999bdff09777847080a52004f691 ] When IP route cache had been removed in linux-3.6, we broke assumption that dst entries were all freed after rcu grace period. DST_NOCACHE dst were supposed to be freed from dst_release(). But it appears we want to keep such dst around, either in UDP sockets or tunnels. In sk_dst_get() we need to make sure dst refcount is not 0 before incrementing it, or else we might end up freeing a dst twice. DST_NOCACHE set on a dst does not mean this dst can not be attached to a socket or a tunnel. Then, before actual freeing, we need to observe a rcu grace period to make sure all other cpus can catch the fact the dst is no longer usable. Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: Dormando <dormando@rydia.net> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--include/net/sock.h4
-rw-r--r--net/core/dst.c16
2 files changed, 13 insertions, 7 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 72f710d2f75a..ff57aff205cd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1727,8 +1727,8 @@ sk_dst_get(struct sock *sk)
rcu_read_lock();
dst = rcu_dereference(sk->sk_dst_cache);
- if (dst)
- dst_hold(dst);
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
rcu_read_unlock();
return dst;
}
diff --git a/net/core/dst.c b/net/core/dst.c
index df9cc810ec8e..c0e021871df8 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -267,6 +267,15 @@ again:
}
EXPORT_SYMBOL(dst_destroy);
+static void dst_destroy_rcu(struct rcu_head *head)
+{
+ struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
+
+ dst = dst_destroy(dst);
+ if (dst)
+ __dst_free(dst);
+}
+
void dst_release(struct dst_entry *dst)
{
if (dst) {
@@ -274,11 +283,8 @@ void dst_release(struct dst_entry *dst)
newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
- dst = dst_destroy(dst);
- if (dst)
- __dst_free(dst);
- }
+ if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
EXPORT_SYMBOL(dst_release);