diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2010-04-29 11:01:49 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-05-01 15:00:15 -0700 |
commit | 43815482370c510c569fd18edb57afcb0fa8cab6 (patch) | |
tree | 063efaae3758402b84f056438b704d1de68f7837 /net/socket.c | |
parent | 83d7eb2979cd3390c375470225dd2d8f2009bc70 (diff) | |
download | lwn-43815482370c510c569fd18edb57afcb0fa8cab6.tar.gz lwn-43815482370c510c569fd18edb57afcb0fa8cab6.zip |
net: sock_def_readable() and friends RCU conversion
sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we
need two atomic operations (and associated dirtying) per incoming
packet.
RCU conversion is pretty much needed :
1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).
[Future patch will add a list anchor for wakeup coalescing]
2) Attach one such structure to each "struct socket" created in
sock_alloc_inode().
3) Respect RCU grace period when freeing a "struct socket_wq"
4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer to
"struct socket_wq".
5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep
6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.
7) Change all sk_has_sleeper() callers to :
- Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
- Use wq_has_sleeper() to wake up tasks if there are any sleepers.
- Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)
8) sock_wake_async() is modified to use rcu protection as well.
9) Exceptions :
macvtap, drivers/net/tun.c and af_unix use an integrated "struct socket_wq"
instead of a dynamically allocated one. They don't need RCU freeing.
Some cleanups or follow-ups are probably needed (a possible
sk_callback_lock conversion to a spinlock, for example...).
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/socket.c')
-rw-r--r-- | net/socket.c | 47 |
1 files changed, 36 insertions, 11 deletions
diff --git a/net/socket.c b/net/socket.c index cb7c1f6c0d6e..dae8c6b84a09 100644 --- a/net/socket.c +++ b/net/socket.c @@ -252,9 +252,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; - init_waitqueue_head(&ei->socket.wait); + ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL); + if (!ei->socket.wq) { + kmem_cache_free(sock_inode_cachep, ei); + return NULL; + } + init_waitqueue_head(&ei->socket.wq->wait); + ei->socket.wq->fasync_list = NULL; - ei->socket.fasync_list = NULL; ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; ei->socket.ops = NULL; @@ -264,10 +269,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } + +static void wq_free_rcu(struct rcu_head *head) +{ + struct socket_wq *wq = container_of(head, struct socket_wq, rcu); + + kfree(wq); +} + static void sock_destroy_inode(struct inode *inode) { - kmem_cache_free(sock_inode_cachep, - container_of(inode, struct socket_alloc, vfs_inode)); + struct socket_alloc *ei; + + ei = container_of(inode, struct socket_alloc, vfs_inode); + call_rcu(&ei->socket.wq->rcu, wq_free_rcu); + kmem_cache_free(sock_inode_cachep, ei); } static void init_once(void *foo) @@ -513,7 +529,7 @@ void sock_release(struct socket *sock) module_put(owner); } - if (sock->fasync_list) + if (sock->wq->fasync_list) printk(KERN_ERR "sock_release: fasync list not empty!\n"); percpu_sub(sockets_in_use, 1); @@ -1080,9 +1096,9 @@ static int sock_fasync(int fd, struct file *filp, int on) lock_sock(sk); - fasync_helper(fd, filp, on, &sock->fasync_list); + fasync_helper(fd, filp, on, &sock->wq->fasync_list); - if (!sock->fasync_list) + if (!sock->wq->fasync_list) sock_reset_flag(sk, SOCK_FASYNC); else sock_set_flag(sk, SOCK_FASYNC); @@ -1091,12 +1107,20 @@ static int sock_fasync(int fd, struct file *filp, int on) return 0; } -/* This function may be called only under socket lock or callback_lock */ +/* 
This function may be called only under socket lock or callback_lock or rcu_lock */ int sock_wake_async(struct socket *sock, int how, int band) { - if (!sock || !sock->fasync_list) + struct socket_wq *wq; + + if (!sock) return -1; + rcu_read_lock(); + wq = rcu_dereference(sock->wq); + if (!wq || !wq->fasync_list) { + rcu_read_unlock(); + return -1; + } switch (how) { case SOCK_WAKE_WAITD: if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) @@ -1108,11 +1132,12 @@ int sock_wake_async(struct socket *sock, int how, int band) /* fall through */ case SOCK_WAKE_IO: call_kill: - kill_fasync(&sock->fasync_list, SIGIO, band); + kill_fasync(&wq->fasync_list, SIGIO, band); break; case SOCK_WAKE_URG: - kill_fasync(&sock->fasync_list, SIGURG, band); + kill_fasync(&wq->fasync_list, SIGURG, band); } + rcu_read_unlock(); return 0; } |