From f288bef46443eb3a0b212c1c57b222c0497e06f6 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 21 Aug 2012 11:16:57 +0800 Subject: tipc: fix race/inefficiencies in poll/wait behaviour When an application blocks at poll/select on a TIPC socket while requesting a specific event mask, both the filter_rcv() and wakeupdispatch() case will wake it up unconditionally whenever the state changes (i.e an incoming message arrives, or congestion has subsided). No mask is used. To avoid this, we populate sk->sk_data_ready and sk->sk_write_space with tipc_data_ready and tipc_write_space respectively, which makes tipc more in alignment with the rest of the networking code. These pass the exact set of possible events to the waker in fs/select.c hence avoiding waking up blocked processes unnecessarily. In doing so, we uncover another issue -- that there needs to be a memory barrier in these poll/receive callbacks, otherwise we are subject to the the same race as documented above wq_has_sleeper() [in commit a57de0b4 "net: adding memory barrier to the poll and receive callbacks"]. So we need to replace poll_wait() with sock_poll_wait() and use rcu protection for the sk->sk_wq pointer in these two new functions. Signed-off-by: Ying Xue Signed-off-by: Paul Gortmaker --- net/tipc/socket.c | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index fd5f042dbff4..b5fc8ed1d1fe 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -62,6 +62,8 @@ struct tipc_sock { static int backlog_rcv(struct sock *sk, struct sk_buff *skb); static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *tport); +static void tipc_data_ready(struct sock *sk, int len); +static void tipc_write_space(struct sock *sk); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -221,6 +223,8 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol, sock_init_data(sock, sk); sk->sk_backlog_rcv = backlog_rcv; sk->sk_rcvbuf = TIPC_FLOW_CONTROL_WIN * 2 * TIPC_MAX_USER_MSG_SIZE * 2; + sk->sk_data_ready = tipc_data_ready; + sk->sk_write_space = tipc_write_space; tipc_sk(sk)->p = tp_ptr; tipc_sk(sk)->conn_timeout = CONN_TIMEOUT_DEFAULT; @@ -435,7 +439,7 @@ static unsigned int poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; u32 mask = 0; - poll_wait(file, sk_sleep(sk), wait); + sock_poll_wait(file, sk_sleep(sk), wait); switch ((int)sock->state) { case SS_READY: @@ -1125,6 +1129,39 @@ exit: return sz_copied ? sz_copied : res; } +/** + * tipc_write_space - wake up thread if port congestion is released + * @sk: socket + */ +static void tipc_write_space(struct sock *sk) +{ + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | + POLLWRNORM | POLLWRBAND); + rcu_read_unlock(); +} + +/** + * tipc_data_ready - wake up threads to indicate messages have been received + * @sk: socket + * @len: the length of messages + */ +static void tipc_data_ready(struct sock *sk, int len) +{ + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLIN | + POLLRDNORM | POLLRDBAND); + rcu_read_unlock(); +} + /** * rx_queue_full - determine if receive queue can accept another message * @msg: message to be added to queue @@ -1222,8 +1259,7 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf) tipc_disconnect_port(tipc_sk_port(sk)); } - if (waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + sk->sk_data_ready(sk, 0); return TIPC_OK; } @@ -1290,8 +1326,7 @@ static void wakeupdispatch(struct tipc_port *tport) { struct sock *sk = (struct sock *)tport->usr_handle; - if (waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + sk->sk_write_space(sk); } /** -- cgit v1.2.3 From c4fc298ab44011a7f7a391bf00350acf481eeaeb Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Tue, 16 Oct 2012 16:47:06 +0200 Subject: tipc: return POLLOUT for sockets in an unconnected state If an implied connect is attempted on a nonblocking STREAM/SEQPACKET socket during link congestion, the connect message will be discarded and sendmsg will return EAGAIN. This is normal behavior, and the application is expected to poll the socket until POLLOUT is set, after which the connection attempt can be retried. However, the POLLOUT flag is never set for unconnected sockets and poll() always returns a zero mask. The application is then left without a trigger for when it can make another attempt at sending the message. The solution is to check if we're polling on an unconnected socket and set the POLLOUT flag if the TIPC port owned by this socket is not congested. The TIPC ports waiting on a specific link will be marked as 'not congested' when the link congestion have abated. Signed-off-by: Erik Hugne Signed-off-by: Paul Gortmaker --- net/tipc/socket.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b5fc8ed1d1fe..59adc76905e0 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -412,7 +412,7 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr, * socket state flags set * ------------ --------- * unconnected no read flags - * no write flags + * POLLOUT if port is not congested * * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue * no write flags @@ -442,6 +442,10 @@ static unsigned int poll(struct file *file, struct socket *sock, sock_poll_wait(file, sk_sleep(sk), wait); switch ((int)sock->state) { + case SS_UNCONNECTED: + if (!tipc_sk_port(sk)->congested) + mask |= POLLOUT; + break; case SS_READY: case SS_CONNECTED: if (!tipc_sk_port(sk)->congested) -- cgit v1.2.3 From 7503115107e5862870eaf5133627051b2e23ac0a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 29 Oct 2012 09:38:15 -0400 Subject: tipc: wake up all waiting threads at socket shutdown When a socket is shut down, we should wake up all thread sleeping on it, instead of just one of them. Otherwise, when several threads are polling the same socket, and one of them does shutdown(), the remaining threads may end up sleeping forever. Also, to align socket usage with common practice in other stacks, we use one of the common socket callback handlers, sk_state_change(), to wake up pending users. This is similar to the usage in e.g. inet_shutdown(). [net/ipv4/af_inet.c]. Signed-off-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: Paul Gortmaker --- net/tipc/socket.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 59adc76905e0..1a720c86e80a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1595,10 +1595,11 @@ restart: case SS_DISCONNECTING: - /* Discard any unreceived messages; wake up sleeping tasks */ + /* Discard any unreceived messages */ discard_rx_queue(sk); - if (waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + + /* Wake up anyone sleeping in poll */ + sk->sk_state_change(sk); res = 0; break; -- cgit v1.2.3