From 98bdb0aa007ff7e8e0061936d8d0e210faf2e655 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 Jan 2011 08:17:48 -0800 Subject: libceph: fix socket read error handling If we get EAGAIN when trying to read from the socket, it is not an error. Return 0 from the helper in this case to simplify the error handling cases in the caller (indirectly, try_read). Fix try_read to pass any error to it's caller (con_work) instead of almost always returning 0. This let's us respond to things like socket disconnects. Signed-off-by: Sage Weil --- net/ceph/messenger.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index dff633d62e5b..d95576d40c98 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -252,8 +252,12 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) { struct kvec iov = {buf, len}; struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; + int r; - return kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); + r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); + if (r == -EAGAIN) + r = 0; + return r; } /* @@ -1821,19 +1825,17 @@ more: dout("try_read connecting\n"); ret = read_partial_banner(con); if (ret <= 0) - goto done; - if (process_banner(con) < 0) { - ret = -1; goto out; - } + ret = process_banner(con); + if (ret < 0) + goto out; } ret = read_partial_connect(con); if (ret <= 0) - goto done; - if (process_connect(con) < 0) { - ret = -1; goto out; - } + ret = process_connect(con); + if (ret < 0) + goto out; goto more; } @@ -1848,7 +1850,7 @@ more: dout("skipping %d / %d bytes\n", skip, -con->in_base_pos); ret = ceph_tcp_recvmsg(con->sock, buf, skip); if (ret <= 0) - goto done; + goto out; con->in_base_pos += ret; if (con->in_base_pos) goto more; @@ -1859,7 +1861,7 @@ more: */ ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1); if (ret <= 0) - goto done; + goto out; dout("try_read got tag %d\n", (int)con->in_tag); switch (con->in_tag) { case CEPH_MSGR_TAG_MSG: @@ -1870,7 +1872,7 @@ more: break; case CEPH_MSGR_TAG_CLOSE: set_bit(CLOSED, &con->state); /* fixme */ - goto done; + goto out; default: goto bad_tag; } @@ -1882,13 +1884,12 @@ more: case -EBADMSG: con->error_msg = "bad crc"; ret = -EIO; - goto out; + break; case -EIO: con->error_msg = "io error"; - goto out; - default: - goto done; + break; } + goto out; } if (con->in_tag == CEPH_MSGR_TAG_READY) goto more; @@ -1898,15 +1899,13 @@ more: if (con->in_tag == CEPH_MSGR_TAG_ACK) { ret = read_partial_ack(con); if (ret <= 0) - goto done; + goto out; process_ack(con); goto more; } -done: - ret = 0; out: - dout("try_read done on %p\n", con); + dout("try_read done on %p ret %d\n", con, ret); return ret; bad_tag: -- cgit v1.2.3 From 42961d2333a1855c649fa3790e258ab4f0fa66a4 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 25 Jan 2011 08:19:34 -0800 Subject: libceph: fix socket write error handling Pass errors from writing to the socket up the stack. If we get -EAGAIN, return 0 from the helper to simplify the callers' checks. Signed-off-by: Sage Weil --- net/ceph/messenger.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index d95576d40c98..35b36b86d762 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -268,13 +268,17 @@ static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, size_t kvlen, size_t len, int more) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; + int r; if (more) msg.msg_flags |= MSG_MORE; else msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ - return kernel_sendmsg(sock, &msg, iov, kvlen, len); + r = kernel_sendmsg(sock, &msg, iov, kvlen, len); + if (r == -EAGAIN) + r = 0; + return r; } @@ -851,6 +855,8 @@ static int write_partial_msg_pages(struct ceph_connection *con) (msg->pages || msg->pagelist || msg->bio || in_trail)) kunmap(page); + if (ret == -EAGAIN) + ret = 0; if (ret <= 0) goto out; @@ -1741,16 +1747,12 @@ more_kvec: if (con->out_skip) { ret = write_partial_skip(con); if (ret <= 0) - goto done; - if (ret < 0) { - dout("try_write write_partial_skip err %d\n", ret); - goto done; - } + goto out; } if (con->out_kvec_left) { ret = write_partial_kvec(con); if (ret <= 0) - goto done; + goto out; } /* msg pages? */ @@ -1765,11 +1767,11 @@ more_kvec: if (ret == 1) goto more_kvec; /* we need to send the footer, too! */ if (ret == 0) - goto done; + goto out; if (ret < 0) { dout("try_write write_partial_msg_pages err %d\n", ret); - goto done; + goto out; } } @@ -1793,10 +1795,9 @@ do_next: /* Nothing to do! */ clear_bit(WRITE_PENDING, &con->state); dout("try_write nothing else to write.\n"); -done: ret = 0; out: - dout("try_write done on %p\n", con); + dout("try_write done on %p ret %d\n", con, ret); return ret; } -- cgit v1.2.3 From 5f04d5068a90602b93a7953e9a47c496705c6976 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 20 Feb 2011 11:49:45 -0800 Subject: net: Fix more stale on-stack list_head objects. From: Eric W. Biederman In the beginning with batching unreg_list was a list that was used only once in the lifetime of a network device (I think). Now we have calls using the unreg_list that can happen multiple times in the life of a network device like dev_deactivate and dev_close that are also using the unreg_list. In addition in unregister_netdevice_queue we also do a list_move because for devices like veth pairs it is possible that unregister_netdevice_queue will be called multiple times. So I think the change below to fix dev_deactivate which Eric D. missed will fix this problem. Now to go test that. Signed-off-by: David S. Miller --- net/mac80211/iface.c | 1 + net/sched/sch_generic.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8acba456744e..7a10a8d1b2d0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1229,6 +1229,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) } mutex_unlock(&local->iflist_mtx); unregister_netdevice_many(&unreg_list); + list_del(&unreg_list); } static u32 ieee80211_idle_off(struct ieee80211_local *local, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 34dc598440a2..1bc698039ae2 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -839,6 +839,7 @@ void dev_deactivate(struct net_device *dev) list_add(&dev->unreg_list, &single); dev_deactivate_many(&single); + list_del(&single); } static void dev_init_scheduler_queue(struct net_device *dev, -- cgit v1.2.3 From c24f691b56107feeba076616982093ee2d3c8fb5 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 7 Feb 2011 12:57:04 +0000 Subject: tcp: undo_retrans counter fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a bug that undo_retrans is incorrectly decremented when undo_marker is not set or undo_retrans is already 0. This happens when sender receives more DSACK ACKs than packets retransmitted during the current undo phase. This may also happen when sender receives DSACK after the undo operation is completed or cancelled. Fix another bug that undo_retrans is incorrectly incremented when sender retransmits an skb and tcp_skb_pcount(skb) > 1 (TSO). This case is rare but not impossible. Signed-off-by: Yuchung Cheng Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 5 +++-- net/ipv4/tcp_output.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb7f82ebf4a3..65f6c0406245 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1222,7 +1222,7 @@ static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, } /* D-SACK for already forgotten data... Do dumb counting. */ - if (dup_sack && + if (dup_sack && tp->undo_marker && tp->undo_retrans && !after(end_seq_0, prior_snd_una) && after(end_seq_0, tp->undo_marker)) tp->undo_retrans--; @@ -1299,7 +1299,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { - if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) + if (tp->undo_marker && tp->undo_retrans && + after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) tp->undo_retrans--; if (sacked & TCPCB_SACKED_ACKED) state->reord = min(fack_count, state->reord); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 406f320336e6..dfa5beb0c1c8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2162,7 +2162,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (!tp->retrans_stamp) tp->retrans_stamp = TCP_SKB_CB(skb)->when; - tp->undo_retrans++; + tp->undo_retrans += tcp_skb_pcount(skb); /* snd_nxt is stored to detect loss of retransmitted segment, * see tcp_input.c tcp_sacktag_write_queue(). -- cgit v1.2.3 From 4f919a3bc54da01db829c520ce4b1fabfde1c3f7 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Tue, 22 Feb 2011 00:11:06 +0800 Subject: fix cfg80211_wext_siwfreq lock ordering... I previously managed to reproduce a hang while scanning wireless channels (reproducible with airodump-ng hopping channels); subsequent lockdep instrumentation revealed a lock ordering issue. Without knowing the design intent, it looks like the locks should be taken in reverse order; please comment. ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.38-rc5-341cd #4 ------------------------------------------------------- airodump-ng/15445 is trying to acquire lock: (&rdev->devlist_mtx){+.+.+.}, at: [] cfg80211_wext_siwfreq+0xc6/0x100 but task is already holding lock: (&wdev->mtx){+.+.+.}, at: [] cfg80211_wext_siwfreq+0xbc/0x100 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&wdev->mtx){+.+.+.}: [] lock_acquire+0xc6/0x280 [] mutex_lock_nested+0x6e/0x4b0 [] cfg80211_netdev_notifier_call+0x430/0x5f0 [] notifier_call_chain+0x8b/0x100 [] raw_notifier_call_chain+0x11/0x20 [] call_netdevice_notifiers+0x32/0x60 [] __dev_notify_flags+0x34/0x80 [] dev_change_flags+0x40/0x70 [] do_setlink+0x1fc/0x8d0 [] rtnl_setlink+0xf2/0x140 [] rtnetlink_rcv_msg+0x163/0x270 [] netlink_rcv_skb+0xa1/0xd0 [] rtnetlink_rcv+0x20/0x30 [] netlink_unicast+0x2ba/0x300 [] netlink_sendmsg+0x267/0x3e0 [] sock_sendmsg+0xe4/0x110 [] sys_sendmsg+0x253/0x3b0 [] system_call_fastpath+0x16/0x1b -> #0 (&rdev->devlist_mtx){+.+.+.}: [] __lock_acquire+0x1622/0x1d10 [] lock_acquire+0xc6/0x280 [] mutex_lock_nested+0x6e/0x4b0 [] cfg80211_wext_siwfreq+0xc6/0x100 [] ioctl_standard_call+0x5d/0xd0 [] T.808+0x163/0x170 [] wext_handle_ioctl+0x3a/0x90 [] dev_ioctl+0x6f2/0x830 [] sock_ioctl+0xfd/0x290 [] do_vfs_ioctl+0x9d/0x590 [] sys_ioctl+0x4a/0x80 [] system_call_fastpath+0x16/0x1b other info that might help us debug this: 2 locks held by airodump-ng/15445: #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x12/0x20 #1: (&wdev->mtx){+.+.+.}, at: [] cfg80211_wext_siwfreq+0xbc/0x100 stack backtrace: Pid: 15445, comm: airodump-ng Not tainted 2.6.38-rc5-341cd #4 Call Trace: [] ? print_circular_bug+0xfa/0x100 [] ? __lock_acquire+0x1622/0x1d10 [] ? trace_hardirqs_off_caller+0x29/0xc0 [] ? lock_acquire+0xc6/0x280 [] ? cfg80211_wext_siwfreq+0xc6/0x100 [] ? mark_held_locks+0x67/0x90 [] ? mutex_lock_nested+0x6e/0x4b0 [] ? cfg80211_wext_siwfreq+0xc6/0x100 [] ? mark_held_locks+0x67/0x90 [] ? cfg80211_wext_siwfreq+0xc6/0x100 [] ? cfg80211_wext_siwfreq+0xc6/0x100 [] ? ioctl_standard_call+0x5d/0xd0 [] ? __dev_get_by_name+0x9b/0xc0 [] ? ioctl_standard_call+0x0/0xd0 [] ? T.808+0x163/0x170 [] ? might_fault+0x72/0xd0 [] ? wext_handle_ioctl+0x3a/0x90 [] ? might_fault+0xbb/0xd0 [] ? dev_ioctl+0x6f2/0x830 [] ? put_lock_stats+0xe/0x40 [] ? lock_release_holdtime+0xac/0x150 [] ? sock_ioctl+0xfd/0x290 [] ? do_vfs_ioctl+0x9d/0x590 [] ? fget_light+0x1df/0x3c0 [] ? sys_ioctl+0x4a/0x80 [] ? system_call_fastpath+0x16/0x1b Signed-off-by: Daniel J Blueman Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/wext-compat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 3e5dbd4e4cd5..d112f038edf0 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -802,11 +802,11 @@ int cfg80211_wext_siwfreq(struct net_device *dev, return freq; if (freq == 0) return -EINVAL; - wdev_lock(wdev); mutex_lock(&rdev->devlist_mtx); + wdev_lock(wdev); err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT); - mutex_unlock(&rdev->devlist_mtx); wdev_unlock(wdev); + mutex_unlock(&rdev->devlist_mtx); return err; default: return -EOPNOTSUPP; -- cgit v1.2.3 From 9cc6e0c4c457f84bedcfb04e7dd58a36909c4ef7 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Tue, 15 Feb 2011 13:19:17 +0000 Subject: bridge: Fix IPv6 multicast snooping by storing correct protocol type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The protocol type for IPv6 entries in the hash table for multicast bridge snooping is falsely set to ETH_P_IP, marking it as an IPv4 address, instead of setting it to ETH_P_IPV6, which results in negative look-ups in the hash table later. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 09d5c0987925..17708fccf1ee 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -784,7 +784,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, return 0; ipv6_addr_copy(&br_group.u.ip6, group); - br_group.proto = htons(ETH_P_IP); + br_group.proto = htons(ETH_P_IPV6); return br_multicast_add_group(br, port, &br_group); } -- cgit v1.2.3 From 649e984d00416cb1a254fdbebd6d3f9fa01c32fa Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Tue, 15 Feb 2011 13:19:18 +0000 Subject: bridge: Fix IPv6 multicast snooping by correcting offset in MLDv2 report MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We actually want a pointer to the grec_nsrcr and not the following field. Otherwise we can get very high values for *nsrcs as the first two bytes of the IPv6 multicast address are being used instead, leading to a failing pskb_may_pull() which results in MLDv2 reports not being parsed. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 17708fccf1ee..d69beaf83627 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1013,7 +1013,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, nsrcs = skb_header_pointer(skb, len + offsetof(struct mld2_grec, - grec_mca), + grec_nsrcs), sizeof(_nsrcs), &_nsrcs); if (!nsrcs) return -EINVAL; -- cgit v1.2.3 From d41db9f3f71548f07b8b6d81a88220d0035b04f6 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Tue, 15 Feb 2011 13:19:19 +0000 Subject: bridge: Add missing ntohs()s for MLDv2 report parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nsrcs number is 2 Byte wide, therefore we need to call ntohs() before using it. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d69beaf83627..9ce2af187709 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1020,11 +1020,12 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br, if (!pskb_may_pull(skb, len + sizeof(*grec) + - sizeof(struct in6_addr) * (*nsrcs))) + sizeof(struct in6_addr) * ntohs(*nsrcs))) return -EINVAL; grec = (struct mld2_grec *)(skb->data + len); - len += sizeof(*grec) + sizeof(struct in6_addr) * (*nsrcs); + len += sizeof(*grec) + + sizeof(struct in6_addr) * ntohs(*nsrcs); /* We treat these as MLDv1 reports for now. */ switch (grec->grec_type) { -- cgit v1.2.3 From e4de9f9e8333fbbae951c6e068f501f955123cf0 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Tue, 15 Feb 2011 13:19:21 +0000 Subject: bridge: Allow mcast snooping for transient link local addresses too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the multicast bridge snooping support is not active for link local multicast. I assume this has been done to leave important multicast data untouched, like IPv6 Neighborhood Discovery. In larger, bridged, local networks it could however be desirable to optimize for instance local multicast audio/video streaming too. With the transient flag in IPv6 multicast addresses we have an easy way to optimize such multimedia traffic without tempering with the high priority multicast data from well-known addresses. This patch alters the multicast bridge snooping for IPv6, to take effect for transient multicast addresses instead of non-link-local addresses. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9ce2af187709..1207a5a0688a 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -37,10 +37,9 @@ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline int ipv6_is_local_multicast(const struct in6_addr *addr) +static inline int ipv6_is_transient_multicast(const struct in6_addr *addr) { - if (ipv6_addr_is_multicast(addr) && - IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL) + if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr)) return 1; return 0; } @@ -780,7 +779,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, { struct br_ip br_group; - if (ipv6_is_local_multicast(group)) + if (!ipv6_is_transient_multicast(group)) return 0; ipv6_addr_copy(&br_group.u.ip6, group); @@ -1341,7 +1340,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br, { struct br_ip br_group; - if (ipv6_is_local_multicast(group)) + if (!ipv6_is_transient_multicast(group)) return; ipv6_addr_copy(&br_group.u.ip6, group); -- cgit v1.2.3 From 36cff5a10c6b003fa2d0464848d5664b2bf723e0 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Thu, 17 Feb 2011 08:17:51 +0000 Subject: bridge: Fix MLD queries' ethernet source address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Map the IPv6 header's destination multicast address to an ethernet source address instead of the MLD queries multicast address. For instance for a general MLD query (multicast address in the MLD query set to ::), this would wrongly be mapped to 33:33:00:00:00:00, although an MLD queries destination MAC should always be 33:33:00:00:00:01 which matches the IPv6 header's multicast destination ff02::1. Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 1207a5a0688a..c1f24e4d0820 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -434,7 +434,6 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, eth = eth_hdr(skb); memcpy(eth->h_source, br->dev->dev_addr, 6); - ipv6_eth_mc_map(group, eth->h_dest); eth->h_proto = htons(ETH_P_IPV6); skb_put(skb, sizeof(*eth)); @@ -448,6 +447,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h->hop_limit = 1; ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); + ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); hopopt[0] = IPPROTO_ICMPV6; /* next hdr */ -- cgit v1.2.3 From fe29ec41aaa51902aebd63658dfb04fe6fea8be5 Mon Sep 17 00:00:00 2001 From: Linus Lüssing Date: Thu, 17 Feb 2011 08:17:52 +0000 Subject: bridge: Use IPv6 link-local address for multicast listener queries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the bridge multicast snooping feature periodically issues IPv6 general multicast listener queries to sense the absence of a listener. For this, it uses :: as its source address - however RFC 2710 requires: "To be valid, the Query message MUST come from a link-local IPv6 Source Address". Current Linux kernel versions seem to follow this requirement and ignore our bogus MLD queries. With this commit a link local address from the bridge interface is being used to issue the MLD query, resulting in other Linux devices which are multicast listeners in the network to respond with a MLD response (which was not the case before). Signed-off-by: Linus Lüssing Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index c1f24e4d0820..030a002ff8ee 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -445,7 +445,8 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, ip6h->payload_len = htons(8 + sizeof(*mldq)); ip6h->nexthdr = IPPROTO_HOPOPTS; ip6h->hop_limit = 1; - ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0); + ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, + &ip6h->saddr); ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1)); ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); -- cgit v1.2.3 From c486da34390846b430896a407b47f0cea3a4189c Mon Sep 17 00:00:00 2001 From: Lucian Adrian Grijincu Date: Thu, 24 Feb 2011 19:48:03 +0000 Subject: sysctl: ipv6: use correct net in ipv6_sysctl_rtcache_flush Before this patch issuing these commands: fd = open("/proc/sys/net/ipv6/route/flush") unshare(CLONE_NEWNET) write(fd, "stuff") would flush the newly created net, not the original one. The equivalent ipv4 code is correct (stores the net inside ->extra1). Acked-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/route.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a998db6e7895..904312e25a3c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2557,14 +2557,16 @@ static int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - struct net *net = current->nsproxy->net_ns; - int delay = net->ipv6.sysctl.flush_delay; - if (write) { - proc_dointvec(ctl, write, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); - return 0; - } else + struct net *net; + int delay; + if (!write) return -EINVAL; + + net = (struct net *)ctl->extra1; + delay = net->ipv6.sysctl.flush_delay; + proc_dointvec(ctl, write, buffer, lenp, ppos); + fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); + return 0; } ctl_table ipv6_route_table_template[] = { @@ -2651,6 +2653,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) if (table) { table[0].data = &net->ipv6.sysctl.flush_delay; + table[0].extra1 = net; table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; -- cgit v1.2.3 From 0a93ea2e897bd793cc0aaaddc397eff32ac8d6fe Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 25 Feb 2011 15:33:17 +0000 Subject: RxRPC: Allocate tokens with kzalloc to avoid oops in rxrpc_destroy With slab poisoning enabled, I see the following oops: Unable to handle kernel paging request for data at address 0x6b6b6b6b6b6b6b73 ... NIP [c0000000006bc61c] .rxrpc_destroy+0x44/0x104 LR [c0000000006bc618] .rxrpc_destroy+0x40/0x104 Call Trace: [c0000000feb2bc00] [c0000000006bc618] .rxrpc_destroy+0x40/0x104 (unreliable) [c0000000feb2bc90] [c000000000349b2c] .key_cleanup+0x1a8/0x20c [c0000000feb2bd40] [c0000000000a2920] .process_one_work+0x2f4/0x4d0 [c0000000feb2be00] [c0000000000a2d50] .worker_thread+0x254/0x468 [c0000000feb2bec0] [c0000000000a868c] .kthread+0xbc/0xc8 [c0000000feb2bf90] [c000000000020e00] .kernel_thread+0x54/0x70 We aren't initialising token->next, but the code in destroy_context relies on the list being NULL terminated. Use kzalloc to zero out all the fields. Signed-off-by: Anton Blanchard Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- net/rxrpc/ar-key.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 5ee16f0353fe..d763793d39de 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -89,11 +89,11 @@ static int rxrpc_instantiate_xdr_rxkad(struct key *key, const __be32 *xdr, return ret; plen -= sizeof(*token); - token = kmalloc(sizeof(*token), GFP_KERNEL); + token = kzalloc(sizeof(*token), GFP_KERNEL); if (!token) return -ENOMEM; - token->kad = kmalloc(plen, GFP_KERNEL); + token->kad = kzalloc(plen, GFP_KERNEL); if (!token->kad) { kfree(token); return -ENOMEM; @@ -731,10 +731,10 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) goto error; ret = -ENOMEM; - token = kmalloc(sizeof(*token), GFP_KERNEL); + token = kzalloc(sizeof(*token), GFP_KERNEL); if (!token) goto error; - token->kad = kmalloc(plen, GFP_KERNEL); + token->kad = kzalloc(plen, GFP_KERNEL); if (!token->kad) goto error_free; -- cgit v1.2.3 From 5aca1a9e880e06bb7e5fd553a86a330ae7e218b5 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 25 Feb 2011 13:58:54 -0800 Subject: net: handle addr_type of 0 properly addr_type of 0 means that the type should be adopted from from_dev and not from __hw_addr_del_multiple(). Unfortunately it isn't so and addr_type will always be considered. Fix this by implementing the considered and documented behavior. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- net/core/dev_addr_lists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 508f9c18992f..133fd22ea287 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -144,7 +144,7 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list, list_for_each_entry(ha, &from_list->list, list) { type = addr_type ? addr_type : ha->type; - __hw_addr_del(to_list, ha->addr, addr_len, addr_type); + __hw_addr_del(to_list, ha->addr, addr_len, type); } } EXPORT_SYMBOL(__hw_addr_del_multiple); -- cgit v1.2.3 From b44d211e166b4b0dae8ce379f9d2e3ac164b5b60 Mon Sep 17 00:00:00 2001 From: Andrey Vagin Date: Mon, 21 Feb 2011 02:40:47 +0000 Subject: netlink: handle errors from netlink_dump() netlink_dump() may failed, but nobody handle its error. It generates output data, when a previous portion has been returned to user space. This mechanism works when all data isn't go in skb. If we enter in netlink_recvmsg() and skb is absent in the recv queue, the netlink_dump() will not been executed. So if netlink_dump() is failed one time, the new data never appear and the reader will sleep forever. netlink_dump() is called from two places: 1. from netlink_sendmsg->...->netlink_dump_start(). In this place we can report error directly and it will be returned by sendmsg(). 2. from netlink_recvmsg There we can't report error directly, because we have a portion of valid output data and call netlink_dump() for prepare the next portion. If netlink_dump() is failed, the socket will be mark as error and the next recvmsg will be failed. Signed-off-by: Andrey Vagin Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 478181d53c55..1f924595bdef 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1407,7 +1407,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, int noblock = flags&MSG_DONTWAIT; size_t copied; struct sk_buff *skb, *data_skb; - int err; + int err, ret; if (flags&MSG_OOB) return -EOPNOTSUPP; @@ -1470,8 +1470,13 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, skb_free_datagram(sk, skb); - if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) - netlink_dump(sk); + if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { + ret = netlink_dump(sk); + if (ret) { + sk->sk_err = ret; + sk->sk_error_report(sk); + } + } scm_recv(sock, msg, siocb->scm, flags); out: @@ -1736,6 +1741,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, struct netlink_callback *cb; struct sock *sk; struct netlink_sock *nlk; + int ret; cb = kzalloc(sizeof(*cb), GFP_KERNEL); if (cb == NULL) @@ -1764,9 +1770,13 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, nlk->cb = cb; mutex_unlock(nlk->cb_mutex); - netlink_dump(sk); + ret = netlink_dump(sk); + sock_put(sk); + if (ret) + return ret; + /* We successfully started a dump, by returning -EINTR we * signal not to send ACK even if it was requested. */ -- cgit v1.2.3 From ff75f40f44ae9b79d520bf32a05d35af74a805c0 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 22 Feb 2011 10:40:25 +0200 Subject: ipvs: fix dst_lock locking on dest update Fix dst_lock usage in __ip_vs_update_dest. We need _bh locking because destination is updated in user context. Can cause lockups on frequent destination updates. Problem reported by Simon Kirby. Bug was introduced in 2.6.37 from the "ipvs: changes for local real server" change. Signed-off-by: Julian Anastasov Signed-off-by: Hans Schillstrom Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_ctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 22f7ad5101ab..ba98e1308f3c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -808,9 +808,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->u_threshold = udest->u_threshold; dest->l_threshold = udest->l_threshold; - spin_lock(&dest->dst_lock); + spin_lock_bh(&dest->dst_lock); ip_vs_dst_reset(dest); - spin_unlock(&dest->dst_lock); + spin_unlock_bh(&dest->dst_lock); if (add) ip_vs_new_estimator(&dest->stats); -- cgit v1.2.3 From 720dc34bbbe9493c7bd48b2243058b4e447a929d Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 1 Mar 2011 23:02:07 -0800 Subject: dccp: fix oops on Reset after close This fixes a bug in the order of dccp_rcv_state_process() that still permitted reception even after closing the socket. A Reset after close thus causes a NULL pointer dereference by not preventing operations on an already torn-down socket. dccp_v4_do_rcv() | | state other than OPEN v dccp_rcv_state_process() | | DCCP_PKT_RESET v dccp_rcv_reset() | v dccp_time_wait() WARNING: at net/ipv4/inet_timewait_sock.c:141 __inet_twsk_hashdance+0x48/0x128() Modules linked in: arc4 ecb carl9170 rt2870sta(C) mac80211 r8712u(C) crc_ccitt ah [] (unwind_backtrace+0x0/0xec) from [] (warn_slowpath_common) [] (warn_slowpath_common+0x4c/0x64) from [] (warn_slowpath_n) [] (warn_slowpath_null+0x1c/0x24) from [] (__inet_twsk_hashd) [] (__inet_twsk_hashdance+0x48/0x128) from [] (dccp_time_wai) [] (dccp_time_wait+0x40/0xc8) from [] (dccp_rcv_state_proces) [] (dccp_rcv_state_process+0x120/0x538) from [] (dccp_v4_do_) [] (dccp_v4_do_rcv+0x11c/0x14c) from [] (release_sock+0xac/0) [] (release_sock+0xac/0x110) from [] (dccp_close+0x28c/0x380) [] (dccp_close+0x28c/0x380) from [] (inet_release+0x64/0x70) The fix is by testing the socket state first. Receiving a packet in Closed state now also produces the required "No connection" Reset reply of RFC 4340, 8.3.1. Reported-and-tested-by: Johan Hovold Cc: stable@kernel.org Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/input.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/dccp/input.c b/net/dccp/input.c index 8cde009e8b85..4222e7a654b0 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -614,6 +614,9 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* Caller (dccp_v4_do_rcv) will send Reset */ dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; + } else if (sk->sk_state == DCCP_CLOSED) { + dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + return 1; } if (sk->sk_state != DCCP_REQUESTING && sk->sk_state != DCCP_RESPOND) { @@ -668,10 +671,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } switch (sk->sk_state) { - case DCCP_CLOSED: - dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - return 1; - case DCCP_REQUESTING: queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); if (queued >= 0) -- cgit v1.2.3 From 9ef0298a8e5730d9a46d640014c727f3b4152870 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 2 Mar 2011 12:10:13 +0100 Subject: netfilter: nf_log: avoid oops in (un)bind with invalid nfproto values Like many other places, we have to check that the array index is within allowed limits, or otherwise, a kernel oops and other nastiness can ensue when we access memory beyond the end of the array. [ 5954.115381] BUG: unable to handle kernel paging request at 0000004000000000 [ 5954.120014] IP: __find_logger+0x6f/0xa0 [ 5954.123979] nf_log_bind_pf+0x2b/0x70 [ 5954.123979] nfulnl_recv_config+0xc0/0x4a0 [nfnetlink_log] [ 5954.123979] nfnetlink_rcv_msg+0x12c/0x1b0 [nfnetlink] ... The problem goes back to v2.6.30-rc1~1372~1342~31 where nf_log_bind was decoupled from nf_log_register. Reported-by: Miguel Di Ciurcio Filho , via irc.freenode.net/#netfilter Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/nf_log.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index b07393eab88e..91816998ed86 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -85,6 +85,8 @@ EXPORT_SYMBOL(nf_log_unregister); int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) { + if (pf >= ARRAY_SIZE(nf_loggers)) + return -EINVAL; mutex_lock(&nf_log_mutex); if (__find_logger(pf, logger->name) == NULL) { mutex_unlock(&nf_log_mutex); @@ -98,6 +100,8 @@ EXPORT_SYMBOL(nf_log_bind_pf); void nf_log_unbind_pf(u_int8_t pf) { + if (pf >= ARRAY_SIZE(nf_loggers)) + return; mutex_lock(&nf_log_mutex); rcu_assign_pointer(nf_loggers[pf], NULL); mutex_unlock(&nf_log_mutex); -- cgit v1.2.3 From f3d7bc57c71eba3f279785111bb473b1ef68dcb6 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 2 Mar 2011 10:35:33 +0000 Subject: net: dcbnl: check correct ops in dcbnl_ieee_set() The incorrect ops routine was being tested for in DCB_ATTR_IEEE_PFC attributes. This patch corrects it. Currently, every driver implementing ieee_setets also implements ieee_setpfc so this bug is not actualized yet. Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- net/dcb/dcbnl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index d5074a567289..c44348adba3b 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1193,7 +1193,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, goto err; } - if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) { + if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) { struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); err = ops->ieee_setpfc(netdev, pfc); if (err) -- cgit v1.2.3 From 10003453479ef287a73f8a39593f8f42687ea565 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 28 Feb 2011 03:27:43 +0000 Subject: AF_RXRPC: Handle receiving ACKALL packets The OpenAFS server is now sending ACKALL packets, so we need to handle them. Otherwise we report a protocol error and abort. Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/ar-input.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c index 89315009bab1..1a2b0633fece 100644 --- a/net/rxrpc/ar-input.c +++ b/net/rxrpc/ar-input.c @@ -423,6 +423,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) goto protocol_error; } + case RXRPC_PACKET_TYPE_ACKALL: case RXRPC_PACKET_TYPE_ACK: /* ACK processing is done in process context */ read_lock_bh(&call->state_lock); -- cgit v1.2.3 From 1362fa078dae16776cd439791c6605b224ea6171 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 3 Mar 2011 11:28:58 +0000 Subject: DNS: Fix a NULL pointer deref when trying to read an error key [CVE-2011-1076] When a DNS resolver key is instantiated with an error indication, attempts to read that key will result in an oops because user_read() is expecting there to be a payload - and there isn't one [CVE-2011-1076]. Give the DNS resolver key its own read handler that returns the error cached in key->type_data.x[0] as an error rather than crashing. Also make the kenter() at the beginning of dns_resolver_instantiate() limit the amount of data it prints, since the data is not necessarily NUL-terminated. The buggy code was added in: commit 4a2d789267e00b5a1175ecd2ddefcc78b83fbf09 Author: Wang Lei Date: Wed Aug 11 09:37:58 2010 +0100 Subject: DNS: If the DNS server returns an error, allow that to be cached [ver #2] This can trivially be reproduced by any user with the following program compiled with -lkeyutils: #include #include #include static char payload[] = "#dnserror=6"; int main() { key_serial_t key; key = add_key("dns_resolver", "a", payload, sizeof(payload), KEY_SPEC_SESSION_KEYRING); if (key == -1) err(1, "add_key"); if (keyctl_read(key, NULL, 0) == -1) err(1, "read_key"); return 0; } What should happen is that keyctl_read() reports error 6 (ENXIO) to the user: dns-break: read_key: No such device or address but instead the kernel oopses. This cannot be reproduced with the 'keyutils add' or 'keyutils padd' commands as both of those cut the data down below the NUL termination that must be included in the data. Without this dns_resolver_instantiate() will return -EINVAL and the key will not be instantiated such that it can be read. The oops looks like: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: [] user_read+0x4f/0x8f PGD 3bdf8067 PUD 385b9067 PMD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/devices/pci0000:00/0000:00:19.0/irq CPU 0 Modules linked in: Pid: 2150, comm: dns-break Not tainted 2.6.38-rc7-cachefs+ #468 /DG965RY RIP: 0010:[] [] user_read+0x4f/0x8f RSP: 0018:ffff88003bf47f08 EFLAGS: 00010246 RAX: 0000000000000001 RBX: ffff88003b5ea378 RCX: ffffffff81972368 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88003b5ea378 RBP: ffff88003bf47f28 R08: ffff88003be56620 R09: 0000000000000000 R10: 0000000000000395 R11: 0000000000000002 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ffffffffffffffa1 FS: 00007feab5751700(0000) GS:ffff88003e000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 000000003de40000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process dns-break (pid: 2150, threadinfo ffff88003bf46000, task ffff88003be56090) Stack: ffff88003b5ea378 ffff88003b5ea3a0 0000000000000000 0000000000000000 ffff88003bf47f68 ffffffff811b708e ffff88003c442bc8 0000000000000000 00000000004005a0 00007fffba368060 0000000000000000 0000000000000000 Call Trace: [] keyctl_read_key+0xac/0xcf [] sys_keyctl+0x75/0xb6 [] system_call_fastpath+0x16/0x1b Code: 75 1f 48 83 7b 28 00 75 18 c6 05 58 2b fb 00 01 be bb 00 00 00 48 c7 c7 76 1c 75 81 e8 13 c2 e9 ff 4c 8b b3 e0 00 00 00 4d 85 ed <41> 0f b7 5e 10 74 2d 4d 85 e4 74 28 e8 98 79 ee ff 49 39 dd 48 RIP [] user_read+0x4f/0x8f RSP CR2: 0000000000000010 Signed-off-by: David Howells Acked-by: Jeff Layton cc: Wang Lei Signed-off-by: James Morris --- Documentation/networking/dns_resolver.txt | 9 ++++++++- net/dns_resolver/dns_key.c | 20 +++++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/Documentation/networking/dns_resolver.txt b/Documentation/networking/dns_resolver.txt index aefd1e681804..04ca06325b08 100644 --- a/Documentation/networking/dns_resolver.txt +++ b/Documentation/networking/dns_resolver.txt @@ -61,7 +61,6 @@ before the more general line given above as the first match is the one taken. create dns_resolver foo:* * /usr/sbin/dns.foo %k - ===== USAGE ===== @@ -104,6 +103,14 @@ implemented in the module can be called after doing: returned also. +=============================== +READING DNS KEYS FROM USERSPACE +=============================== + +Keys of dns_resolver type can be read from userspace using keyctl_read() or +"keyctl read/print/pipe". + + ========= MECHANISM ========= diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 739435a6af39..cfa7a5e1c5c9 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -67,8 +67,9 @@ dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen) size_t result_len = 0; const char *data = _data, *end, *opt; - kenter("%%%d,%s,'%s',%zu", - key->serial, key->description, data, datalen); + kenter("%%%d,%s,'%*.*s',%zu", + key->serial, key->description, + (int)datalen, (int)datalen, data, datalen); if (datalen <= 1 || !data || data[datalen - 1] != '\0') return -EINVAL; @@ -217,6 +218,19 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) seq_printf(m, ": %u", key->datalen); } +/* + * read the DNS data + * - the key's semaphore is read-locked + */ +static long dns_resolver_read(const struct key *key, + char __user *buffer, size_t buflen) +{ + if (key->type_data.x[0]) + return key->type_data.x[0]; + + return user_read(key, buffer, buflen); +} + struct key_type key_type_dns_resolver = { .name = "dns_resolver", .instantiate = dns_resolver_instantiate, @@ -224,7 +238,7 @@ struct key_type key_type_dns_resolver = { .revoke = user_revoke, .destroy = user_destroy, .describe = dns_resolver_describe, - .read = user_read, + .read = dns_resolver_read, }; static int __init init_dns_resolver(void) -- cgit v1.2.3