diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2021-09-06 06:34:11 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2021-09-06 06:34:48 -0400 |
commit | e99314a340d27efafab3b7ea226beb239162cd46 (patch) | |
tree | e16aebf4ddd982518c89eda4ccebdaff1c430723 /net/unix/af_unix.c | |
parent | 0d0a19395baa36ab186df8081ab7f7b57c3fade1 (diff) | |
parent | 419025b3b4190ee867ef4fc48fb3bd7da2e67a0c (diff) | |
download | lwn-e99314a340d27efafab3b7ea226beb239162cd46.tar.gz lwn-e99314a340d27efafab3b7ea226beb239162cd46.zip |
Merge tag 'kvmarm-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for 5.15
- Page ownership tracking between host EL1 and EL2
- Rely on userspace page tables to create large stage-2 mappings
- Fix incompatibility between pKVM and kmemleak
- Fix the PMU reset state, and improve the performance of the virtual PMU
- Move over to the generic KVM entry code
- Address PSCI reset issues w.r.t. save/restore
- Preliminary rework for the upcoming pKVM fixed feature
- A bunch of MM cleanups
- a vGIC fix for timer spurious interrupts
- Various cleanups
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r-- | net/unix/af_unix.c | 51 |
1 files changed, 49 insertions, 2 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 23c92ad15c61..ba7ced947e51 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1526,6 +1526,53 @@ out: return err; } +static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ + scm->fp = scm_fp_dup(UNIXCB(skb).fp); + + /* + * Garbage collection of unix sockets starts by selecting a set of + * candidate sockets which have reference only from being in flight + * (total_refs == inflight_refs). This condition is checked once during + * the candidate collection phase, and candidates are marked as such, so + * that non-candidates can later be ignored. While inflight_refs is + * protected by unix_gc_lock, total_refs (file count) is not, hence this + * is an instantaneous decision. + * + * Once a candidate, however, the socket must not be reinstalled into a + * file descriptor while the garbage collection is in progress. + * + * If the above conditions are met, then the directed graph of + * candidates (*) does not change while unix_gc_lock is held. + * + * Any operations that changes the file count through file descriptors + * (dup, close, sendmsg) does not change the graph since candidates are + * not installed in fds. + * + * Dequeing a candidate via recvmsg would install it into an fd, but + * that takes unix_gc_lock to decrement the inflight count, so it's + * serialized with garbage collection. + * + * MSG_PEEK is special in that it does not change the inflight count, + * yet does install the socket into an fd. The following lock/unlock + * pair is to ensure serialization with garbage collection. It must be + * done between incrementing the file count and installing the file into + * an fd. + * + * If garbage collection starts after the barrier provided by the + * lock/unlock, then it will see the elevated refcount and not mark this + * as a candidate. If a garbage collection is already in progress + * before the file count was incremented, then the lock/unlock pair will + * ensure that garbage collection is finished before progressing to + * installing the fd. + * + * (*) A -> B where B is on the queue of A or B is on the queue of C + * which is on the queue of listening socket A. + */ + spin_lock(&unix_gc_lock); + spin_unlock(&unix_gc_lock); +} + static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; @@ -2175,7 +2222,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); } err = (flags & MSG_TRUNC) ? skb->len - skip : size; @@ -2418,7 +2465,7 @@ unlock: /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); sk_peek_offset_fwd(sk, chunk); |