file: reclaim 24 bytes from f_owner

We do embedd struct fown_struct into struct file letting it take up 32 bytes in total. We could tweak struct fown_struct to be more compact but really it shouldn't even be embedded in struct file in the first place. Instead, actual users of struct fown_struct should allocate the struct on demand. This frees up 24 bytes in struct file. That will have some potentially user-visible changes for the ownership fcntl()s. Some of them can now fail due to allocation failures. Practically, that probably will almost never happen as the allocations are small and they only happen once per file. The fown_struct is used during kill_fasync() which is used by e.g., pipes to generate a SIGIO signal. Sending of such signals is conditional on userspace having set an owner for the file using one of the F_OWNER fcntl()s. Such users will be unaffected if struct fown_struct is allocated during the fcntl() call. There are a few subsystems that call __f_setown() expecting file->f_owner to be allocated: (1) tun devices file->f_op->fasync::tun_chr_fasync() -> __f_setown() There are no callers of tun_chr_fasync(). (2) tty devices file->f_op->fasync::tty_fasync() -> __tty_fasync() -> __f_setown() tty_fasync() has no additional callers but __tty_fasync() has. Note that __tty_fasync() only calls __f_setown() if the @on argument is true. It's called from: file->f_op->release::tty_release() -> tty_release() -> __tty_fasync() -> __f_setown() tty_release() calls __tty_fasync() with @on false => __f_setown() is never called from tty_release(). => All callers of tty_release() are safe as well. file->f_op->release::tty_open() -> tty_release() -> __tty_fasync() -> __f_setown() __tty_hangup() calls __tty_fasync() with @on false => __f_setown() is never called from tty_release(). => All callers of __tty_hangup() are safe as well. From the callchains it's obvious that (1) and (2) end up getting called via file->f_op->fasync(). That can happen either through the F_SETFL fcntl() with the FASYNC flag raised or via the FIOASYNC ioctl(). If FASYNC is requested and the file isn't already FASYNC then file->f_op->fasync() is called with @on true which ends up causing both (1) and (2) to call __f_setown(). (1) and (2) are the only subsystems that call __f_setown() from the file->f_op->fasync() handler. So both (1) and (2) have been updated to allocate a struct fown_struct prior to calling fasync_helper() to register with the fasync infrastructure. That's safe as they both call fasync_helper() which also does allocations if @on is true. The other interesting case are file leases: (3) file leases lease_manager_ops->lm_setup::lease_setup() -> __f_setown() Which in turn is called from: generic_add_lease() -> lease_manager_ops->lm_setup::lease_setup() -> __f_setown() So here again we can simply make generic_add_lease() allocate struct fown_struct prior to the lease_manager_ops->lm_setup::lease_setup() which happens under a spinlock. With that the two remaining subsystems that call __f_setown() are: (4) dnotify (5) sockets Both have their own custom ioctls to set struct fown_struct and both have been converted to allocate a struct fown_struct on demand from their respective ioctls. Interactions with O_PATH are fine as well e.g., when opening a /dev/tty as O_PATH then no file->f_op->open() happens thus no file->f_owner is allocated. That's fine as no file operation will be set for those and the device has never been opened. fcntl()s called on such things will just allocate a ->f_owner on demand. Although I have zero idea why'd you care about f_owner on an O_PATH fd. Link: https://lore.kernel.org/r/20240813-work-f_owner-v2-1-4e9343a79f9f@kernel.org Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org>
author: Christian Brauner <brauner@kernel.org> 2024-08-09 18:00:01 +0200
committer: Christian Brauner <brauner@kernel.org> 2024-08-28 13:05:39 +0200
commit: 1934b212615dc617ac84fc306333ab2b9fc3b04f (patch)
tree: 4517b77bb007c4d7ebcbedb22ac7eea5dc58667b /fs/fcntl.c
parent: 47ac09b91befbb6a235ab620c32af719f8208399 (diff)
download: lwn-1934b212615dc617ac84fc306333ab2b9fc3b04f.tar.gz
lwn-1934b212615dc617ac84fc306333ab2b9fc3b04f.zip
1 files changed, 132 insertions, 34 deletions
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 300e5d9ad913..0587a0e221a6 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -33,6 +33,8 @@
 #include <asm/siginfo.h>
 #include <linux/uaccess.h>
 
+#include "internal.h"
+
 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
 
 static int setfl(int fd, struct file * filp, unsigned int arg)
@@ -87,22 +89,64 @@ static int setfl(int fd, struct file * filp, unsigned int arg)
 	return error;
 }
 
+/*
+ * Allocate an file->f_owner struct if it doesn't exist, handling racing
+ * allocations correctly.
+ */
+int file_f_owner_allocate(struct file *file)
+{
+	struct fown_struct *f_owner;
+
+	f_owner = file_f_owner(file);
+	if (f_owner)
+		return 0;
+
+	f_owner = kzalloc(sizeof(struct fown_struct), GFP_KERNEL);
+	if (!f_owner)
+		return -ENOMEM;
+
+	rwlock_init(&f_owner->lock);
+	f_owner->file = file;
+	/* If someone else raced us, drop our allocation. */
+	if (unlikely(cmpxchg(&file->f_owner, NULL, f_owner)))
+		kfree(f_owner);
+	return 0;
+}
+EXPORT_SYMBOL(file_f_owner_allocate);
+
+void file_f_owner_release(struct file *file)
+{
+	struct fown_struct *f_owner;
+
+	f_owner = file_f_owner(file);
+	if (f_owner) {
+		put_pid(f_owner->pid);
+		kfree(f_owner);
+	}
+}
+
 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
                      int force)
 {
-	write_lock_irq(&filp->f_owner.lock);
-	if (force || !filp->f_owner.pid) {
-		put_pid(filp->f_owner.pid);
-		filp->f_owner.pid = get_pid(pid);
-		filp->f_owner.pid_type = type;
+	struct fown_struct *f_owner;
+
+	f_owner = file_f_owner(filp);
+	if (WARN_ON_ONCE(!f_owner))
+		return;
+
+	write_lock_irq(&f_owner->lock);
+	if (force || !f_owner->pid) {
+		put_pid(f_owner->pid);
+		f_owner->pid = get_pid(pid);
+		f_owner->pid_type = type;
 
 		if (pid) {
 			const struct cred *cred = current_cred();
-			filp->f_owner.uid = cred->uid;
-			filp->f_owner.euid = cred->euid;
+			f_owner->uid = cred->uid;
+			f_owner->euid = cred->euid;
 		}
 	}
-	write_unlock_irq(&filp->f_owner.lock);
+	write_unlock_irq(&f_owner->lock);
 }
 
 void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
@@ -119,6 +163,8 @@ int f_setown(struct file *filp, int who, int force)
 	struct pid *pid = NULL;
 	int ret = 0;
 
+	might_sleep();
+
 	type = PIDTYPE_TGID;
 	if (who < 0) {
 		/* avoid overflow below */
@@ -129,6 +175,10 @@ int f_setown(struct file *filp, int who, int force)
 		who = -who;
 	}
 
+	ret = file_f_owner_allocate(filp);
+	if (ret)
+		return ret;
+
 	rcu_read_lock();
 	if (who) {
 		pid = find_vpid(who);
@@ -152,16 +202,21 @@ void f_delown(struct file *filp)
 pid_t f_getown(struct file *filp)
 {
 	pid_t pid = 0;
+	struct fown_struct *f_owner;
+
+	f_owner = file_f_owner(filp);
+	if (!f_owner)
+		return pid;
 
-	read_lock_irq(&filp->f_owner.lock);
+	read_lock_irq(&f_owner->lock);
 	rcu_read_lock();
-	if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
-		pid = pid_vnr(filp->f_owner.pid);
-		if (filp->f_owner.pid_type == PIDTYPE_PGID)
+	if (pid_task(f_owner->pid, f_owner->pid_type)) {
+		pid = pid_vnr(f_owner->pid);
+		if (f_owner->pid_type == PIDTYPE_PGID)
 			pid = -pid;
 	}
 	rcu_read_unlock();
-	read_unlock_irq(&filp->f_owner.lock);
+	read_unlock_irq(&f_owner->lock);
 	return pid;
 }
 
@@ -194,6 +249,10 @@ static int f_setown_ex(struct file *filp, unsigned long arg)
 		return -EINVAL;
 	}
 
+	ret = file_f_owner_allocate(filp);
+	if (ret)
+		return ret;
+
 	rcu_read_lock();
 	pid = find_vpid(owner.pid);
 	if (owner.pid && !pid)
@@ -210,13 +269,20 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
 	struct f_owner_ex __user *owner_p = (void __user *)arg;
 	struct f_owner_ex owner = {};
 	int ret = 0;
+	struct fown_struct *f_owner;
+	enum pid_type pid_type = PIDTYPE_PID;
 
-	read_lock_irq(&filp->f_owner.lock);
-	rcu_read_lock();
-	if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
-		owner.pid = pid_vnr(filp->f_owner.pid);
-	rcu_read_unlock();
-	switch (filp->f_owner.pid_type) {
+	f_owner = file_f_owner(filp);
+	if (f_owner) {
+		read_lock_irq(&f_owner->lock);
+		rcu_read_lock();
+		if (pid_task(f_owner->pid, f_owner->pid_type))
+			owner.pid = pid_vnr(f_owner->pid);
+		rcu_read_unlock();
+		pid_type = f_owner->pid_type;
+	}
+
+	switch (pid_type) {
 	case PIDTYPE_PID:
 		owner.type = F_OWNER_TID;
 		break;
@@ -234,7 +300,8 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
 		ret = -EINVAL;
 		break;
 	}
-	read_unlock_irq(&filp->f_owner.lock);
+	if (f_owner)
+		read_unlock_irq(&f_owner->lock);
 
 	if (!ret) {
 		ret = copy_to_user(owner_p, &owner, sizeof(owner));
@@ -248,14 +315,18 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
 static int f_getowner_uids(struct file *filp, unsigned long arg)
 {
 	struct user_namespace *user_ns = current_user_ns();
+	struct fown_struct *f_owner;
 	uid_t __user *dst = (void __user *)arg;
-	uid_t src[2];
+	uid_t src[2] = {0, 0};
 	int err;
 
-	read_lock_irq(&filp->f_owner.lock);
-	src[0] = from_kuid(user_ns, filp->f_owner.uid);
-	src[1] = from_kuid(user_ns, filp->f_owner.euid);
-	read_unlock_irq(&filp->f_owner.lock);
+	f_owner = file_f_owner(filp);
+	if (f_owner) {
+		read_lock_irq(&f_owner->lock);
+		src[0] = from_kuid(user_ns, f_owner->uid);
+		src[1] = from_kuid(user_ns, f_owner->euid);
+		read_unlock_irq(&f_owner->lock);
+	}
 
 	err  = put_user(src[0], &dst[0]);
 	err |= put_user(src[1], &dst[1]);
@@ -343,6 +414,30 @@ static long f_dupfd_query(int fd, struct file *filp)
 	return f.file == filp;
 }
 
+static int f_owner_sig(struct file *filp, int signum, bool setsig)
+{
+	int ret = 0;
+	struct fown_struct *f_owner;
+
+	might_sleep();
+
+	if (setsig) {
+		if (!valid_signal(signum))
+			return -EINVAL;
+
+		ret = file_f_owner_allocate(filp);
+		if (ret)
+			return ret;
+	}
+
+	f_owner = file_f_owner(filp);
+	if (setsig)
+		f_owner->signum = signum;
+	else if (f_owner)
+		ret = f_owner->signum;
+	return ret;
+}
+
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		struct file *filp)
 {
@@ -421,15 +516,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		err = f_getowner_uids(filp, arg);
 		break;
 	case F_GETSIG:
-		err = filp->f_owner.signum;
+		err = f_owner_sig(filp, 0, false);
 		break;
 	case F_SETSIG:
-		/* arg == 0 restores default behaviour. */
-		if (!valid_signal(argi)) {
-			break;
-		}
-		err = 0;
-		filp->f_owner.signum = argi;
+		err = f_owner_sig(filp, argi, true);
 		break;
 	case F_GETLEASE:
 		err = fcntl_getlease(filp);
@@ -844,14 +934,19 @@ static void send_sigurg_to_task(struct task_struct *p,
 		do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
 }
 
-int send_sigurg(struct fown_struct *fown)
+int send_sigurg(struct file *file)
 {
+	struct fown_struct *fown;
 	struct task_struct *p;
 	enum pid_type type;
 	struct pid *pid;
 	unsigned long flags;
 	int ret = 0;
 	
+	fown = file_f_owner(file);
+	if (!fown)
+		return 0;
+
 	read_lock_irqsave(&fown->lock, flags);
 
 	type = fown->pid_type;
@@ -1027,13 +1122,16 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
 		}
 		read_lock_irqsave(&fa->fa_lock, flags);
 		if (fa->fa_file) {
-			fown = &fa->fa_file->f_owner;
+			fown = file_f_owner(fa->fa_file);
+			if (!fown)
+				goto next;
 			/* Don't send SIGURG to processes which have not set a
 			   queued signum: SIGURG has its own default signalling
 			   mechanism. */
 			if (!(sig == SIGURG && fown->signum == 0))
 				send_sigio(fown, fa->fa_fd, band);
 		}
+next:
 		read_unlock_irqrestore(&fa->fa_lock, flags);
 		fa = rcu_dereference(fa->fa_next);
 	}
author	Christian Brauner <brauner@kernel.org>	2024-08-09 18:00:01 +0200
committer	Christian Brauner <brauner@kernel.org>	2024-08-28 13:05:39 +0200
commit	1934b212615dc617ac84fc306333ab2b9fc3b04f (patch)
tree	4517b77bb007c4d7ebcbedb22ac7eea5dc58667b /fs/fcntl.c
parent	47ac09b91befbb6a235ab620c32af719f8208399 (diff)
download	lwn-1934b212615dc617ac84fc306333ab2b9fc3b04f.tar.gz lwn-1934b212615dc617ac84fc306333ab2b9fc3b04f.zip