summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2015-01-10 17:53:21 -0500
committerAl Viro <viro@zeniv.linux.org.uk>2015-01-25 23:17:28 -0500
commit59eda0e07f43c950d31756213b607af673e551f0 (patch)
treef40f7b67133576c36a65a4cba9aca5df68d00f34
parentfdab684d7202774bfd8762d4a656a553b787c8ec (diff)
downloadlwn-59eda0e07f43c950d31756213b607af673e551f0.tar.gz
lwn-59eda0e07f43c950d31756213b607af673e551f0.zip
new fs_pin killing logics
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--fs/fs_pin.c54
-rw-r--r--include/linux/fs_pin.h13
-rw-r--r--include/linux/pid_namespace.h4
-rw-r--r--kernel/acct.c81
4 files changed, 96 insertions, 56 deletions
diff --git a/fs/fs_pin.c b/fs/fs_pin.c
index 50ef7d2ef03c..0c77bdc238b2 100644
--- a/fs/fs_pin.c
+++ b/fs/fs_pin.c
@@ -1,4 +1,5 @@
#include <linux/fs.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/fs_pin.h>
#include "internal.h"
@@ -12,6 +13,10 @@ void pin_remove(struct fs_pin *pin)
hlist_del(&pin->m_list);
hlist_del(&pin->s_list);
spin_unlock(&pin_lock);
+ spin_lock_irq(&pin->wait.lock);
+ pin->done = 1;
+ wake_up_locked(&pin->wait);
+ spin_unlock_irq(&pin->wait.lock);
}
void pin_insert_group(struct fs_pin *pin, struct vfsmount *m, struct hlist_head *p)
@@ -28,19 +33,58 @@ void pin_insert(struct fs_pin *pin, struct vfsmount *m)
pin_insert_group(pin, m, &m->mnt_sb->s_pins);
}
+void pin_kill(struct fs_pin *p)
+{
+ wait_queue_t wait;
+
+ if (!p) {
+ rcu_read_unlock();
+ return;
+ }
+ init_wait(&wait);
+ spin_lock_irq(&p->wait.lock);
+ if (likely(!p->done)) {
+ p->done = -1;
+ spin_unlock_irq(&p->wait.lock);
+ rcu_read_unlock();
+ p->kill(p);
+ return;
+ }
+ if (p->done > 0) {
+ spin_unlock_irq(&p->wait.lock);
+ rcu_read_unlock();
+ return;
+ }
+ __add_wait_queue(&p->wait, &wait);
+ while (1) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&p->wait.lock);
+ rcu_read_unlock();
+ schedule();
+ rcu_read_lock();
+ if (likely(list_empty(&wait.task_list)))
+ break;
+ /* OK, we know p couldn't have been freed yet */
+ spin_lock_irq(&p->wait.lock);
+ if (p->done > 0) {
+ spin_unlock_irq(&p->wait.lock);
+ break;
+ }
+ }
+ rcu_read_unlock();
+}
+
void mnt_pin_kill(struct mount *m)
{
while (1) {
struct hlist_node *p;
- struct fs_pin *pin;
rcu_read_lock();
p = ACCESS_ONCE(m->mnt_pins.first);
if (!p) {
rcu_read_unlock();
break;
}
- pin = hlist_entry(p, struct fs_pin, m_list);
- pin->kill(pin);
+ pin_kill(hlist_entry(p, struct fs_pin, m_list));
}
}
@@ -48,14 +92,12 @@ void group_pin_kill(struct hlist_head *p)
{
while (1) {
struct hlist_node *q;
- struct fs_pin *pin;
rcu_read_lock();
q = ACCESS_ONCE(p->first);
if (!q) {
rcu_read_unlock();
break;
}
- pin = hlist_entry(q, struct fs_pin, s_list);
- pin->kill(pin);
+ pin_kill(hlist_entry(q, struct fs_pin, s_list));
}
}
diff --git a/include/linux/fs_pin.h b/include/linux/fs_pin.h
index 2be38d1464ae..9dc4e0384bfb 100644
--- a/include/linux/fs_pin.h
+++ b/include/linux/fs_pin.h
@@ -1,11 +1,22 @@
-#include <linux/fs.h>
+#include <linux/wait.h>
struct fs_pin {
+ wait_queue_head_t wait;
+ int done;
struct hlist_node s_list;
struct hlist_node m_list;
void (*kill)(struct fs_pin *);
};
+struct vfsmount;
+
+static inline void init_fs_pin(struct fs_pin *p, void (*kill)(struct fs_pin *))
+{
+ init_waitqueue_head(&p->wait);
+ p->kill = kill;
+}
+
void pin_remove(struct fs_pin *);
void pin_insert_group(struct fs_pin *, struct vfsmount *, struct hlist_head *);
void pin_insert(struct fs_pin *, struct vfsmount *);
+void pin_kill(struct fs_pin *);
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index b9cf6c51b181..918b117a7cd3 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -19,7 +19,7 @@ struct pidmap {
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE)
-struct bsd_acct_struct;
+struct fs_pin;
struct pid_namespace {
struct kref kref;
@@ -37,7 +37,7 @@ struct pid_namespace {
struct dentry *proc_thread_self;
#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
- struct bsd_acct_struct *bacct;
+ struct fs_pin *bacct;
#endif
struct user_namespace *user_ns;
struct work_struct proc_work;
diff --git a/kernel/acct.c b/kernel/acct.c
index cf6588ab517b..e6c10d1a4058 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -76,7 +76,6 @@ int acct_parm[3] = {4, 2, 30};
/*
* External references and all of the globals.
*/
-static void do_acct_process(struct bsd_acct_struct *acct);
struct bsd_acct_struct {
struct fs_pin pin;
@@ -91,6 +90,8 @@ struct bsd_acct_struct {
struct completion done;
};
+static void do_acct_process(struct bsd_acct_struct *acct);
+
/*
* Check the amount of free space and suspend/resume accordingly.
*/
@@ -132,13 +133,18 @@ static void acct_put(struct bsd_acct_struct *p)
kfree_rcu(p, rcu);
}
+static inline struct bsd_acct_struct *to_acct(struct fs_pin *p)
+{
+ return p ? container_of(p, struct bsd_acct_struct, pin) : NULL;
+}
+
static struct bsd_acct_struct *acct_get(struct pid_namespace *ns)
{
struct bsd_acct_struct *res;
again:
smp_rmb();
rcu_read_lock();
- res = ACCESS_ONCE(ns->bacct);
+ res = to_acct(ACCESS_ONCE(ns->bacct));
if (!res) {
rcu_read_unlock();
return NULL;
@@ -150,7 +156,7 @@ again:
}
rcu_read_unlock();
mutex_lock(&res->lock);
- if (!res->ns) {
+ if (res != to_acct(ACCESS_ONCE(ns->bacct))) {
mutex_unlock(&res->lock);
acct_put(res);
goto again;
@@ -158,6 +164,19 @@ again:
return res;
}
+static void acct_pin_kill(struct fs_pin *pin)
+{
+ struct bsd_acct_struct *acct = to_acct(pin);
+ mutex_lock(&acct->lock);
+ do_acct_process(acct);
+ schedule_work(&acct->work);
+ wait_for_completion(&acct->done);
+ cmpxchg(&acct->ns->bacct, pin, NULL);
+ mutex_unlock(&acct->lock);
+ pin_remove(pin);
+ acct_put(acct);
+}
+
static void close_work(struct work_struct *work)
{
struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work);
@@ -168,49 +187,13 @@ static void close_work(struct work_struct *work)
complete(&acct->done);
}
-static void acct_kill(struct bsd_acct_struct *acct)
-{
- if (acct) {
- struct pid_namespace *ns = acct->ns;
- do_acct_process(acct);
- INIT_WORK(&acct->work, close_work);
- init_completion(&acct->done);
- schedule_work(&acct->work);
- wait_for_completion(&acct->done);
- pin_remove(&acct->pin);
- cmpxchg(&ns->bacct, acct, NULL);
- acct->ns = NULL;
- atomic_long_dec(&acct->count);
- mutex_unlock(&acct->lock);
- acct_put(acct);
- }
-}
-
-static void acct_pin_kill(struct fs_pin *pin)
-{
- struct bsd_acct_struct *acct;
- acct = container_of(pin, struct bsd_acct_struct, pin);
- if (!atomic_long_inc_not_zero(&acct->count)) {
- rcu_read_unlock();
- cpu_relax();
- return;
- }
- rcu_read_unlock();
- mutex_lock(&acct->lock);
- if (!acct->ns) {
- mutex_unlock(&acct->lock);
- acct_put(acct);
- acct = NULL;
- }
- acct_kill(acct);
-}
-
static int acct_on(struct filename *pathname)
{
struct file *file;
struct vfsmount *mnt, *internal;
struct pid_namespace *ns = task_active_pid_ns(current);
- struct bsd_acct_struct *acct, *old;
+ struct bsd_acct_struct *acct;
+ struct fs_pin *old;
int err;
acct = kzalloc(sizeof(struct bsd_acct_struct), GFP_KERNEL);
@@ -252,18 +235,20 @@ static int acct_on(struct filename *pathname)
file->f_path.mnt = internal;
atomic_long_set(&acct->count, 1);
- acct->pin.kill = acct_pin_kill;
+ init_fs_pin(&acct->pin, acct_pin_kill);
acct->file = file;
acct->needcheck = jiffies;
acct->ns = ns;
mutex_init(&acct->lock);
+ INIT_WORK(&acct->work, close_work);
+ init_completion(&acct->done);
mutex_lock_nested(&acct->lock, 1); /* nobody has seen it yet */
pin_insert(&acct->pin, mnt);
- old = acct_get(ns);
- ns->bacct = acct;
- acct_kill(old);
+ rcu_read_lock();
+ old = xchg(&ns->bacct, &acct->pin);
mutex_unlock(&acct->lock);
+ pin_kill(old);
mnt_drop_write(mnt);
mntput(mnt);
return 0;
@@ -299,7 +284,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
mutex_unlock(&acct_on_mutex);
putname(tmp);
} else {
- acct_kill(acct_get(task_active_pid_ns(current)));
+ rcu_read_lock();
+ pin_kill(task_active_pid_ns(current)->bacct);
}
return error;
@@ -307,7 +293,8 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
void acct_exit_ns(struct pid_namespace *ns)
{
- acct_kill(acct_get(ns));
+ rcu_read_lock();
+ pin_kill(ns->bacct);
}
/*