summary | refs | log | tree | commit | diff
path: root/kernel/time/namespace.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time/namespace.c')
-rw-r--r-- kernel/time/namespace.c 229
1 files changed, 51 insertions, 178 deletions
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 0775b9ec952a..4bca3f78c8ea 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -12,13 +12,15 @@
#include <linux/seq_file.h>
#include <linux/proc_ns.h>
#include <linux/export.h>
+#include <linux/nstree.h>
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/err.h>
#include <linux/mm.h>
+#include <linux/cleanup.h>
-#include <vdso/datapage.h>
+#include "namespace_internal.h"
ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
struct timens_offsets *ns_offsets)
@@ -88,29 +90,27 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
goto fail;
err = -ENOMEM;
- ns = kmalloc(sizeof(*ns), GFP_KERNEL_ACCOUNT);
+ ns = kzalloc_obj(*ns, GFP_KERNEL_ACCOUNT);
if (!ns)
goto fail_dec;
- refcount_set(&ns->ns.count, 1);
-
- ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- if (!ns->vvar_page)
+ err = timens_vdso_alloc_vvar_page(ns);
+ if (err)
goto fail_free;
- err = ns_alloc_inum(&ns->ns);
+ err = ns_common_init(ns);
if (err)
goto fail_free_page;
ns->ucounts = ucounts;
- ns->ns.ops = &timens_operations;
ns->user_ns = get_user_ns(user_ns);
ns->offsets = old_ns->offsets;
ns->frozen_offsets = false;
+ ns_tree_add(ns);
return ns;
fail_free_page:
- __free_page(ns->vvar_page);
+ timens_vdso_free_vvar_page(ns);
fail_free:
kfree(ns);
fail_dec:
@@ -130,7 +130,7 @@ fail:
*
* Return: timens_for_children namespace or ERR_PTR.
*/
-struct time_namespace *copy_time_ns(unsigned long flags,
+struct time_namespace *copy_time_ns(u64 flags,
struct user_namespace *user_ns, struct time_namespace *old_ns)
{
if (!(flags & CLONE_NEWTIME))
@@ -139,155 +139,47 @@ struct time_namespace *copy_time_ns(unsigned long flags,
return clone_time_ns(user_ns, old_ns);
}
-static struct timens_offset offset_from_ts(struct timespec64 off)
-{
- struct timens_offset ret;
-
- ret.sec = off.tv_sec;
- ret.nsec = off.tv_nsec;
-
- return ret;
-}
-
-/*
- * A time namespace VVAR page has the same layout as the VVAR page which
- * contains the system wide VDSO data.
- *
- * For a normal task the VVAR pages are installed in the normal ordering:
- * VVAR
- * PVCLOCK
- * HVCLOCK
- * TIMENS <- Not really required
- *
- * Now for a timens task the pages are installed in the following order:
- * TIMENS
- * PVCLOCK
- * HVCLOCK
- * VVAR
- *
- * The check for vdso_data->clock_mode is in the unlikely path of
- * the seq begin magic. So for the non-timens case most of the time
- * 'seq' is even, so the branch is not taken.
- *
- * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
- * for vdso_data->clock_mode is a non-issue. The task is spin waiting for the
- * update to finish and for 'seq' to become even anyway.
- *
- * Timens page has vdso_data->clock_mode set to VDSO_CLOCKMODE_TIMENS which
- * enforces the time namespace handling path.
- */
-static void timens_setup_vdso_data(struct vdso_data *vdata,
- struct time_namespace *ns)
-{
- struct timens_offset *offset = vdata->offset;
- struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
- struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
-
- vdata->seq = 1;
- vdata->clock_mode = VDSO_CLOCKMODE_TIMENS;
- offset[CLOCK_MONOTONIC] = monotonic;
- offset[CLOCK_MONOTONIC_RAW] = monotonic;
- offset[CLOCK_MONOTONIC_COARSE] = monotonic;
- offset[CLOCK_BOOTTIME] = boottime;
- offset[CLOCK_BOOTTIME_ALARM] = boottime;
-}
-
-struct page *find_timens_vvar_page(struct vm_area_struct *vma)
-{
- if (likely(vma->vm_mm == current->mm))
- return current->nsproxy->time_ns->vvar_page;
-
- /*
- * VM_PFNMAP | VM_IO protect .fault() handler from being called
- * through interfaces like /proc/$pid/mem or
- * process_vm_{readv,writev}() as long as there's no .access()
- * in special_mapping_vmops().
- * For more details check_vma_flags() and __access_remote_vm()
- */
-
- WARN(1, "vvar_page accessed remotely");
-
- return NULL;
-}
-
-/*
- * Protects possibly multiple offsets writers racing each other
- * and tasks entering the namespace.
- */
-static DEFINE_MUTEX(offset_lock);
-
-static void timens_set_vvar_page(struct task_struct *task,
- struct time_namespace *ns)
-{
- struct vdso_data *vdata;
- unsigned int i;
-
- if (ns == &init_time_ns)
- return;
-
- /* Fast-path, taken by every task in namespace except the first. */
- if (likely(ns->frozen_offsets))
- return;
-
- mutex_lock(&offset_lock);
- /* Nothing to-do: vvar_page has been already initialized. */
- if (ns->frozen_offsets)
- goto out;
-
- ns->frozen_offsets = true;
- vdata = arch_get_vdso_data(page_address(ns->vvar_page));
-
- for (i = 0; i < CS_BASES; i++)
- timens_setup_vdso_data(&vdata[i], ns);
-
-out:
- mutex_unlock(&offset_lock);
-}
+DEFINE_MUTEX(timens_offset_lock);
void free_time_ns(struct time_namespace *ns)
{
+ ns_tree_remove(ns);
dec_time_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
- ns_free_inum(&ns->ns);
- __free_page(ns->vvar_page);
- kfree(ns);
-}
-
-static struct time_namespace *to_time_ns(struct ns_common *ns)
-{
- return container_of(ns, struct time_namespace, ns);
+ ns_common_free(ns);
+ timens_vdso_free_vvar_page(ns);
+ /* Concurrent nstree traversal depends on a grace period. */
+ kfree_rcu(ns, ns.ns_rcu);
}
static struct ns_common *timens_get(struct task_struct *task)
{
- struct time_namespace *ns = NULL;
+ struct time_namespace *ns;
struct nsproxy *nsproxy;
- task_lock(task);
+ guard(task_lock)(task);
nsproxy = task->nsproxy;
- if (nsproxy) {
- ns = nsproxy->time_ns;
- get_time_ns(ns);
- }
- task_unlock(task);
+ if (!nsproxy)
+ return NULL;
- return ns ? &ns->ns : NULL;
+ ns = nsproxy->time_ns;
+ get_time_ns(ns);
+ return &ns->ns;
}
static struct ns_common *timens_for_children_get(struct task_struct *task)
{
- struct time_namespace *ns = NULL;
+ struct time_namespace *ns;
struct nsproxy *nsproxy;
- task_lock(task);
+ guard(task_lock)(task);
nsproxy = task->nsproxy;
- if (nsproxy) {
- ns = nsproxy->time_ns_for_children;
- get_time_ns(ns);
- }
- task_unlock(task);
+ if (!nsproxy)
+ return NULL;
- return ns ? &ns->ns : NULL;
+ ns = nsproxy->time_ns_for_children;
+ get_time_ns(ns);
+ return &ns->ns;
}
static void timens_put(struct ns_common *ns)
@@ -295,12 +187,6 @@ static void timens_put(struct ns_common *ns)
put_time_ns(to_time_ns(ns));
}
-void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
-{
- timens_set_vvar_page(tsk, ns);
- vdso_join_timens(tsk, ns);
-}
-
static int timens_install(struct nsset *nsset, struct ns_common *new)
{
struct nsproxy *nsproxy = nsset->nsproxy;
@@ -364,36 +250,33 @@ static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts)
void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m)
{
- struct ns_common *ns;
- struct time_namespace *time_ns;
+ struct time_namespace *time_ns __free(time_ns) = NULL;
+ struct ns_common *ns = timens_for_children_get(p);
- ns = timens_for_children_get(p);
if (!ns)
return;
+
time_ns = to_time_ns(ns);
show_offset(m, CLOCK_MONOTONIC, &time_ns->offsets.monotonic);
show_offset(m, CLOCK_BOOTTIME, &time_ns->offsets.boottime);
- put_time_ns(time_ns);
}
int proc_timens_set_offset(struct file *file, struct task_struct *p,
struct proc_timens_offset *offsets, int noffsets)
{
- struct ns_common *ns;
- struct time_namespace *time_ns;
+ struct time_namespace *time_ns __free(time_ns) = NULL;
+ struct ns_common *ns = timens_for_children_get(p);
struct timespec64 tp;
- int i, err;
+ int i;
- ns = timens_for_children_get(p);
if (!ns)
return -ESRCH;
+
time_ns = to_time_ns(ns);
- if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) {
- put_time_ns(time_ns);
+ if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME))
return -EPERM;
- }
for (i = 0; i < noffsets; i++) {
struct proc_timens_offset *off = &offsets[i];
@@ -406,15 +289,12 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p,
ktime_get_boottime_ts64(&tp);
break;
default:
- err = -EINVAL;
- goto out;
+ return -EINVAL;
}
- err = -ERANGE;
-
if (off->val.tv_sec > KTIME_SEC_MAX ||
off->val.tv_sec < -KTIME_SEC_MAX)
- goto out;
+ return -ERANGE;
tp = timespec64_add(tp, off->val);
/*
@@ -422,16 +302,13 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p,
* still unreachable.
*/
if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2)
- goto out;
+ return -ERANGE;
}
- mutex_lock(&offset_lock);
- if (time_ns->frozen_offsets) {
- err = -EACCES;
- goto out_unlock;
- }
+ guard(mutex)(&timens_offset_lock);
+ if (time_ns->frozen_offsets)
+ return -EACCES;
- err = 0;
/* Don't report errors after this line */
for (i = 0; i < noffsets; i++) {
struct proc_timens_offset *off = &offsets[i];
@@ -449,17 +326,11 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p,
*offset = off->val;
}
-out_unlock:
- mutex_unlock(&offset_lock);
-out:
- put_time_ns(time_ns);
-
- return err;
+ return 0;
}
const struct proc_ns_operations timens_operations = {
.name = "time",
- .type = CLONE_NEWTIME,
.get = timens_get,
.put = timens_put,
.install = timens_install,
@@ -469,7 +340,6 @@ const struct proc_ns_operations timens_operations = {
const struct proc_ns_operations timens_for_children_operations = {
.name = "time_for_children",
.real_ns_name = "time",
- .type = CLONE_NEWTIME,
.get = timens_for_children_get,
.put = timens_put,
.install = timens_install,
@@ -477,9 +347,12 @@ const struct proc_ns_operations timens_for_children_operations = {
};
struct time_namespace init_time_ns = {
- .ns.count = REFCOUNT_INIT(3),
+ .ns = NS_COMMON_INIT(init_time_ns),
.user_ns = &init_user_ns,
- .ns.inum = PROC_TIME_INIT_INO,
- .ns.ops = &timens_operations,
.frozen_offsets = true,
};
+
+void __init time_ns_init(void)
+{
+ ns_tree_add(&init_time_ns);
+}