diff options
Diffstat (limited to 'kernel/time/namespace.c')
| -rw-r--r-- | kernel/time/namespace.c | 229 |
1 files changed, 51 insertions, 178 deletions
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 0775b9ec952a..4bca3f78c8ea 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -12,13 +12,15 @@ #include <linux/seq_file.h> #include <linux/proc_ns.h> #include <linux/export.h> +#include <linux/nstree.h> #include <linux/time.h> #include <linux/slab.h> #include <linux/cred.h> #include <linux/err.h> #include <linux/mm.h> +#include <linux/cleanup.h> -#include <vdso/datapage.h> +#include "namespace_internal.h" ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim, struct timens_offsets *ns_offsets) @@ -88,29 +90,27 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns, goto fail; err = -ENOMEM; - ns = kmalloc(sizeof(*ns), GFP_KERNEL_ACCOUNT); + ns = kzalloc_obj(*ns, GFP_KERNEL_ACCOUNT); if (!ns) goto fail_dec; - refcount_set(&ns->ns.count, 1); - - ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!ns->vvar_page) + err = timens_vdso_alloc_vvar_page(ns); + if (err) goto fail_free; - err = ns_alloc_inum(&ns->ns); + err = ns_common_init(ns); if (err) goto fail_free_page; ns->ucounts = ucounts; - ns->ns.ops = &timens_operations; ns->user_ns = get_user_ns(user_ns); ns->offsets = old_ns->offsets; ns->frozen_offsets = false; + ns_tree_add(ns); return ns; fail_free_page: - __free_page(ns->vvar_page); + timens_vdso_free_vvar_page(ns); fail_free: kfree(ns); fail_dec: @@ -130,7 +130,7 @@ fail: * * Return: timens_for_children namespace or ERR_PTR. */ -struct time_namespace *copy_time_ns(unsigned long flags, +struct time_namespace *copy_time_ns(u64 flags, struct user_namespace *user_ns, struct time_namespace *old_ns) { if (!(flags & CLONE_NEWTIME)) @@ -139,155 +139,47 @@ struct time_namespace *copy_time_ns(unsigned long flags, return clone_time_ns(user_ns, old_ns); } -static struct timens_offset offset_from_ts(struct timespec64 off) -{ - struct timens_offset ret; - - ret.sec = off.tv_sec; - ret.nsec = off.tv_nsec; - - return ret; -} - -/* - * A time namespace VVAR page has the same layout as the VVAR page which - * contains the system wide VDSO data. - * - * For a normal task the VVAR pages are installed in the normal ordering: - * VVAR - * PVCLOCK - * HVCLOCK - * TIMENS <- Not really required - * - * Now for a timens task the pages are installed in the following order: - * TIMENS - * PVCLOCK - * HVCLOCK - * VVAR - * - * The check for vdso_data->clock_mode is in the unlikely path of - * the seq begin magic. So for the non-timens case most of the time - * 'seq' is even, so the branch is not taken. - * - * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check - * for vdso_data->clock_mode is a non-issue. The task is spin waiting for the - * update to finish and for 'seq' to become even anyway. - * - * Timens page has vdso_data->clock_mode set to VDSO_CLOCKMODE_TIMENS which - * enforces the time namespace handling path. - */ -static void timens_setup_vdso_data(struct vdso_data *vdata, - struct time_namespace *ns) -{ - struct timens_offset *offset = vdata->offset; - struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic); - struct timens_offset boottime = offset_from_ts(ns->offsets.boottime); - - vdata->seq = 1; - vdata->clock_mode = VDSO_CLOCKMODE_TIMENS; - offset[CLOCK_MONOTONIC] = monotonic; - offset[CLOCK_MONOTONIC_RAW] = monotonic; - offset[CLOCK_MONOTONIC_COARSE] = monotonic; - offset[CLOCK_BOOTTIME] = boottime; - offset[CLOCK_BOOTTIME_ALARM] = boottime; -} - -struct page *find_timens_vvar_page(struct vm_area_struct *vma) -{ - if (likely(vma->vm_mm == current->mm)) - return current->nsproxy->time_ns->vvar_page; - - /* - * VM_PFNMAP | VM_IO protect .fault() handler from being called - * through interfaces like /proc/$pid/mem or - * process_vm_{readv,writev}() as long as there's no .access() - * in special_mapping_vmops(). - * For more details check_vma_flags() and __access_remote_vm() - */ - - WARN(1, "vvar_page accessed remotely"); - - return NULL; -} - -/* - * Protects possibly multiple offsets writers racing each other - * and tasks entering the namespace. - */ -static DEFINE_MUTEX(offset_lock); - -static void timens_set_vvar_page(struct task_struct *task, - struct time_namespace *ns) -{ - struct vdso_data *vdata; - unsigned int i; - - if (ns == &init_time_ns) - return; - - /* Fast-path, taken by every task in namespace except the first. */ - if (likely(ns->frozen_offsets)) - return; - - mutex_lock(&offset_lock); - /* Nothing to-do: vvar_page has been already initialized. */ - if (ns->frozen_offsets) - goto out; - - ns->frozen_offsets = true; - vdata = arch_get_vdso_data(page_address(ns->vvar_page)); - - for (i = 0; i < CS_BASES; i++) - timens_setup_vdso_data(&vdata[i], ns); - -out: - mutex_unlock(&offset_lock); -} +DEFINE_MUTEX(timens_offset_lock); void free_time_ns(struct time_namespace *ns) { + ns_tree_remove(ns); dec_time_namespaces(ns->ucounts); put_user_ns(ns->user_ns); - ns_free_inum(&ns->ns); - __free_page(ns->vvar_page); - kfree(ns); -} - -static struct time_namespace *to_time_ns(struct ns_common *ns) -{ - return container_of(ns, struct time_namespace, ns); + ns_common_free(ns); + timens_vdso_free_vvar_page(ns); + /* Concurrent nstree traversal depends on a grace period. */ + kfree_rcu(ns, ns.ns_rcu); } static struct ns_common *timens_get(struct task_struct *task) { - struct time_namespace *ns = NULL; + struct time_namespace *ns; struct nsproxy *nsproxy; - task_lock(task); + guard(task_lock)(task); nsproxy = task->nsproxy; - if (nsproxy) { - ns = nsproxy->time_ns; - get_time_ns(ns); - } - task_unlock(task); + if (!nsproxy) + return NULL; - return ns ? &ns->ns : NULL; + ns = nsproxy->time_ns; + get_time_ns(ns); + return &ns->ns; } static struct ns_common *timens_for_children_get(struct task_struct *task) { - struct time_namespace *ns = NULL; + struct time_namespace *ns; struct nsproxy *nsproxy; - task_lock(task); + guard(task_lock)(task); nsproxy = task->nsproxy; - if (nsproxy) { - ns = nsproxy->time_ns_for_children; - get_time_ns(ns); - } - task_unlock(task); + if (!nsproxy) + return NULL; - return ns ? &ns->ns : NULL; + ns = nsproxy->time_ns_for_children; + get_time_ns(ns); + return &ns->ns; } static void timens_put(struct ns_common *ns) @@ -295,12 +187,6 @@ static void timens_put(struct ns_common *ns) put_time_ns(to_time_ns(ns)); } -void timens_commit(struct task_struct *tsk, struct time_namespace *ns) -{ - timens_set_vvar_page(tsk, ns); - vdso_join_timens(tsk, ns); -} - static int timens_install(struct nsset *nsset, struct ns_common *new) { struct nsproxy *nsproxy = nsset->nsproxy; @@ -364,36 +250,33 @@ static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts) void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m) { - struct ns_common *ns; - struct time_namespace *time_ns; + struct time_namespace *time_ns __free(time_ns) = NULL; + struct ns_common *ns = timens_for_children_get(p); - ns = timens_for_children_get(p); if (!ns) return; + time_ns = to_time_ns(ns); show_offset(m, CLOCK_MONOTONIC, &time_ns->offsets.monotonic); show_offset(m, CLOCK_BOOTTIME, &time_ns->offsets.boottime); - put_time_ns(time_ns); } int proc_timens_set_offset(struct file *file, struct task_struct *p, struct proc_timens_offset *offsets, int noffsets) { - struct ns_common *ns; - struct time_namespace *time_ns; + struct time_namespace *time_ns __free(time_ns) = NULL; + struct ns_common *ns = timens_for_children_get(p); struct timespec64 tp; - int i, err; + int i; - ns = timens_for_children_get(p); if (!ns) return -ESRCH; + time_ns = to_time_ns(ns); - if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) { - put_time_ns(time_ns); + if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) return -EPERM; - } for (i = 0; i < noffsets; i++) { struct proc_timens_offset *off = &offsets[i]; @@ -406,15 +289,12 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p, ktime_get_boottime_ts64(&tp); break; default: - err = -EINVAL; - goto out; + return -EINVAL; } - err = -ERANGE; - if (off->val.tv_sec > KTIME_SEC_MAX || off->val.tv_sec < -KTIME_SEC_MAX) - goto out; + return -ERANGE; tp = timespec64_add(tp, off->val); /* @@ -422,16 +302,13 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p, * still unreachable. */ if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2) - goto out; + return -ERANGE; } - mutex_lock(&offset_lock); - if (time_ns->frozen_offsets) { - err = -EACCES; - goto out_unlock; - } + guard(mutex)(&timens_offset_lock); + if (time_ns->frozen_offsets) + return -EACCES; - err = 0; /* Don't report errors after this line */ for (i = 0; i < noffsets; i++) { struct proc_timens_offset *off = &offsets[i]; @@ -449,17 +326,11 @@ int proc_timens_set_offset(struct file *file, struct task_struct *p, *offset = off->val; } -out_unlock: - mutex_unlock(&offset_lock); -out: - put_time_ns(time_ns); - - return err; + return 0; } const struct proc_ns_operations timens_operations = { .name = "time", - .type = CLONE_NEWTIME, .get = timens_get, .put = timens_put, .install = timens_install, @@ -469,7 +340,6 @@ const struct proc_ns_operations timens_operations = { const struct proc_ns_operations timens_for_children_operations = { .name = "time_for_children", .real_ns_name = "time", - .type = CLONE_NEWTIME, .get = timens_for_children_get, .put = timens_put, .install = timens_install, @@ -477,9 +347,12 @@ const struct proc_ns_operations timens_for_children_operations = { }; struct time_namespace init_time_ns = { - .ns.count = REFCOUNT_INIT(3), + .ns = NS_COMMON_INIT(init_time_ns), .user_ns = &init_user_ns, - .ns.inum = PROC_TIME_INIT_INO, - .ns.ops = &timens_operations, .frozen_offsets = true, }; + +void __init time_ns_init(void) +{ + ns_tree_add(&init_time_ns); +} |
