summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-01-27 16:47:05 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-01-27 16:47:05 -0800
commite279160f491392f1345f6eb4b0eeec5a6a2ecdd7 (patch)
tree018410c41f919774f70e6350fc251556bf789944 /kernel
parent534b0a8b677443c0aa8c4c71ff7887f08a2b9b41 (diff)
parentfd928f3e32ba09381b287f8b732418434d932855 (diff)
downloadlwn-e279160f491392f1345f6eb4b0eeec5a6a2ecdd7.tar.gz
lwn-e279160f491392f1345f6eb4b0eeec5a6a2ecdd7.zip
Merge tag 'timers-core-2020-01-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timer updates from Thomas Gleixner: "The timekeeping and timers departement provides: - Time namespace support: If a container migrates from one host to another then it expects that clocks based on MONOTONIC and BOOTTIME are not subject to disruption. Due to different boot time and non-suspended runtime these clocks can differ significantly on two hosts, in the worst case time goes backwards which is a violation of the POSIX requirements. The time namespace addresses this problem. It allows to set offsets for clock MONOTONIC and BOOTTIME once after creation and before tasks are associated with the namespace. These offsets are taken into account by timers and timekeeping including the VDSO. Offsets for wall clock based clocks (REALTIME/TAI) are not provided by this mechanism. While in theory possible, the overhead and code complexity would be immense and not justified by the esoteric potential use cases which were discussed at Plumbers '18. The overhead for tasks in the root namespace (ie where host time offsets = 0) is in the noise and great effort was made to ensure that especially in the VDSO. If time namespace is disabled in the kernel configuration the code is compiled out. Kudos to Andrei Vagin and Dmitry Sofanov who implemented this feature and kept on for more than a year addressing review comments, finding better solutions. A pleasant experience. - Overhaul of the alarmtimer device dependency handling to ensure that the init/suspend/resume ordering is correct. - A new clocksource/event driver for Microchip PIT64 - Suspend/resume support for the Hyper-V clocksource - The usual pile of fixes, updates and improvements mostly in the driver code" * tag 'timers-core-2020-01-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (71 commits) alarmtimer: Make alarmtimer_get_rtcdev() a stub when CONFIG_RTC_CLASS=n alarmtimer: Use wakeup source from alarmtimer platform device alarmtimer: Make alarmtimer platform device child of RTC device alarmtimer: Update alarmtimer_get_rtcdev() docs to reflect reality hrtimer: Add missing sparse annotation for __run_timer() lib/vdso: Only read hrtimer_res when needed in __cvdso_clock_getres() MIPS: vdso: Define BUILD_VDSO32 when building a 32bit kernel clocksource/drivers/hyper-v: Set TSC clocksource as default w/ InvariantTSC clocksource/drivers/hyper-v: Untangle stimers and timesync from clocksources clocksource/drivers/timer-microchip-pit64b: Fix sparse warning clocksource/drivers/exynos_mct: Rename Exynos to lowercase clocksource/drivers/timer-ti-dm: Fix uninitialized pointer access clocksource/drivers/timer-ti-dm: Switch to platform_get_irq clocksource/drivers/timer-ti-dm: Convert to devm_platform_ioremap_resource clocksource/drivers/em_sti: Fix variable declaration in em_sti_probe clocksource/drivers/em_sti: Convert to devm_platform_ioremap_resource clocksource/drivers/bcm2835_timer: Fix memory leak of timer clocksource/drivers/cadence-ttc: Use ttc driver as platform driver clocksource/drivers/timer-microchip-pit64b: Add Microchip PIT64B support clocksource/drivers/hyper-v: Reserve PAGE_SIZE space for tsc page ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/fork.c16
-rw-r--r--kernel/nsproxy.c41
-rw-r--r--kernel/time/Makefile1
-rw-r--r--kernel/time/alarmtimer.c121
-rw-r--r--kernel/time/hrtimer.c14
-rw-r--r--kernel/time/namespace.c468
-rw-r--r--kernel/time/posix-clock.c8
-rw-r--r--kernel/time/posix-cpu-timers.c32
-rw-r--r--kernel/time/posix-stubs.c15
-rw-r--r--kernel/time/posix-timers.c88
-rw-r--r--kernel/time/posix-timers.h7
-rw-r--r--kernel/time/sched_clock.c7
-rw-r--r--kernel/time/tick-common.c2
13 files changed, 703 insertions, 117 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 080809560072..ef82feb4bddc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1832,6 +1832,7 @@ static __latent_entropy struct task_struct *copy_process(
struct multiprocess_signals delayed;
struct file *pidfile = NULL;
u64 clone_flags = args->flags;
+ struct nsproxy *nsp = current->nsproxy;
/*
* Don't allow sharing the root directory with processes in a different
@@ -1874,8 +1875,16 @@ static __latent_entropy struct task_struct *copy_process(
*/
if (clone_flags & CLONE_THREAD) {
if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
- (task_active_pid_ns(current) !=
- current->nsproxy->pid_ns_for_children))
+ (task_active_pid_ns(current) != nsp->pid_ns_for_children))
+ return ERR_PTR(-EINVAL);
+ }
+
+ /*
+ * If the new process will be in a different time namespace
+ * do not allow it to share VM or a thread group with the forking task.
+ */
+ if (clone_flags & (CLONE_THREAD | CLONE_VM)) {
+ if (nsp->time_ns != nsp->time_ns_for_children)
return ERR_PTR(-EINVAL);
}
@@ -2821,7 +2830,8 @@ static int check_unshare_flags(unsigned long unshare_flags)
if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
- CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP))
+ CLONE_NEWUSER|CLONE_NEWPID|CLONE_NEWCGROUP|
+ CLONE_NEWTIME))
return -EINVAL;
/*
* Not implemented, but pretend it works if there is nothing
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index c815f58e6bc0..ed9882108cd2 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -18,6 +18,7 @@
#include <linux/pid_namespace.h>
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
+#include <linux/time_namespace.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/syscalls.h>
@@ -40,6 +41,10 @@ struct nsproxy init_nsproxy = {
#ifdef CONFIG_CGROUPS
.cgroup_ns = &init_cgroup_ns,
#endif
+#ifdef CONFIG_TIME_NS
+ .time_ns = &init_time_ns,
+ .time_ns_for_children = &init_time_ns,
+#endif
};
static inline struct nsproxy *create_nsproxy(void)
@@ -106,8 +111,18 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
goto out_net;
}
+ new_nsp->time_ns_for_children = copy_time_ns(flags, user_ns,
+ tsk->nsproxy->time_ns_for_children);
+ if (IS_ERR(new_nsp->time_ns_for_children)) {
+ err = PTR_ERR(new_nsp->time_ns_for_children);
+ goto out_time;
+ }
+ new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns);
+
return new_nsp;
+out_time:
+ put_net(new_nsp->net_ns);
out_net:
put_cgroup_ns(new_nsp->cgroup_ns);
out_cgroup:
@@ -136,15 +151,16 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns;
+ int ret;
if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
- CLONE_NEWCGROUP)))) {
- get_nsproxy(old_ns);
- return 0;
- }
-
- if (!ns_capable(user_ns, CAP_SYS_ADMIN))
+ CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
+ if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
+ get_nsproxy(old_ns);
+ return 0;
+ }
+ } else if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
/*
@@ -162,6 +178,12 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
if (IS_ERR(new_ns))
return PTR_ERR(new_ns);
+ ret = timens_on_fork(new_ns, tsk);
+ if (ret) {
+ free_nsproxy(new_ns);
+ return ret;
+ }
+
tsk->nsproxy = new_ns;
return 0;
}
@@ -176,6 +198,10 @@ void free_nsproxy(struct nsproxy *ns)
put_ipc_ns(ns->ipc_ns);
if (ns->pid_ns_for_children)
put_pid_ns(ns->pid_ns_for_children);
+ if (ns->time_ns)
+ put_time_ns(ns->time_ns);
+ if (ns->time_ns_for_children)
+ put_time_ns(ns->time_ns_for_children);
put_cgroup_ns(ns->cgroup_ns);
put_net(ns->net_ns);
kmem_cache_free(nsproxy_cachep, ns);
@@ -192,7 +218,8 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
int err = 0;
if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
- CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP)))
+ CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
+ CLONE_NEWTIME)))
return 0;
user_ns = new_cred ? new_cred->user_ns : current_user_ns();
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index 1867044800bb..c8f00168afe8 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -19,3 +19,4 @@ obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o
obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
+obj-$(CONFIG_TIME_NS) += namespace.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 451f9d05ccfe..2ffb466af77e 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -26,6 +26,7 @@
#include <linux/freezer.h>
#include <linux/compat.h>
#include <linux/module.h>
+#include <linux/time_namespace.h>
#include "posix-timers.h"
@@ -36,13 +37,15 @@
* struct alarm_base - Alarm timer bases
* @lock: Lock for syncrhonized access to the base
* @timerqueue: Timerqueue head managing the list of events
- * @gettime: Function to read the time correlating to the base
+ * @get_ktime: Function to read the time correlating to the base
+ * @get_timespec: Function to read the namespace time correlating to the base
* @base_clockid: clockid for the base
*/
static struct alarm_base {
spinlock_t lock;
struct timerqueue_head timerqueue;
- ktime_t (*gettime)(void);
+ ktime_t (*get_ktime)(void);
+ void (*get_timespec)(struct timespec64 *tp);
clockid_t base_clockid;
} alarm_bases[ALARM_NUMTYPE];
@@ -55,8 +58,6 @@ static DEFINE_SPINLOCK(freezer_delta_lock);
#endif
#ifdef CONFIG_RTC_CLASS
-static struct wakeup_source *ws;
-
/* rtc timer and device for setting alarm wakeups at suspend */
static struct rtc_timer rtctimer;
static struct rtc_device *rtcdev;
@@ -66,8 +67,6 @@ static DEFINE_SPINLOCK(rtcdev_lock);
* alarmtimer_get_rtcdev - Return selected rtcdevice
*
* This function returns the rtc device to use for wakealarms.
- * If one has not already been chosen, it checks to see if a
- * functional rtc device is available.
*/
struct rtc_device *alarmtimer_get_rtcdev(void)
{
@@ -87,7 +86,8 @@ static int alarmtimer_rtc_add_device(struct device *dev,
{
unsigned long flags;
struct rtc_device *rtc = to_rtc_device(dev);
- struct wakeup_source *__ws;
+ struct platform_device *pdev;
+ int ret = 0;
if (rtcdev)
return -EBUSY;
@@ -97,26 +97,31 @@ static int alarmtimer_rtc_add_device(struct device *dev,
if (!device_may_wakeup(rtc->dev.parent))
return -1;
- __ws = wakeup_source_register(dev, "alarmtimer");
+ pdev = platform_device_register_data(dev, "alarmtimer",
+ PLATFORM_DEVID_AUTO, NULL, 0);
+ if (!IS_ERR(pdev))
+ device_init_wakeup(&pdev->dev, true);
spin_lock_irqsave(&rtcdev_lock, flags);
- if (!rtcdev) {
+ if (!IS_ERR(pdev) && !rtcdev) {
if (!try_module_get(rtc->owner)) {
- spin_unlock_irqrestore(&rtcdev_lock, flags);
- return -1;
+ ret = -1;
+ goto unlock;
}
rtcdev = rtc;
/* hold a reference so it doesn't go away */
get_device(dev);
- ws = __ws;
- __ws = NULL;
+ pdev = NULL;
+ } else {
+ ret = -1;
}
+unlock:
spin_unlock_irqrestore(&rtcdev_lock, flags);
- wakeup_source_unregister(__ws);
+ platform_device_unregister(pdev);
- return 0;
+ return ret;
}
static inline void alarmtimer_rtc_timer_init(void)
@@ -138,11 +143,6 @@ static void alarmtimer_rtc_interface_remove(void)
class_interface_unregister(&alarmtimer_rtc_interface);
}
#else
-struct rtc_device *alarmtimer_get_rtcdev(void)
-{
- return NULL;
-}
-#define rtcdev (NULL)
static inline int alarmtimer_rtc_interface_setup(void) { return 0; }
static inline void alarmtimer_rtc_interface_remove(void) { }
static inline void alarmtimer_rtc_timer_init(void) { }
@@ -207,7 +207,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
spin_unlock_irqrestore(&base->lock, flags);
if (alarm->function)
- restart = alarm->function(alarm, base->gettime());
+ restart = alarm->function(alarm, base->get_ktime());
spin_lock_irqsave(&base->lock, flags);
if (restart != ALARMTIMER_NORESTART) {
@@ -217,7 +217,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
}
spin_unlock_irqrestore(&base->lock, flags);
- trace_alarmtimer_fired(alarm, base->gettime());
+ trace_alarmtimer_fired(alarm, base->get_ktime());
return ret;
}
@@ -225,7 +225,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
ktime_t alarm_expires_remaining(const struct alarm *alarm)
{
struct alarm_base *base = &alarm_bases[alarm->type];
- return ktime_sub(alarm->node.expires, base->gettime());
+ return ktime_sub(alarm->node.expires, base->get_ktime());
}
EXPORT_SYMBOL_GPL(alarm_expires_remaining);
@@ -270,7 +270,7 @@ static int alarmtimer_suspend(struct device *dev)
spin_unlock_irqrestore(&base->lock, flags);
if (!next)
continue;
- delta = ktime_sub(next->expires, base->gettime());
+ delta = ktime_sub(next->expires, base->get_ktime());
if (!min || (delta < min)) {
expires = next->expires;
min = delta;
@@ -281,7 +281,7 @@ static int alarmtimer_suspend(struct device *dev)
return 0;
if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
- __pm_wakeup_event(ws, 2 * MSEC_PER_SEC);
+ pm_wakeup_event(dev, 2 * MSEC_PER_SEC);
return -EBUSY;
}
@@ -296,7 +296,7 @@ static int alarmtimer_suspend(struct device *dev)
/* Set alarm, if in the past reject suspend briefly to handle */
ret = rtc_timer_start(rtc, &rtctimer, now, 0);
if (ret < 0)
- __pm_wakeup_event(ws, MSEC_PER_SEC);
+ pm_wakeup_event(dev, MSEC_PER_SEC);
return ret;
}
@@ -364,7 +364,7 @@ void alarm_start(struct alarm *alarm, ktime_t start)
hrtimer_start(&alarm->timer, alarm->node.expires, HRTIMER_MODE_ABS);
spin_unlock_irqrestore(&base->lock, flags);
- trace_alarmtimer_start(alarm, base->gettime());
+ trace_alarmtimer_start(alarm, base->get_ktime());
}
EXPORT_SYMBOL_GPL(alarm_start);
@@ -377,7 +377,7 @@ void alarm_start_relative(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
- start = ktime_add_safe(start, base->gettime());
+ start = ktime_add_safe(start, base->get_ktime());
alarm_start(alarm, start);
}
EXPORT_SYMBOL_GPL(alarm_start_relative);
@@ -414,7 +414,7 @@ int alarm_try_to_cancel(struct alarm *alarm)
alarmtimer_dequeue(base, alarm);
spin_unlock_irqrestore(&base->lock, flags);
- trace_alarmtimer_cancel(alarm, base->gettime());
+ trace_alarmtimer_cancel(alarm, base->get_ktime());
return ret;
}
EXPORT_SYMBOL_GPL(alarm_try_to_cancel);
@@ -474,7 +474,7 @@ u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
{
struct alarm_base *base = &alarm_bases[alarm->type];
- return alarm_forward(alarm, base->gettime(), interval);
+ return alarm_forward(alarm, base->get_ktime(), interval);
}
EXPORT_SYMBOL_GPL(alarm_forward_now);
@@ -500,7 +500,7 @@ static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type)
return;
}
- delta = ktime_sub(absexp, base->gettime());
+ delta = ktime_sub(absexp, base->get_ktime());
spin_lock_irqsave(&freezer_delta_lock, flags);
if (!freezer_delta || (delta < freezer_delta)) {
@@ -632,7 +632,7 @@ static void alarm_timer_arm(struct k_itimer *timr, ktime_t expires,
struct alarm_base *base = &alarm_bases[alarm->type];
if (!absolute)
- expires = ktime_add_safe(expires, base->gettime());
+ expires = ktime_add_safe(expires, base->get_ktime());
if (sigev_none)
alarm->node.expires = expires;
else
@@ -657,24 +657,41 @@ static int alarm_clock_getres(const clockid_t which_clock, struct timespec64 *tp
}
/**
- * alarm_clock_get - posix clock_get interface
+ * alarm_clock_get_timespec - posix clock_get_timespec interface
* @which_clock: clockid
* @tp: timespec to fill.
*
- * Provides the underlying alarm base time.
+ * Provides the underlying alarm base time in a tasks time namespace.
*/
-static int alarm_clock_get(clockid_t which_clock, struct timespec64 *tp)
+static int alarm_clock_get_timespec(clockid_t which_clock, struct timespec64 *tp)
{
struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)];
if (!alarmtimer_get_rtcdev())
return -EINVAL;
- *tp = ktime_to_timespec64(base->gettime());
+ base->get_timespec(tp);
+
return 0;
}
/**
+ * alarm_clock_get_ktime - posix clock_get_ktime interface
+ * @which_clock: clockid
+ *
+ * Provides the underlying alarm base time in the root namespace.
+ */
+static ktime_t alarm_clock_get_ktime(clockid_t which_clock)
+{
+ struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)];
+
+ if (!alarmtimer_get_rtcdev())
+ return -EINVAL;
+
+ return base->get_ktime();
+}
+
+/**
* alarm_timer_create - posix timer_create interface
* @new_timer: k_itimer pointer to manage
*
@@ -747,7 +764,7 @@ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp,
struct timespec64 rmt;
ktime_t rem;
- rem = ktime_sub(absexp, alarm_bases[type].gettime());
+ rem = ktime_sub(absexp, alarm_bases[type].get_ktime());
if (rem <= 0)
return 0;
@@ -816,9 +833,11 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
exp = timespec64_to_ktime(*tsreq);
/* Convert (if necessary) to absolute time */
if (flags != TIMER_ABSTIME) {
- ktime_t now = alarm_bases[type].gettime();
+ ktime_t now = alarm_bases[type].get_ktime();
exp = ktime_add_safe(now, exp);
+ } else {
+ exp = timens_ktime_to_host(which_clock, exp);
}
ret = alarmtimer_do_nsleep(&alarm, exp, type);
@@ -837,7 +856,8 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
const struct k_clock alarm_clock = {
.clock_getres = alarm_clock_getres,
- .clock_get = alarm_clock_get,
+ .clock_get_ktime = alarm_clock_get_ktime,
+ .clock_get_timespec = alarm_clock_get_timespec,
.timer_create = alarm_timer_create,
.timer_set = common_timer_set,
.timer_del = common_timer_del,
@@ -866,6 +886,12 @@ static struct platform_driver alarmtimer_driver = {
}
};
+static void get_boottime_timespec(struct timespec64 *tp)
+{
+ ktime_get_boottime_ts64(tp);
+ timens_add_boottime(tp);
+}
+
/**
* alarmtimer_init - Initialize alarm timer code
*
@@ -874,17 +900,18 @@ static struct platform_driver alarmtimer_driver = {
*/
static int __init alarmtimer_init(void)
{
- struct platform_device *pdev;
- int error = 0;
+ int error;
int i;
alarmtimer_rtc_timer_init();
/* Initialize alarm bases */
alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME;
- alarm_bases[ALARM_REALTIME].gettime = &ktime_get_real;
+ alarm_bases[ALARM_REALTIME].get_ktime = &ktime_get_real;
+ alarm_bases[ALARM_REALTIME].get_timespec = ktime_get_real_ts64,
alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME;
- alarm_bases[ALARM_BOOTTIME].gettime = &ktime_get_boottime;
+ alarm_bases[ALARM_BOOTTIME].get_ktime = &ktime_get_boottime;
+ alarm_bases[ALARM_BOOTTIME].get_timespec = get_boottime_timespec;
for (i = 0; i < ALARM_NUMTYPE; i++) {
timerqueue_init_head(&alarm_bases[i].timerqueue);
spin_lock_init(&alarm_bases[i].lock);
@@ -898,15 +925,7 @@ static int __init alarmtimer_init(void)
if (error)
goto out_if;
- pdev = platform_device_register_simple("alarmtimer", -1, NULL, 0);
- if (IS_ERR(pdev)) {
- error = PTR_ERR(pdev);
- goto out_drv;
- }
return 0;
-
-out_drv:
- platform_driver_unregister(&alarmtimer_driver);
out_if:
alarmtimer_rtc_interface_remove();
return error;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 8de90ea31280..3a609e7344f3 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1477,7 +1477,7 @@ EXPORT_SYMBOL_GPL(hrtimer_active);
static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
struct hrtimer_clock_base *base,
struct hrtimer *timer, ktime_t *now,
- unsigned long flags)
+ unsigned long flags) __must_hold(&cpu_base->lock)
{
enum hrtimer_restart (*fn)(struct hrtimer *);
int restart;
@@ -1910,8 +1910,8 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
return ret;
}
-long hrtimer_nanosleep(const struct timespec64 *rqtp,
- const enum hrtimer_mode mode, const clockid_t clockid)
+long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
+ const clockid_t clockid)
{
struct restart_block *restart;
struct hrtimer_sleeper t;
@@ -1923,7 +1923,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
slack = 0;
hrtimer_init_sleeper_on_stack(&t, clockid, mode);
- hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
+ hrtimer_set_expires_range_ns(&t.timer, rqtp, slack);
ret = do_nanosleep(&t, mode);
if (ret != -ERESTART_RESTARTBLOCK)
goto out;
@@ -1958,7 +1958,8 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
current->restart_block.nanosleep.rmtp = rmtp;
- return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+ return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
+ CLOCK_MONOTONIC);
}
#endif
@@ -1978,7 +1979,8 @@ SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
current->restart_block.nanosleep.compat_rmtp = rmtp;
- return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
+ return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
+ CLOCK_MONOTONIC);
}
#endif
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
new file mode 100644
index 000000000000..12858507d75a
--- /dev/null
+++ b/kernel/time/namespace.c
@@ -0,0 +1,468 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: Andrei Vagin <avagin@openvz.org>
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+
+#include <linux/time_namespace.h>
+#include <linux/user_namespace.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/seq_file.h>
+#include <linux/proc_ns.h>
+#include <linux/export.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+
+#include <vdso/datapage.h>
+
+ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
+ struct timens_offsets *ns_offsets)
+{
+ ktime_t offset;
+
+ switch (clockid) {
+ case CLOCK_MONOTONIC:
+ offset = timespec64_to_ktime(ns_offsets->monotonic);
+ break;
+ case CLOCK_BOOTTIME:
+ case CLOCK_BOOTTIME_ALARM:
+ offset = timespec64_to_ktime(ns_offsets->boottime);
+ break;
+ default:
+ return tim;
+ }
+
+ /*
+ * Check that @tim value is in [offset, KTIME_MAX + offset]
+ * and subtract offset.
+ */
+ if (tim < offset) {
+ /*
+ * User can specify @tim *absolute* value - if it's lesser than
+ * the time namespace's offset - it's already expired.
+ */
+ tim = 0;
+ } else {
+ tim = ktime_sub(tim, offset);
+ if (unlikely(tim > KTIME_MAX))
+ tim = KTIME_MAX;
+ }
+
+ return tim;
+}
+
+static struct ucounts *inc_time_namespaces(struct user_namespace *ns)
+{
+ return inc_ucount(ns, current_euid(), UCOUNT_TIME_NAMESPACES);
+}
+
+static void dec_time_namespaces(struct ucounts *ucounts)
+{
+ dec_ucount(ucounts, UCOUNT_TIME_NAMESPACES);
+}
+
+/**
+ * clone_time_ns - Clone a time namespace
+ * @user_ns: User namespace which owns a new namespace.
+ * @old_ns: Namespace to clone
+ *
+ * Clone @old_ns and set the clone refcount to 1
+ *
+ * Return: The new namespace or ERR_PTR.
+ */
+static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
+ struct time_namespace *old_ns)
+{
+ struct time_namespace *ns;
+ struct ucounts *ucounts;
+ int err;
+
+ err = -ENOSPC;
+ ucounts = inc_time_namespaces(user_ns);
+ if (!ucounts)
+ goto fail;
+
+ err = -ENOMEM;
+ ns = kmalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns)
+ goto fail_dec;
+
+ kref_init(&ns->kref);
+
+ ns->vvar_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!ns->vvar_page)
+ goto fail_free;
+
+ err = ns_alloc_inum(&ns->ns);
+ if (err)
+ goto fail_free_page;
+
+ ns->ucounts = ucounts;
+ ns->ns.ops = &timens_operations;
+ ns->user_ns = get_user_ns(user_ns);
+ ns->offsets = old_ns->offsets;
+ ns->frozen_offsets = false;
+ return ns;
+
+fail_free_page:
+ __free_page(ns->vvar_page);
+fail_free:
+ kfree(ns);
+fail_dec:
+ dec_time_namespaces(ucounts);
+fail:
+ return ERR_PTR(err);
+}
+
+/**
+ * copy_time_ns - Create timens_for_children from @old_ns
+ * @flags: Cloning flags
+ * @user_ns: User namespace which owns a new namespace.
+ * @old_ns: Namespace to clone
+ *
+ * If CLONE_NEWTIME specified in @flags, creates a new timens_for_children;
+ * adds a refcounter to @old_ns otherwise.
+ *
+ * Return: timens_for_children namespace or ERR_PTR.
+ */
+struct time_namespace *copy_time_ns(unsigned long flags,
+ struct user_namespace *user_ns, struct time_namespace *old_ns)
+{
+ if (!(flags & CLONE_NEWTIME))
+ return get_time_ns(old_ns);
+
+ return clone_time_ns(user_ns, old_ns);
+}
+
+static struct timens_offset offset_from_ts(struct timespec64 off)
+{
+ struct timens_offset ret;
+
+ ret.sec = off.tv_sec;
+ ret.nsec = off.tv_nsec;
+
+ return ret;
+}
+
+/*
+ * A time namespace VVAR page has the same layout as the VVAR page which
+ * contains the system wide VDSO data.
+ *
+ * For a normal task the VVAR pages are installed in the normal ordering:
+ * VVAR
+ * PVCLOCK
+ * HVCLOCK
+ * TIMENS <- Not really required
+ *
+ * Now for a timens task the pages are installed in the following order:
+ * TIMENS
+ * PVCLOCK
+ * HVCLOCK
+ * VVAR
+ *
+ * The check for vdso_data->clock_mode is in the unlikely path of
+ * the seq begin magic. So for the non-timens case most of the time
+ * 'seq' is even, so the branch is not taken.
+ *
+ * If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
+ * for vdso_data->clock_mode is a non-issue. The task is spin waiting for the
+ * update to finish and for 'seq' to become even anyway.
+ *
+ * Timens page has vdso_data->clock_mode set to VCLOCK_TIMENS which enforces
+ * the time namespace handling path.
+ */
+static void timens_setup_vdso_data(struct vdso_data *vdata,
+ struct time_namespace *ns)
+{
+ struct timens_offset *offset = vdata->offset;
+ struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
+ struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
+
+ vdata->seq = 1;
+ vdata->clock_mode = VCLOCK_TIMENS;
+ offset[CLOCK_MONOTONIC] = monotonic;
+ offset[CLOCK_MONOTONIC_RAW] = monotonic;
+ offset[CLOCK_MONOTONIC_COARSE] = monotonic;
+ offset[CLOCK_BOOTTIME] = boottime;
+ offset[CLOCK_BOOTTIME_ALARM] = boottime;
+}
+
+/*
+ * Protects possibly multiple offsets writers racing each other
+ * and tasks entering the namespace.
+ */
+static DEFINE_MUTEX(offset_lock);
+
+static void timens_set_vvar_page(struct task_struct *task,
+ struct time_namespace *ns)
+{
+ struct vdso_data *vdata;
+ unsigned int i;
+
+ if (ns == &init_time_ns)
+ return;
+
+ /* Fast-path, taken by every task in namespace except the first. */
+ if (likely(ns->frozen_offsets))
+ return;
+
+ mutex_lock(&offset_lock);
+ /* Nothing to-do: vvar_page has been already initialized. */
+ if (ns->frozen_offsets)
+ goto out;
+
+ ns->frozen_offsets = true;
+ vdata = arch_get_vdso_data(page_address(ns->vvar_page));
+
+ for (i = 0; i < CS_BASES; i++)
+ timens_setup_vdso_data(&vdata[i], ns);
+
+out:
+ mutex_unlock(&offset_lock);
+}
+
+void free_time_ns(struct kref *kref)
+{
+ struct time_namespace *ns;
+
+ ns = container_of(kref, struct time_namespace, kref);
+ dec_time_namespaces(ns->ucounts);
+ put_user_ns(ns->user_ns);
+ ns_free_inum(&ns->ns);
+ __free_page(ns->vvar_page);
+ kfree(ns);
+}
+
+static struct time_namespace *to_time_ns(struct ns_common *ns)
+{
+ return container_of(ns, struct time_namespace, ns);
+}
+
+static struct ns_common *timens_get(struct task_struct *task)
+{
+ struct time_namespace *ns = NULL;
+ struct nsproxy *nsproxy;
+
+ task_lock(task);
+ nsproxy = task->nsproxy;
+ if (nsproxy) {
+ ns = nsproxy->time_ns;
+ get_time_ns(ns);
+ }
+ task_unlock(task);
+
+ return ns ? &ns->ns : NULL;
+}
+
+static struct ns_common *timens_for_children_get(struct task_struct *task)
+{
+ struct time_namespace *ns = NULL;
+ struct nsproxy *nsproxy;
+
+ task_lock(task);
+ nsproxy = task->nsproxy;
+ if (nsproxy) {
+ ns = nsproxy->time_ns_for_children;
+ get_time_ns(ns);
+ }
+ task_unlock(task);
+
+ return ns ? &ns->ns : NULL;
+}
+
+static void timens_put(struct ns_common *ns)
+{
+ put_time_ns(to_time_ns(ns));
+}
+
+static int timens_install(struct nsproxy *nsproxy, struct ns_common *new)
+{
+ struct time_namespace *ns = to_time_ns(new);
+ int err;
+
+ if (!current_is_single_threaded())
+ return -EUSERS;
+
+ if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
+ !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+ return -EPERM;
+
+ timens_set_vvar_page(current, ns);
+
+ err = vdso_join_timens(current, ns);
+ if (err)
+ return err;
+
+ get_time_ns(ns);
+ put_time_ns(nsproxy->time_ns);
+ nsproxy->time_ns = ns;
+
+ get_time_ns(ns);
+ put_time_ns(nsproxy->time_ns_for_children);
+ nsproxy->time_ns_for_children = ns;
+ return 0;
+}
+
+int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
+{
+ struct ns_common *nsc = &nsproxy->time_ns_for_children->ns;
+ struct time_namespace *ns = to_time_ns(nsc);
+ int err;
+
+ /* create_new_namespaces() already incremented the ref counter */
+ if (nsproxy->time_ns == nsproxy->time_ns_for_children)
+ return 0;
+
+ timens_set_vvar_page(tsk, ns);
+
+ err = vdso_join_timens(tsk, ns);
+ if (err)
+ return err;
+
+ get_time_ns(ns);
+ put_time_ns(nsproxy->time_ns);
+ nsproxy->time_ns = ns;
+
+ return 0;
+}
+
+static struct user_namespace *timens_owner(struct ns_common *ns)
+{
+ return to_time_ns(ns)->user_ns;
+}
+
+static void show_offset(struct seq_file *m, int clockid, struct timespec64 *ts)
+{
+ seq_printf(m, "%d %lld %ld\n", clockid, ts->tv_sec, ts->tv_nsec);
+}
+
+void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m)
+{
+ struct ns_common *ns;
+ struct time_namespace *time_ns;
+
+ ns = timens_for_children_get(p);
+ if (!ns)
+ return;
+ time_ns = to_time_ns(ns);
+
+ show_offset(m, CLOCK_MONOTONIC, &time_ns->offsets.monotonic);
+ show_offset(m, CLOCK_BOOTTIME, &time_ns->offsets.boottime);
+ put_time_ns(time_ns);
+}
+
+int proc_timens_set_offset(struct file *file, struct task_struct *p,
+ struct proc_timens_offset *offsets, int noffsets)
+{
+ struct ns_common *ns;
+ struct time_namespace *time_ns;
+ struct timespec64 tp;
+ int i, err;
+
+ ns = timens_for_children_get(p);
+ if (!ns)
+ return -ESRCH;
+ time_ns = to_time_ns(ns);
+
+ if (!file_ns_capable(file, time_ns->user_ns, CAP_SYS_TIME)) {
+ put_time_ns(time_ns);
+ return -EPERM;
+ }
+
+ for (i = 0; i < noffsets; i++) {
+ struct proc_timens_offset *off = &offsets[i];
+
+ switch (off->clockid) {
+ case CLOCK_MONOTONIC:
+ ktime_get_ts64(&tp);
+ break;
+ case CLOCK_BOOTTIME:
+ ktime_get_boottime_ts64(&tp);
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = -ERANGE;
+
+ if (off->val.tv_sec > KTIME_SEC_MAX ||
+ off->val.tv_sec < -KTIME_SEC_MAX)
+ goto out;
+
+ tp = timespec64_add(tp, off->val);
+ /*
+ * KTIME_SEC_MAX is divided by 2 to be sure that KTIME_MAX is
+ * still unreachable.
+ */
+ if (tp.tv_sec < 0 || tp.tv_sec > KTIME_SEC_MAX / 2)
+ goto out;
+ }
+
+ mutex_lock(&offset_lock);
+ if (time_ns->frozen_offsets) {
+ err = -EACCES;
+ goto out_unlock;
+ }
+
+ err = 0;
+ /* Don't report errors after this line */
+ for (i = 0; i < noffsets; i++) {
+ struct proc_timens_offset *off = &offsets[i];
+ struct timespec64 *offset = NULL;
+
+ switch (off->clockid) {
+ case CLOCK_MONOTONIC:
+ offset = &time_ns->offsets.monotonic;
+ break;
+ case CLOCK_BOOTTIME:
+ offset = &time_ns->offsets.boottime;
+ break;
+ }
+
+ *offset = off->val;
+ }
+
+out_unlock:
+ mutex_unlock(&offset_lock);
+out:
+ put_time_ns(time_ns);
+
+ return err;
+}
+
+const struct proc_ns_operations timens_operations = {
+ .name = "time",
+ .type = CLONE_NEWTIME,
+ .get = timens_get,
+ .put = timens_put,
+ .install = timens_install,
+ .owner = timens_owner,
+};
+
+const struct proc_ns_operations timens_for_children_operations = {
+ .name = "time_for_children",
+ .type = CLONE_NEWTIME,
+ .get = timens_for_children_get,
+ .put = timens_put,
+ .install = timens_install,
+ .owner = timens_owner,
+};
+
+struct time_namespace init_time_ns = {
+ .kref = KREF_INIT(3),
+ .user_ns = &init_user_ns,
+ .ns.inum = PROC_TIME_INIT_INO,
+ .ns.ops = &timens_operations,
+ .frozen_offsets = true,
+};
+
+static int __init time_ns_init(void)
+{
+ return 0;
+}
+subsys_initcall(time_ns_init);
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index 200fb2d3be99..77c0c2370b6d 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -310,8 +310,8 @@ out:
}
const struct k_clock clock_posix_dynamic = {
- .clock_getres = pc_clock_getres,
- .clock_set = pc_clock_settime,
- .clock_get = pc_clock_gettime,
- .clock_adj = pc_clock_adjtime,
+ .clock_getres = pc_clock_getres,
+ .clock_set = pc_clock_settime,
+ .clock_get_timespec = pc_clock_gettime,
+ .clock_adj = pc_clock_adjtime,
};
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 42d512fcfda2..8ff6da77a01f 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1391,26 +1391,26 @@ static int thread_cpu_timer_create(struct k_itimer *timer)
}
const struct k_clock clock_posix_cpu = {
- .clock_getres = posix_cpu_clock_getres,
- .clock_set = posix_cpu_clock_set,
- .clock_get = posix_cpu_clock_get,
- .timer_create = posix_cpu_timer_create,
- .nsleep = posix_cpu_nsleep,
- .timer_set = posix_cpu_timer_set,
- .timer_del = posix_cpu_timer_del,
- .timer_get = posix_cpu_timer_get,
- .timer_rearm = posix_cpu_timer_rearm,
+ .clock_getres = posix_cpu_clock_getres,
+ .clock_set = posix_cpu_clock_set,
+ .clock_get_timespec = posix_cpu_clock_get,
+ .timer_create = posix_cpu_timer_create,
+ .nsleep = posix_cpu_nsleep,
+ .timer_set = posix_cpu_timer_set,
+ .timer_del = posix_cpu_timer_del,
+ .timer_get = posix_cpu_timer_get,
+ .timer_rearm = posix_cpu_timer_rearm,
};
const struct k_clock clock_process = {
- .clock_getres = process_cpu_clock_getres,
- .clock_get = process_cpu_clock_get,
- .timer_create = process_cpu_timer_create,
- .nsleep = process_cpu_nsleep,
+ .clock_getres = process_cpu_clock_getres,
+ .clock_get_timespec = process_cpu_clock_get,
+ .timer_create = process_cpu_timer_create,
+ .nsleep = process_cpu_nsleep,
};
const struct k_clock clock_thread = {
- .clock_getres = thread_cpu_clock_getres,
- .clock_get = thread_cpu_clock_get,
- .timer_create = thread_cpu_timer_create,
+ .clock_getres = thread_cpu_clock_getres,
+ .clock_get_timespec = thread_cpu_clock_get,
+ .timer_create = thread_cpu_timer_create,
};
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 20c65a7d4e3a..fcb3b21d8bdc 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -14,6 +14,7 @@
#include <linux/ktime.h>
#include <linux/timekeeping.h>
#include <linux/posix-timers.h>
+#include <linux/time_namespace.h>
#include <linux/compat.h>
#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
@@ -77,9 +78,11 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp)
break;
case CLOCK_MONOTONIC:
ktime_get_ts64(tp);
+ timens_add_monotonic(tp);
break;
case CLOCK_BOOTTIME:
ktime_get_boottime_ts64(tp);
+ timens_add_boottime(tp);
break;
default:
return -EINVAL;
@@ -126,6 +129,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
struct __kernel_timespec __user *, rmtp)
{
struct timespec64 t;
+ ktime_t texp;
switch (which_clock) {
case CLOCK_REALTIME:
@@ -144,7 +148,10 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
rmtp = NULL;
current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
current->restart_block.nanosleep.rmtp = rmtp;
- return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ?
+ texp = timespec64_to_ktime(t);
+ if (flags & TIMER_ABSTIME)
+ texp = timens_ktime_to_host(which_clock, texp);
+ return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
@@ -215,6 +222,7 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
struct old_timespec32 __user *, rmtp)
{
struct timespec64 t;
+ ktime_t texp;
switch (which_clock) {
case CLOCK_REALTIME:
@@ -233,7 +241,10 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
rmtp = NULL;
current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
current->restart_block.nanosleep.compat_rmtp = rmtp;
- return hrtimer_nanosleep(&t, flags & TIMER_ABSTIME ?
+ texp = timespec64_to_ktime(t);
+ if (flags & TIMER_ABSTIME)
+ texp = timens_ktime_to_host(which_clock, texp);
+ return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 0ec5b7a1d769..ff0eb30de346 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -30,6 +30,7 @@
#include <linux/hashtable.h>
#include <linux/compat.h>
#include <linux/nospec.h>
+#include <linux/time_namespace.h>
#include "timekeeping.h"
#include "posix-timers.h"
@@ -165,12 +166,17 @@ static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
}
/* Get clock_realtime */
-static int posix_clock_realtime_get(clockid_t which_clock, struct timespec64 *tp)
+static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_real_ts64(tp);
return 0;
}
+static ktime_t posix_get_realtime_ktime(clockid_t which_clock)
+{
+ return ktime_get_real();
+}
+
/* Set clock_realtime */
static int posix_clock_realtime_set(const clockid_t which_clock,
const struct timespec64 *tp)
@@ -187,18 +193,25 @@ static int posix_clock_realtime_adj(const clockid_t which_clock,
/*
* Get monotonic time for posix timers
*/
-static int posix_ktime_get_ts(clockid_t which_clock, struct timespec64 *tp)
+static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_ts64(tp);
+ timens_add_monotonic(tp);
return 0;
}
+static ktime_t posix_get_monotonic_ktime(clockid_t which_clock)
+{
+ return ktime_get();
+}
+
/*
* Get monotonic-raw time for posix timers
*/
static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_raw_ts64(tp);
+ timens_add_monotonic(tp);
return 0;
}
@@ -213,6 +226,7 @@ static int posix_get_monotonic_coarse(clockid_t which_clock,
struct timespec64 *tp)
{
ktime_get_coarse_ts64(tp);
+ timens_add_monotonic(tp);
return 0;
}
@@ -222,18 +236,29 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *
return 0;
}
-static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp)
+static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_boottime_ts64(tp);
+ timens_add_boottime(tp);
return 0;
}
-static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)
+static ktime_t posix_get_boottime_ktime(const clockid_t which_clock)
+{
+ return ktime_get_boottime();
+}
+
+static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_clocktai_ts64(tp);
return 0;
}
+static ktime_t posix_get_tai_ktime(clockid_t which_clock)
+{
+ return ktime_get_clocktai();
+}
+
static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp)
{
tp->tv_sec = 0;
@@ -645,7 +670,6 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
{
const struct k_clock *kc = timr->kclock;
ktime_t now, remaining, iv;
- struct timespec64 ts64;
bool sig_none;
sig_none = timr->it_sigev_notify == SIGEV_NONE;
@@ -663,12 +687,7 @@ void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
return;
}
- /*
- * The timespec64 based conversion is suboptimal, but it's not
- * worth to implement yet another callback.
- */
- kc->clock_get(timr->it_clock, &ts64);
- now = timespec64_to_ktime(ts64);
+ now = kc->clock_get_ktime(timr->it_clock);
/*
* When a requeue is pending or this is a SIGEV_NONE timer move the
@@ -781,7 +800,7 @@ static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires,
* Posix magic: Relative CLOCK_REALTIME timers are not affected by
* clock modifications, so they become CLOCK_MONOTONIC based under the
* hood. See hrtimer_init(). Update timr->kclock, so the generic
- * functions which use timr->kclock->clock_get() work.
+ * functions which use timr->kclock->clock_get_*() work.
*
* Note: it_clock stays unmodified, because the next timer_set() might
* use ABSTIME, so it needs to switch back.
@@ -866,6 +885,8 @@ int common_timer_set(struct k_itimer *timr, int flags,
timr->it_interval = timespec64_to_ktime(new_setting->it_interval);
expires = timespec64_to_ktime(new_setting->it_value);
+ if (flags & TIMER_ABSTIME)
+ expires = timens_ktime_to_host(timr->it_clock, expires);
sigev_none = timr->it_sigev_notify == SIGEV_NONE;
kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none);
@@ -1067,7 +1088,7 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
if (!kc)
return -EINVAL;
- error = kc->clock_get(which_clock, &kernel_tp);
+ error = kc->clock_get_timespec(which_clock, &kernel_tp);
if (!error && put_timespec64(&kernel_tp, tp))
error = -EFAULT;
@@ -1149,7 +1170,7 @@ SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock,
if (!kc)
return -EINVAL;
- err = kc->clock_get(which_clock, &ts);
+ err = kc->clock_get_timespec(which_clock, &ts);
if (!err && put_old_timespec32(&ts, tp))
err = -EFAULT;
@@ -1200,7 +1221,22 @@ SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock,
static int common_nsleep(const clockid_t which_clock, int flags,
const struct timespec64 *rqtp)
{
- return hrtimer_nanosleep(rqtp, flags & TIMER_ABSTIME ?
+ ktime_t texp = timespec64_to_ktime(*rqtp);
+
+ return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
+ HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
+ which_clock);
+}
+
+static int common_nsleep_timens(const clockid_t which_clock, int flags,
+ const struct timespec64 *rqtp)
+{
+ ktime_t texp = timespec64_to_ktime(*rqtp);
+
+ if (flags & TIMER_ABSTIME)
+ texp = timens_ktime_to_host(which_clock, texp);
+
+ return hrtimer_nanosleep(texp, flags & TIMER_ABSTIME ?
HRTIMER_MODE_ABS : HRTIMER_MODE_REL,
which_clock);
}
@@ -1261,7 +1297,8 @@ SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
static const struct k_clock clock_realtime = {
.clock_getres = posix_get_hrtimer_res,
- .clock_get = posix_clock_realtime_get,
+ .clock_get_timespec = posix_get_realtime_timespec,
+ .clock_get_ktime = posix_get_realtime_ktime,
.clock_set = posix_clock_realtime_set,
.clock_adj = posix_clock_realtime_adj,
.nsleep = common_nsleep,
@@ -1279,8 +1316,9 @@ static const struct k_clock clock_realtime = {
static const struct k_clock clock_monotonic = {
.clock_getres = posix_get_hrtimer_res,
- .clock_get = posix_ktime_get_ts,
- .nsleep = common_nsleep,
+ .clock_get_timespec = posix_get_monotonic_timespec,
+ .clock_get_ktime = posix_get_monotonic_ktime,
+ .nsleep = common_nsleep_timens,
.timer_create = common_timer_create,
.timer_set = common_timer_set,
.timer_get = common_timer_get,
@@ -1295,22 +1333,23 @@ static const struct k_clock clock_monotonic = {
static const struct k_clock clock_monotonic_raw = {
.clock_getres = posix_get_hrtimer_res,
- .clock_get = posix_get_monotonic_raw,
+ .clock_get_timespec = posix_get_monotonic_raw,
};
static const struct k_clock clock_realtime_coarse = {
.clock_getres = posix_get_coarse_res,
- .clock_get = posix_get_realtime_coarse,
+ .clock_get_timespec = posix_get_realtime_coarse,
};
static const struct k_clock clock_monotonic_coarse = {
.clock_getres = posix_get_coarse_res,
- .clock_get = posix_get_monotonic_coarse,
+ .clock_get_timespec = posix_get_monotonic_coarse,
};
static const struct k_clock clock_tai = {
.clock_getres = posix_get_hrtimer_res,
- .clock_get = posix_get_tai,
+ .clock_get_ktime = posix_get_tai_ktime,
+ .clock_get_timespec = posix_get_tai_timespec,
.nsleep = common_nsleep,
.timer_create = common_timer_create,
.timer_set = common_timer_set,
@@ -1326,8 +1365,9 @@ static const struct k_clock clock_tai = {
static const struct k_clock clock_boottime = {
.clock_getres = posix_get_hrtimer_res,
- .clock_get = posix_get_boottime,
- .nsleep = common_nsleep,
+ .clock_get_ktime = posix_get_boottime_ktime,
+ .clock_get_timespec = posix_get_boottime_timespec,
+ .nsleep = common_nsleep_timens,
.timer_create = common_timer_create,
.timer_set = common_timer_set,
.timer_get = common_timer_get,
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index 897c29e162b9..f32a2ebba9b8 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -6,8 +6,11 @@ struct k_clock {
struct timespec64 *tp);
int (*clock_set)(const clockid_t which_clock,
const struct timespec64 *tp);
- int (*clock_get)(const clockid_t which_clock,
- struct timespec64 *tp);
+ /* Returns the clock value in the current time namespace. */
+ int (*clock_get_timespec)(const clockid_t which_clock,
+ struct timespec64 *tp);
+ /* Returns the clock value in the root time namespace. */
+ ktime_t (*clock_get_ktime)(const clockid_t which_clock);
int (*clock_adj)(const clockid_t which_clock, struct __kernel_timex *tx);
int (*timer_create)(struct k_itimer *timer);
int (*nsleep)(const clockid_t which_clock, int flags,
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index dbd69052eaa6..e4332e3e2d56 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -169,14 +169,15 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
{
u64 res, wrap, new_mask, new_epoch, cyc, ns;
u32 new_mult, new_shift;
- unsigned long r;
+ unsigned long r, flags;
char r_unit;
struct clock_read_data rd;
if (cd.rate > rate)
return;
- WARN_ON(!irqs_disabled());
+ /* Cannot register a sched_clock with interrupts on */
+ local_irq_save(flags);
/* Calculate the mult/shift to convert counter ticks to ns. */
clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
@@ -233,6 +234,8 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
enable_sched_clock_irqtime();
+ local_irq_restore(flags);
+
pr_debug("Registered %pS as sched_clock source\n", read);
}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 59225b484e4e..7e5d3524e924 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -11,6 +11,7 @@
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
+#include <linux/nmi.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
@@ -558,6 +559,7 @@ void tick_unfreeze(void)
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), false);
} else {
+ touch_softlockup_watchdog();
tick_resume_local();
}