summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2026-04-14 10:53:44 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-04-14 10:53:44 -0700
commit f21f7b5162e9dbde6d3d5ce727d4ca2552d76ce9 (patch)
tree 2c1d858605001adedeff10f66f031e20da1db34d /lib
parent c1fe867b5bf9c57ab7856486d342720e2b205eed (diff)
parent 7138a8698a39e81eb153e05500823fff76d5b3bd (diff)
download lwn-f21f7b5162e9dbde6d3d5ce727d4ca2552d76ce9.tar.gz
lwn-f21f7b5162e9dbde6d3d5ce727d4ca2552d76ce9.zip
Merge tag 'timers-vdso-2026-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull vdso updates from Thomas Gleixner:

 - Make the handling of compat functions consistent and more robust

 - Rework the underlying data store so that it is dynamically allocated, which allows the conversion of the last holdout SPARC64 to the generic VDSO implementation

 - Rework the SPARC64 VDSO to utilize the generic implementation

 - Mop up the leftovers of the non-generic VDSO support in the core code

 - Expand the VDSO selftests and make them more robust

 - Allow time namespaces to be enabled independently of the generic VDSO support, which was not possible before due to SPARC64 not using it

 - Various cleanups and improvements in the related code

* tag 'timers-vdso-2026-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (51 commits)
  timens: Use task_lock guard in timens_get*()
  timens: Use mutex guard in proc_timens_set_offset()
  timens: Simplify some calls to put_time_ns()
  timens: Add a __free() wrapper for put_time_ns()
  timens: Remove dependency on the vDSO
  vdso/timens: Move functions to new file
  selftests: vDSO: vdso_test_correctness: Add a test for time()
  selftests: vDSO: vdso_test_correctness: Use facilities from parse_vdso.c
  selftests: vDSO: vdso_test_correctness: Handle different tv_usec types
  selftests: vDSO: vdso_test_correctness: Drop SYS_getcpu fallbacks
  selftests: vDSO: vdso_test_gettimeofday: Remove nolibc checks
  Revert "selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers"
  random: vDSO: Remove ifdeffery
  random: vDSO: Trim vDSO includes
  vdso/datapage: Trim down unnecessary includes
  vdso/datapage: Remove inclusion of gettimeofday.h
  vdso/helpers: Explicitly include vdso/processor.h
  vdso/gettimeofday: Add explicit includes
  random: vDSO: Add explicit includes
  MIPS: vdso: Explicitly include asm/vdso/vdso.h
  ...
Diffstat (limited to 'lib')
-rw-r--r-- lib/vdso/datastore.c 122
-rw-r--r-- lib/vdso/getrandom.c 3
-rw-r--r-- lib/vdso/gettimeofday.c 99
3 files changed, 106 insertions, 118 deletions
diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c
index a565c30c71a0..cf5d784a4a5a 100644
--- a/lib/vdso/datastore.c
+++ b/lib/vdso/datastore.c
@@ -1,64 +1,92 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/linkage.h>
-#include <linux/mmap_lock.h>
+#include <linux/gfp.h>
+#include <linux/init.h>
#include <linux/mm.h>
#include <linux/time_namespace.h>
#include <linux/types.h>
#include <linux/vdso_datastore.h>
#include <vdso/datapage.h>
-/*
- * The vDSO data page.
- */
+static u8 vdso_initdata[VDSO_NR_PAGES * PAGE_SIZE] __aligned(PAGE_SIZE) __initdata = {};
+
#ifdef CONFIG_GENERIC_GETTIMEOFDAY
-static union {
- struct vdso_time_data data;
- u8 page[PAGE_SIZE];
-} vdso_time_data_store __page_aligned_data;
-struct vdso_time_data *vdso_k_time_data = &vdso_time_data_store.data;
-static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE);
+struct vdso_time_data *vdso_k_time_data __refdata =
+ (void *)&vdso_initdata[VDSO_TIME_PAGE_OFFSET * PAGE_SIZE];
+
+static_assert(sizeof(struct vdso_time_data) <= PAGE_SIZE);
#endif /* CONFIG_GENERIC_GETTIMEOFDAY */
#ifdef CONFIG_VDSO_GETRANDOM
-static union {
- struct vdso_rng_data data;
- u8 page[PAGE_SIZE];
-} vdso_rng_data_store __page_aligned_data;
-struct vdso_rng_data *vdso_k_rng_data = &vdso_rng_data_store.data;
-static_assert(sizeof(vdso_rng_data_store) == PAGE_SIZE);
+struct vdso_rng_data *vdso_k_rng_data __refdata =
+ (void *)&vdso_initdata[VDSO_RNG_PAGE_OFFSET * PAGE_SIZE];
+
+static_assert(sizeof(struct vdso_rng_data) <= PAGE_SIZE);
#endif /* CONFIG_VDSO_GETRANDOM */
#ifdef CONFIG_ARCH_HAS_VDSO_ARCH_DATA
-static union {
- struct vdso_arch_data data;
- u8 page[VDSO_ARCH_DATA_SIZE];
-} vdso_arch_data_store __page_aligned_data;
-struct vdso_arch_data *vdso_k_arch_data = &vdso_arch_data_store.data;
+struct vdso_arch_data *vdso_k_arch_data __refdata =
+ (void *)&vdso_initdata[VDSO_ARCH_PAGES_START * PAGE_SIZE];
#endif /* CONFIG_ARCH_HAS_VDSO_ARCH_DATA */
+void __init vdso_setup_data_pages(void)
+{
+ unsigned int order = get_order(VDSO_NR_PAGES * PAGE_SIZE);
+ struct page *pages;
+
+ /*
+ * Allocate the data pages dynamically. SPARC does not support mapping
+ * statically allocated pages into userspace.
+ * It is also a requirement for mlockall() support.
+ *
+ * Do not use folios. In time namespaces the pages are mapped in a different order
+ * to userspace, which is not handled by the folio optimizations in finish_fault().
+ */
+ pages = alloc_pages(GFP_KERNEL, order);
+ if (!pages)
+ panic("Unable to allocate VDSO storage pages");
+
+ /* The pages are mapped one-by-one into userspace and each one needs to be refcounted. */
+ split_page(pages, order);
+
+ /* Move the data already written by other subsystems to the new pages */
+ memcpy(page_address(pages), vdso_initdata, VDSO_NR_PAGES * PAGE_SIZE);
+
+ if (IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY))
+ vdso_k_time_data = page_address(pages + VDSO_TIME_PAGE_OFFSET);
+
+ if (IS_ENABLED(CONFIG_VDSO_GETRANDOM))
+ vdso_k_rng_data = page_address(pages + VDSO_RNG_PAGE_OFFSET);
+
+ if (IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA))
+ vdso_k_arch_data = page_address(pages + VDSO_ARCH_PAGES_START);
+}
+
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct page *timens_page = find_timens_vvar_page(vma);
- unsigned long addr, pfn;
- vm_fault_t err;
+ struct page *page, *timens_page;
+
+ timens_page = find_timens_vvar_page(vma);
switch (vmf->pgoff) {
case VDSO_TIME_PAGE_OFFSET:
if (!IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY))
return VM_FAULT_SIGBUS;
- pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data));
+ page = virt_to_page(vdso_k_time_data);
if (timens_page) {
/*
* Fault in VVAR page too, since it will be accessed
* to get clock data anyway.
*/
+ unsigned long addr;
+ vm_fault_t err;
+
addr = vmf->address + VDSO_TIMENS_PAGE_OFFSET * PAGE_SIZE;
- err = vmf_insert_pfn(vma, addr, pfn);
+ err = vmf_insert_page(vma, addr, page);
if (unlikely(err & VM_FAULT_ERROR))
return err;
- pfn = page_to_pfn(timens_page);
+ page = timens_page;
}
break;
case VDSO_TIMENS_PAGE_OFFSET:
@@ -71,24 +99,25 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
*/
if (!IS_ENABLED(CONFIG_TIME_NS) || !timens_page)
return VM_FAULT_SIGBUS;
- pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data));
+ page = virt_to_page(vdso_k_time_data);
break;
case VDSO_RNG_PAGE_OFFSET:
if (!IS_ENABLED(CONFIG_VDSO_GETRANDOM))
return VM_FAULT_SIGBUS;
- pfn = __phys_to_pfn(__pa_symbol(vdso_k_rng_data));
+ page = virt_to_page(vdso_k_rng_data);
break;
case VDSO_ARCH_PAGES_START ... VDSO_ARCH_PAGES_END:
if (!IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA))
return VM_FAULT_SIGBUS;
- pfn = __phys_to_pfn(__pa_symbol(vdso_k_arch_data)) +
- vmf->pgoff - VDSO_ARCH_PAGES_START;
+ page = virt_to_page(vdso_k_arch_data) + vmf->pgoff - VDSO_ARCH_PAGES_START;
break;
default:
return VM_FAULT_SIGBUS;
}
- return vmf_insert_pfn(vma, vmf->address, pfn);
+ get_page(page);
+ vmf->page = page;
+ return 0;
}
const struct vm_special_mapping vdso_vvar_mapping = {
@@ -100,31 +129,6 @@ struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned
{
return _install_special_mapping(mm, addr, VDSO_NR_PAGES * PAGE_SIZE,
VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP |
- VM_PFNMAP | VM_SEALED_SYSMAP,
+ VM_MIXEDMAP | VM_SEALED_SYSMAP,
&vdso_vvar_mapping);
}
-
-#ifdef CONFIG_TIME_NS
-/*
- * The vvar page layout depends on whether a task belongs to the root or
- * non-root time namespace. Whenever a task changes its namespace, the VVAR
- * page tables are cleared and then they will be re-faulted with a
- * corresponding layout.
- * See also the comment near timens_setup_vdso_clock_data() for details.
- */
-int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
-{
- struct mm_struct *mm = task->mm;
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
-
- mmap_read_lock(mm);
- for_each_vma(vmi, vma) {
- if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
- zap_vma_pages(vma);
- }
- mmap_read_unlock(mm);
-
- return 0;
-}
-#endif
diff --git a/lib/vdso/getrandom.c b/lib/vdso/getrandom.c
index 440f8a6203a6..7e29005aa208 100644
--- a/lib/vdso/getrandom.c
+++ b/lib/vdso/getrandom.c
@@ -7,8 +7,11 @@
#include <linux/minmax.h>
#include <vdso/datapage.h>
#include <vdso/getrandom.h>
+#include <vdso/limits.h>
#include <vdso/unaligned.h>
+#include <asm/barrier.h>
#include <asm/vdso/getrandom.h>
+#include <uapi/linux/errno.h>
#include <uapi/linux/mman.h>
#include <uapi/linux/random.h>
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index 4399e143d43a..a5798bd26d20 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -3,8 +3,25 @@
* Generic userspace implementations of gettimeofday() and similar.
*/
#include <vdso/auxclock.h>
+#include <vdso/clocksource.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
+#include <vdso/ktime.h>
+#include <vdso/limits.h>
+#include <vdso/math64.h>
+#include <vdso/time32.h>
+#include <vdso/time64.h>
+
+/*
+ * The generic vDSO implementation requires that gettimeofday.h
+ * provides:
+ * - __arch_get_hw_counter(): to get the hw counter based on the
+ * clock_mode.
+ * - gettimeofday_fallback(): fallback for gettimeofday.
+ * - clock_gettime_fallback(): fallback for clock_gettime.
+ * - clock_getres_fallback(): fallback for clock_getres.
+ */
+#include <asm/vdso/gettimeofday.h>
/* Bring in default accessors */
#include <vdso/vsyscall.h>
@@ -135,7 +152,7 @@ bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *
if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns))
return false;
- } while (unlikely(vdso_read_retry(vc, seq)));
+ } while (vdso_read_retry(vc, seq));
/* Add the namespace offset */
sec += offs->sec;
@@ -158,28 +175,12 @@ bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc,
return false;
do {
- /*
- * Open coded function vdso_read_begin() to handle
- * VDSO_CLOCKMODE_TIMENS. Time namespace enabled tasks have a
- * special VVAR page installed which has vc->seq set to 1 and
- * vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non time
- * namespace affected tasks this does not affect performance
- * because if vc->seq is odd, i.e. a concurrent update is in
- * progress the extra check for vc->clock_mode is just a few
- * extra instructions while spin waiting for vc->seq to become
- * even again.
- */
- while (unlikely((seq = READ_ONCE(vc->seq)) & 1)) {
- if (IS_ENABLED(CONFIG_TIME_NS) &&
- vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
- return do_hres_timens(vd, vc, clk, ts);
- cpu_relax();
- }
- smp_rmb();
+ if (vdso_read_begin_timens(vc, &seq))
+ return do_hres_timens(vd, vc, clk, ts);
if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns))
return false;
- } while (unlikely(vdso_read_retry(vc, seq)));
+ } while (vdso_read_retry(vc, seq));
vdso_set_timespec(ts, sec, ns);
@@ -204,7 +205,7 @@ bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock
seq = vdso_read_begin(vc);
sec = vdso_ts->sec;
nsec = vdso_ts->nsec;
- } while (unlikely(vdso_read_retry(vc, seq)));
+ } while (vdso_read_retry(vc, seq));
/* Add the namespace offset */
sec += offs->sec;
@@ -223,21 +224,12 @@ bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc,
u32 seq;
do {
- /*
- * Open coded function vdso_read_begin() to handle
- * VDSO_CLOCK_TIMENS. See comment in do_hres().
- */
- while ((seq = READ_ONCE(vc->seq)) & 1) {
- if (IS_ENABLED(CONFIG_TIME_NS) &&
- vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
- return do_coarse_timens(vd, vc, clk, ts);
- cpu_relax();
- }
- smp_rmb();
+ if (vdso_read_begin_timens(vc, &seq))
+ return do_coarse_timens(vd, vc, clk, ts);
ts->tv_sec = vdso_ts->sec;
ts->tv_nsec = vdso_ts->nsec;
- } while (unlikely(vdso_read_retry(vc, seq)));
+ } while (vdso_read_retry(vc, seq));
return true;
}
@@ -256,20 +248,12 @@ bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_ti
vc = &vd->aux_clock_data[idx];
do {
- /*
- * Open coded function vdso_read_begin() to handle
- * VDSO_CLOCK_TIMENS. See comment in do_hres().
- */
- while ((seq = READ_ONCE(vc->seq)) & 1) {
- if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) {
- vd = __arch_get_vdso_u_timens_data(vd);
- vc = &vd->aux_clock_data[idx];
- /* Re-read from the real time data page */
- continue;
- }
- cpu_relax();
+ if (vdso_read_begin_timens(vc, &seq)) {
+ vd = __arch_get_vdso_u_timens_data(vd);
+ vc = &vd->aux_clock_data[idx];
+ /* Re-read from the real time data page */
+ continue;
}
- smp_rmb();
/* Auxclock disabled? */
if (vc->clock_mode == VDSO_CLOCKMODE_NONE)
@@ -277,7 +261,7 @@ bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_ti
if (!vdso_get_timestamp(vd, vc, VDSO_BASE_AUX, &sec, &ns))
return false;
- } while (unlikely(vdso_read_retry(vc, seq)));
+ } while (vdso_read_retry(vc, seq));
vdso_set_timespec(ts, sec, ns);
@@ -313,7 +297,7 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock,
return do_hres(vd, vc, clock, ts);
}
-static __maybe_unused int
+static int
__cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock,
struct __kernel_timespec *ts)
{
@@ -333,7 +317,7 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
}
#ifdef BUILD_VDSO32
-static __maybe_unused int
+static int
__cvdso_clock_gettime32_data(const struct vdso_time_data *vd, clockid_t clock,
struct old_timespec32 *res)
{
@@ -359,7 +343,7 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
}
#endif /* BUILD_VDSO32 */
-static __maybe_unused int
+static int
__cvdso_gettimeofday_data(const struct vdso_time_data *vd,
struct __kernel_old_timeval *tv, struct timezone *tz)
{
@@ -376,8 +360,7 @@ __cvdso_gettimeofday_data(const struct vdso_time_data *vd,
}
if (unlikely(tz != NULL)) {
- if (IS_ENABLED(CONFIG_TIME_NS) &&
- vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
+ if (vdso_is_timens_clock(vc))
vd = __arch_get_vdso_u_timens_data(vd);
tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
@@ -394,14 +377,13 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
}
#ifdef VDSO_HAS_TIME
-static __maybe_unused __kernel_old_time_t
+static __kernel_old_time_t
__cvdso_time_data(const struct vdso_time_data *vd, __kernel_old_time_t *time)
{
const struct vdso_clock *vc = vd->clock_data;
__kernel_old_time_t t;
- if (IS_ENABLED(CONFIG_TIME_NS) &&
- vc->clock_mode == VDSO_CLOCKMODE_TIMENS) {
+ if (vdso_is_timens_clock(vc)) {
vd = __arch_get_vdso_u_timens_data(vd);
vc = vd->clock_data;
}
@@ -432,8 +414,7 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc
if (!vdso_clockid_valid(clock))
return false;
- if (IS_ENABLED(CONFIG_TIME_NS) &&
- vc->clock_mode == VDSO_CLOCKMODE_TIMENS)
+ if (vdso_is_timens_clock(vc))
vd = __arch_get_vdso_u_timens_data(vd);
/*
@@ -464,7 +445,7 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc
return true;
}
-static __maybe_unused
+static
int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock,
struct __kernel_timespec *res)
{
@@ -484,7 +465,7 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
}
#ifdef BUILD_VDSO32
-static __maybe_unused int
+static int
__cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t clock,
struct old_timespec32 *res)
{