diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-14 10:53:44 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-04-14 10:53:44 -0700 |
| commit | f21f7b5162e9dbde6d3d5ce727d4ca2552d76ce9 (patch) | |
| tree | 2c1d858605001adedeff10f66f031e20da1db34d /lib | |
| parent | c1fe867b5bf9c57ab7856486d342720e2b205eed (diff) | |
| parent | 7138a8698a39e81eb153e05500823fff76d5b3bd (diff) | |
| download | lwn-f21f7b5162e9dbde6d3d5ce727d4ca2552d76ce9.tar.gz lwn-f21f7b5162e9dbde6d3d5ce727d4ca2552d76ce9.zip | |
Merge tag 'timers-vdso-2026-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull vdso updates from Thomas Gleixner:
- Make the handling of compat functions consistent and more robust
- Rework the underlying data store so that it is dynamically allocated,
which allows the conversion of the last holdout SPARC64 to the
generic VDSO implementation
- Rework the SPARC64 VDSO to utilize the generic implementation
- Mop up the leftovers of the non-generic VDSO support in the core
code
- Expand the VDSO selftests and make them more robust
- Allow time namespaces to be enabled independently of the generic VDSO
support, which was not possible before due to SPARC64 not using it
- Various cleanups and improvements in the related code
* tag 'timers-vdso-2026-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (51 commits)
timens: Use task_lock guard in timens_get*()
timens: Use mutex guard in proc_timens_set_offset()
timens: Simplify some calls to put_time_ns()
timens: Add a __free() wrapper for put_time_ns()
timens: Remove dependency on the vDSO
vdso/timens: Move functions to new file
selftests: vDSO: vdso_test_correctness: Add a test for time()
selftests: vDSO: vdso_test_correctness: Use facilities from parse_vdso.c
selftests: vDSO: vdso_test_correctness: Handle different tv_usec types
selftests: vDSO: vdso_test_correctness: Drop SYS_getcpu fallbacks
selftests: vDSO: vdso_test_gettimeofday: Remove nolibc checks
Revert "selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers"
random: vDSO: Remove ifdeffery
random: vDSO: Trim vDSO includes
vdso/datapage: Trim down unnecessary includes
vdso/datapage: Remove inclusion of gettimeofday.h
vdso/helpers: Explicitly include vdso/processor.h
vdso/gettimeofday: Add explicit includes
random: vDSO: Add explicit includes
MIPS: vdso: Explicitly include asm/vdso/vdso.h
...
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/vdso/datastore.c | 122 | ||||
| -rw-r--r-- | lib/vdso/getrandom.c | 3 | ||||
| -rw-r--r-- | lib/vdso/gettimeofday.c | 99 |
3 files changed, 106 insertions, 118 deletions
diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c index a565c30c71a0..cf5d784a4a5a 100644 --- a/lib/vdso/datastore.c +++ b/lib/vdso/datastore.c @@ -1,64 +1,92 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <linux/linkage.h> -#include <linux/mmap_lock.h> +#include <linux/gfp.h> +#include <linux/init.h> #include <linux/mm.h> #include <linux/time_namespace.h> #include <linux/types.h> #include <linux/vdso_datastore.h> #include <vdso/datapage.h> -/* - * The vDSO data page. - */ +static u8 vdso_initdata[VDSO_NR_PAGES * PAGE_SIZE] __aligned(PAGE_SIZE) __initdata = {}; + #ifdef CONFIG_GENERIC_GETTIMEOFDAY -static union { - struct vdso_time_data data; - u8 page[PAGE_SIZE]; -} vdso_time_data_store __page_aligned_data; -struct vdso_time_data *vdso_k_time_data = &vdso_time_data_store.data; -static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE); +struct vdso_time_data *vdso_k_time_data __refdata = + (void *)&vdso_initdata[VDSO_TIME_PAGE_OFFSET * PAGE_SIZE]; + +static_assert(sizeof(struct vdso_time_data) <= PAGE_SIZE); #endif /* CONFIG_GENERIC_GETTIMEOFDAY */ #ifdef CONFIG_VDSO_GETRANDOM -static union { - struct vdso_rng_data data; - u8 page[PAGE_SIZE]; -} vdso_rng_data_store __page_aligned_data; -struct vdso_rng_data *vdso_k_rng_data = &vdso_rng_data_store.data; -static_assert(sizeof(vdso_rng_data_store) == PAGE_SIZE); +struct vdso_rng_data *vdso_k_rng_data __refdata = + (void *)&vdso_initdata[VDSO_RNG_PAGE_OFFSET * PAGE_SIZE]; + +static_assert(sizeof(struct vdso_rng_data) <= PAGE_SIZE); #endif /* CONFIG_VDSO_GETRANDOM */ #ifdef CONFIG_ARCH_HAS_VDSO_ARCH_DATA -static union { - struct vdso_arch_data data; - u8 page[VDSO_ARCH_DATA_SIZE]; -} vdso_arch_data_store __page_aligned_data; -struct vdso_arch_data *vdso_k_arch_data = &vdso_arch_data_store.data; +struct vdso_arch_data *vdso_k_arch_data __refdata = + (void *)&vdso_initdata[VDSO_ARCH_PAGES_START * PAGE_SIZE]; #endif /* CONFIG_ARCH_HAS_VDSO_ARCH_DATA */ +void __init vdso_setup_data_pages(void) +{ + unsigned 
int order = get_order(VDSO_NR_PAGES * PAGE_SIZE); + struct page *pages; + + /* + * Allocate the data pages dynamically. SPARC does not support mapping + * static pages to be mapped into userspace. + * It is also a requirement for mlockall() support. + * + * Do not use folios. In time namespaces the pages are mapped in a different order + * to userspace, which is not handled by the folio optimizations in finish_fault(). + */ + pages = alloc_pages(GFP_KERNEL, order); + if (!pages) + panic("Unable to allocate VDSO storage pages"); + + /* The pages are mapped one-by-one into userspace and each one needs to be refcounted. */ + split_page(pages, order); + + /* Move the data already written by other subsystems to the new pages */ + memcpy(page_address(pages), vdso_initdata, VDSO_NR_PAGES * PAGE_SIZE); + + if (IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY)) + vdso_k_time_data = page_address(pages + VDSO_TIME_PAGE_OFFSET); + + if (IS_ENABLED(CONFIG_VDSO_GETRANDOM)) + vdso_k_rng_data = page_address(pages + VDSO_RNG_PAGE_OFFSET); + + if (IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA)) + vdso_k_arch_data = page_address(pages + VDSO_ARCH_PAGES_START); +} + static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long addr, pfn; - vm_fault_t err; + struct page *page, *timens_page; + + timens_page = find_timens_vvar_page(vma); switch (vmf->pgoff) { case VDSO_TIME_PAGE_OFFSET: if (!IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY)) return VM_FAULT_SIGBUS; - pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data)); + page = virt_to_page(vdso_k_time_data); if (timens_page) { /* * Fault in VVAR page too, since it will be accessed * to get clock data anyway. 
*/ + unsigned long addr; + vm_fault_t err; + addr = vmf->address + VDSO_TIMENS_PAGE_OFFSET * PAGE_SIZE; - err = vmf_insert_pfn(vma, addr, pfn); + err = vmf_insert_page(vma, addr, page); if (unlikely(err & VM_FAULT_ERROR)) return err; - pfn = page_to_pfn(timens_page); + page = timens_page; } break; case VDSO_TIMENS_PAGE_OFFSET: @@ -71,24 +99,25 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, */ if (!IS_ENABLED(CONFIG_TIME_NS) || !timens_page) return VM_FAULT_SIGBUS; - pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data)); + page = virt_to_page(vdso_k_time_data); break; case VDSO_RNG_PAGE_OFFSET: if (!IS_ENABLED(CONFIG_VDSO_GETRANDOM)) return VM_FAULT_SIGBUS; - pfn = __phys_to_pfn(__pa_symbol(vdso_k_rng_data)); + page = virt_to_page(vdso_k_rng_data); break; case VDSO_ARCH_PAGES_START ... VDSO_ARCH_PAGES_END: if (!IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA)) return VM_FAULT_SIGBUS; - pfn = __phys_to_pfn(__pa_symbol(vdso_k_arch_data)) + - vmf->pgoff - VDSO_ARCH_PAGES_START; + page = virt_to_page(vdso_k_arch_data) + vmf->pgoff - VDSO_ARCH_PAGES_START; break; default: return VM_FAULT_SIGBUS; } - return vmf_insert_pfn(vma, vmf->address, pfn); + get_page(page); + vmf->page = page; + return 0; } const struct vm_special_mapping vdso_vvar_mapping = { @@ -100,31 +129,6 @@ struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned { return _install_special_mapping(mm, addr, VDSO_NR_PAGES * PAGE_SIZE, VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP | - VM_PFNMAP | VM_SEALED_SYSMAP, + VM_MIXEDMAP | VM_SEALED_SYSMAP, &vdso_vvar_mapping); } - -#ifdef CONFIG_TIME_NS -/* - * The vvar page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will be re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_clock_data() for details. 
- */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, mm, 0); - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, &vdso_vvar_mapping)) - zap_vma_pages(vma); - } - mmap_read_unlock(mm); - - return 0; -} -#endif diff --git a/lib/vdso/getrandom.c b/lib/vdso/getrandom.c index 440f8a6203a6..7e29005aa208 100644 --- a/lib/vdso/getrandom.c +++ b/lib/vdso/getrandom.c @@ -7,8 +7,11 @@ #include <linux/minmax.h> #include <vdso/datapage.h> #include <vdso/getrandom.h> +#include <vdso/limits.h> #include <vdso/unaligned.h> +#include <asm/barrier.h> #include <asm/vdso/getrandom.h> +#include <uapi/linux/errno.h> #include <uapi/linux/mman.h> #include <uapi/linux/random.h> diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 4399e143d43a..a5798bd26d20 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -3,8 +3,25 @@ * Generic userspace implementations of gettimeofday() and similar. */ #include <vdso/auxclock.h> +#include <vdso/clocksource.h> #include <vdso/datapage.h> #include <vdso/helpers.h> +#include <vdso/ktime.h> +#include <vdso/limits.h> +#include <vdso/math64.h> +#include <vdso/time32.h> +#include <vdso/time64.h> + +/* + * The generic vDSO implementation requires that gettimeofday.h + * provides: + * - __arch_get_hw_counter(): to get the hw counter based on the + * clock_mode. + * - gettimeofday_fallback(): fallback for gettimeofday. + * - clock_gettime_fallback(): fallback for clock_gettime. + * - clock_getres_fallback(): fallback for clock_getres. 
+ */ +#include <asm/vdso/gettimeofday.h> /* Bring in default accessors */ #include <vdso/vsyscall.h> @@ -135,7 +152,7 @@ bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock * if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns)) return false; - } while (unlikely(vdso_read_retry(vc, seq))); + } while (vdso_read_retry(vc, seq)); /* Add the namespace offset */ sec += offs->sec; @@ -158,28 +175,12 @@ bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, return false; do { - /* - * Open coded function vdso_read_begin() to handle - * VDSO_CLOCKMODE_TIMENS. Time namespace enabled tasks have a - * special VVAR page installed which has vc->seq set to 1 and - * vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non time - * namespace affected tasks this does not affect performance - * because if vc->seq is odd, i.e. a concurrent update is in - * progress the extra check for vc->clock_mode is just a few - * extra instructions while spin waiting for vc->seq to become - * even again. - */ - while (unlikely((seq = READ_ONCE(vc->seq)) & 1)) { - if (IS_ENABLED(CONFIG_TIME_NS) && - vc->clock_mode == VDSO_CLOCKMODE_TIMENS) - return do_hres_timens(vd, vc, clk, ts); - cpu_relax(); - } - smp_rmb(); + if (vdso_read_begin_timens(vc, &seq)) + return do_hres_timens(vd, vc, clk, ts); if (!vdso_get_timestamp(vd, vc, clk, &sec, &ns)) return false; - } while (unlikely(vdso_read_retry(vc, seq))); + } while (vdso_read_retry(vc, seq)); vdso_set_timespec(ts, sec, ns); @@ -204,7 +205,7 @@ bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock seq = vdso_read_begin(vc); sec = vdso_ts->sec; nsec = vdso_ts->nsec; - } while (unlikely(vdso_read_retry(vc, seq))); + } while (vdso_read_retry(vc, seq)); /* Add the namespace offset */ sec += offs->sec; @@ -223,21 +224,12 @@ bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, u32 seq; do { - /* - * Open coded function vdso_read_begin() to handle - * VDSO_CLOCK_TIMENS. 
See comment in do_hres(). - */ - while ((seq = READ_ONCE(vc->seq)) & 1) { - if (IS_ENABLED(CONFIG_TIME_NS) && - vc->clock_mode == VDSO_CLOCKMODE_TIMENS) - return do_coarse_timens(vd, vc, clk, ts); - cpu_relax(); - } - smp_rmb(); + if (vdso_read_begin_timens(vc, &seq)) + return do_coarse_timens(vd, vc, clk, ts); ts->tv_sec = vdso_ts->sec; ts->tv_nsec = vdso_ts->nsec; - } while (unlikely(vdso_read_retry(vc, seq))); + } while (vdso_read_retry(vc, seq)); return true; } @@ -256,20 +248,12 @@ bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_ti vc = &vd->aux_clock_data[idx]; do { - /* - * Open coded function vdso_read_begin() to handle - * VDSO_CLOCK_TIMENS. See comment in do_hres(). - */ - while ((seq = READ_ONCE(vc->seq)) & 1) { - if (IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS) { - vd = __arch_get_vdso_u_timens_data(vd); - vc = &vd->aux_clock_data[idx]; - /* Re-read from the real time data page */ - continue; - } - cpu_relax(); + if (vdso_read_begin_timens(vc, &seq)) { + vd = __arch_get_vdso_u_timens_data(vd); + vc = &vd->aux_clock_data[idx]; + /* Re-read from the real time data page */ + continue; } - smp_rmb(); /* Auxclock disabled? 
*/ if (vc->clock_mode == VDSO_CLOCKMODE_NONE) @@ -277,7 +261,7 @@ bool do_aux(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_ti if (!vdso_get_timestamp(vd, vc, VDSO_BASE_AUX, &sec, &ns)) return false; - } while (unlikely(vdso_read_retry(vc, seq))); + } while (vdso_read_retry(vc, seq)); vdso_set_timespec(ts, sec, ns); @@ -313,7 +297,7 @@ __cvdso_clock_gettime_common(const struct vdso_time_data *vd, clockid_t clock, return do_hres(vd, vc, clock, ts); } -static __maybe_unused int +static int __cvdso_clock_gettime_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *ts) { @@ -333,7 +317,7 @@ __cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) } #ifdef BUILD_VDSO32 -static __maybe_unused int +static int __cvdso_clock_gettime32_data(const struct vdso_time_data *vd, clockid_t clock, struct old_timespec32 *res) { @@ -359,7 +343,7 @@ __cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res) } #endif /* BUILD_VDSO32 */ -static __maybe_unused int +static int __cvdso_gettimeofday_data(const struct vdso_time_data *vd, struct __kernel_old_timeval *tv, struct timezone *tz) { @@ -376,8 +360,7 @@ __cvdso_gettimeofday_data(const struct vdso_time_data *vd, } if (unlikely(tz != NULL)) { - if (IS_ENABLED(CONFIG_TIME_NS) && - vc->clock_mode == VDSO_CLOCKMODE_TIMENS) + if (vdso_is_timens_clock(vc)) vd = __arch_get_vdso_u_timens_data(vd); tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest; @@ -394,14 +377,13 @@ __cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) } #ifdef VDSO_HAS_TIME -static __maybe_unused __kernel_old_time_t +static __kernel_old_time_t __cvdso_time_data(const struct vdso_time_data *vd, __kernel_old_time_t *time) { const struct vdso_clock *vc = vd->clock_data; __kernel_old_time_t t; - if (IS_ENABLED(CONFIG_TIME_NS) && - vc->clock_mode == VDSO_CLOCKMODE_TIMENS) { + if (vdso_is_timens_clock(vc)) { vd = __arch_get_vdso_u_timens_data(vd); vc = vd->clock_data; } @@ -432,8 
+414,7 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc if (!vdso_clockid_valid(clock)) return false; - if (IS_ENABLED(CONFIG_TIME_NS) && - vc->clock_mode == VDSO_CLOCKMODE_TIMENS) + if (vdso_is_timens_clock(vc)) vd = __arch_get_vdso_u_timens_data(vd); /* @@ -464,7 +445,7 @@ bool __cvdso_clock_getres_common(const struct vdso_time_data *vd, clockid_t cloc return true; } -static __maybe_unused +static int __cvdso_clock_getres_data(const struct vdso_time_data *vd, clockid_t clock, struct __kernel_timespec *res) { @@ -484,7 +465,7 @@ int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res) } #ifdef BUILD_VDSO32 -static __maybe_unused int +static int __cvdso_clock_getres_time32_data(const struct vdso_time_data *vd, clockid_t clock, struct old_timespec32 *res) { |
