diff options
Diffstat (limited to 'include/vdso')
| -rw-r--r-- | include/vdso/align.h | 15 | ||||
| -rw-r--r-- | include/vdso/auxclock.h | 13 | ||||
| -rw-r--r-- | include/vdso/cache.h | 15 | ||||
| -rw-r--r-- | include/vdso/datapage.h | 149 | ||||
| -rw-r--r-- | include/vdso/gettime.h | 2 | ||||
| -rw-r--r-- | include/vdso/helpers.h | 89 | ||||
| -rw-r--r-- | include/vdso/jiffies.h | 2 | ||||
| -rw-r--r-- | include/vdso/unaligned.h | 41 |
8 files changed, 246 insertions, 80 deletions
diff --git a/include/vdso/align.h b/include/vdso/align.h new file mode 100644 index 000000000000..02dd8626b5c5 --- /dev/null +++ b/include/vdso/align.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_ALIGN_H +#define __VDSO_ALIGN_H + +#include <vdso/const.h> + +/* @a is a power of 2 value */ +#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) +#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a))) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +#endif /* __VDSO_ALIGN_H */ diff --git a/include/vdso/auxclock.h b/include/vdso/auxclock.h new file mode 100644 index 000000000000..6d6e74cbc400 --- /dev/null +++ b/include/vdso/auxclock.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _VDSO_AUXCLOCK_H +#define _VDSO_AUXCLOCK_H + +#include <uapi/linux/time.h> +#include <uapi/linux/types.h> + +static __always_inline u64 aux_clock_resolution_ns(void) +{ + return 1; +} + +#endif /* _VDSO_AUXCLOCK_H */ diff --git a/include/vdso/cache.h b/include/vdso/cache.h new file mode 100644 index 000000000000..f89d48304bf8 --- /dev/null +++ b/include/vdso/cache.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_CACHE_H +#define __VDSO_CACHE_H + +#include <asm/cache.h> + +#ifndef SMP_CACHE_BYTES +#define SMP_CACHE_BYTES L1_CACHE_BYTES +#endif + +#ifndef ____cacheline_aligned +#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) +#endif + +#endif /* __VDSO_CACHE_H */ diff --git a/include/vdso/datapage.h b/include/vdso/datapage.h index d967baa0cd0c..5977723fb3b5 100644 --- a/include/vdso/datapage.h +++ b/include/vdso/datapage.h @@ -4,20 +4,16 @@ #ifndef __ASSEMBLY__ -#include <linux/compiler.h> +#include <linux/types.h> + +#include <uapi/linux/bits.h> #include 
<uapi/linux/time.h> -#include <uapi/linux/types.h> -#include <uapi/asm-generic/errno-base.h> +#include <vdso/align.h> #include <vdso/bits.h> -#include <vdso/clocksource.h> -#include <vdso/ktime.h> -#include <vdso/limits.h> -#include <vdso/math64.h> -#include <vdso/processor.h> +#include <vdso/cache.h> +#include <vdso/page.h> #include <vdso/time.h> -#include <vdso/time32.h> -#include <vdso/time64.h> #ifdef CONFIG_ARCH_HAS_VDSO_TIME_DATA #include <asm/vdso/time_data.h> @@ -25,7 +21,17 @@ struct arch_vdso_time_data {}; #endif +#if defined(CONFIG_ARCH_HAS_VDSO_ARCH_DATA) +#include <asm/vdso/arch_data.h> +#else +struct vdso_arch_data { + /* Needed for the generic code, never actually used at runtime */ + char __unused; +}; +#endif + #define VDSO_BASES (CLOCK_TAI + 1) +#define VDSO_BASE_AUX 0 #define VDSO_HRES (BIT(CLOCK_REALTIME) | \ BIT(CLOCK_MONOTONIC) | \ BIT(CLOCK_BOOTTIME) | \ @@ -33,6 +39,7 @@ struct arch_vdso_time_data {}; #define VDSO_COARSE (BIT(CLOCK_REALTIME_COARSE) | \ BIT(CLOCK_MONOTONIC_COARSE)) #define VDSO_RAW (BIT(CLOCK_MONOTONIC_RAW)) +#define VDSO_AUX __GENMASK(CLOCK_AUX_LAST, CLOCK_AUX) #define CS_HRES_COARSE 0 #define CS_RAW 1 @@ -45,11 +52,11 @@ struct arch_vdso_time_data {}; * * There is one vdso_timestamp object in vvar for each vDSO-accelerated * clock_id. For high-resolution clocks, this encodes the time - * corresponding to vdso_data.cycle_last. For coarse clocks this encodes + * corresponding to vdso_time_data.cycle_last. For coarse clocks this encodes * the actual time. * * To be noticed that for highres clocks nsec is left-shifted by - * vdso_data.cs[x].shift. + * vdso_time_data[x].shift. 
*/ struct vdso_timestamp { u64 sec; @@ -57,7 +64,7 @@ struct vdso_timestamp { }; /** - * struct vdso_data - vdso datapage representation + * struct vdso_clock - vdso per clocksource datapage representation * @seq: timebase sequence counter * @clock_mode: clock mode * @cycle_last: timebase at clocksource init @@ -65,21 +72,11 @@ struct vdso_timestamp { * @mask: clocksource mask * @mult: clocksource multiplier * @shift: clocksource shift - * @basetime[clock_id]: basetime per clock_id - * @offset[clock_id]: time namespace offset per clock_id - * @tz_minuteswest: minutes west of Greenwich - * @tz_dsttime: type of DST correction - * @hrtimer_res: hrtimer resolution - * @__unused: unused - * @arch_data: architecture specific data (optional, defaults - * to an empty struct) + * @basetime: basetime per clock_id + * @offset: time namespace offset per clock_id * - * vdso_data will be accessed by 64 bit and compat code at the same time - * so we should be careful before modifying this structure. - * - * The ordering of the struct members is optimized to have fast access to the - * often required struct members which are related to CLOCK_REALTIME and - * CLOCK_MONOTONIC. This information is stored in the first cache lines. + * See also struct vdso_time_data for basic access and ordering information as + * struct vdso_clock is used there. * * @basetime is used to store the base time for the system wide time getter * VVAR page. @@ -92,7 +89,7 @@ struct vdso_timestamp { * For clocks which are not affected by time namespace adjustment the * offset must be zero. 
*/ -struct vdso_data { +struct vdso_clock { u32 seq; s32 clock_mode; @@ -108,14 +105,37 @@ struct vdso_data { struct vdso_timestamp basetime[VDSO_BASES]; struct timens_offset offset[VDSO_BASES]; }; +}; - s32 tz_minuteswest; - s32 tz_dsttime; - u32 hrtimer_res; - u32 __unused; +/** + * struct vdso_time_data - vdso datapage representation + * @arch_data: architecture specific data (optional, defaults + * to an empty struct) + * @clock_data: clocksource related data (array) + * @aux_clock_data: auxiliary clocksource related data (array) + * @tz_minuteswest: minutes west of Greenwich + * @tz_dsttime: type of DST correction + * @hrtimer_res: hrtimer resolution + * @__unused: unused + * + * vdso_time_data will be accessed by 64 bit and compat code at the same time + * so we should be careful before modifying this structure. + * + * The ordering of the struct members is optimized to have fast access to the + * often required struct members which are related to CLOCK_REALTIME and + * CLOCK_MONOTONIC. This information is stored in the first cache lines. + */ +struct vdso_time_data { + struct arch_vdso_time_data arch_data; - struct arch_vdso_time_data arch_data; -}; + struct vdso_clock clock_data[CS_BASES]; + struct vdso_clock aux_clock_data[MAX_AUX_CLOCKS]; + + s32 tz_minuteswest; + s32 tz_dsttime; + u32 hrtimer_res; + u32 __unused; +} ____cacheline_aligned; /** * struct vdso_rng_data - vdso RNG state information @@ -136,33 +156,46 @@ struct vdso_rng_data { * With the hidden visibility, the compiler simply generates a PC-relative * relocation, and this is what we need. 
*/ -extern struct vdso_data _vdso_data[CS_BASES] __attribute__((visibility("hidden"))); -extern struct vdso_data _timens_data[CS_BASES] __attribute__((visibility("hidden"))); -extern struct vdso_rng_data _vdso_rng_data __attribute__((visibility("hidden"))); - -/** - * union vdso_data_store - Generic vDSO data page - */ -union vdso_data_store { - struct vdso_data data[CS_BASES]; - u8 page[1U << CONFIG_PAGE_SHIFT]; +extern struct vdso_time_data vdso_u_time_data __attribute__((visibility("hidden"))); +extern struct vdso_rng_data vdso_u_rng_data __attribute__((visibility("hidden"))); +extern struct vdso_arch_data vdso_u_arch_data __attribute__((visibility("hidden"))); + +extern struct vdso_time_data *vdso_k_time_data; +extern struct vdso_rng_data *vdso_k_rng_data; +extern struct vdso_arch_data *vdso_k_arch_data; + +#define VDSO_ARCH_DATA_SIZE ALIGN(sizeof(struct vdso_arch_data), PAGE_SIZE) +#define VDSO_ARCH_DATA_PAGES (VDSO_ARCH_DATA_SIZE >> PAGE_SHIFT) + +enum vdso_pages { + VDSO_TIME_PAGE_OFFSET, + VDSO_TIMENS_PAGE_OFFSET, + VDSO_RNG_PAGE_OFFSET, + VDSO_ARCH_PAGES_START, + VDSO_ARCH_PAGES_END = VDSO_ARCH_PAGES_START + VDSO_ARCH_DATA_PAGES - 1, + VDSO_NR_PAGES }; -/* - * The generic vDSO implementation requires that gettimeofday.h - * provides: - * - __arch_get_vdso_data(): to get the vdso datapage. - * - __arch_get_hw_counter(): to get the hw counter based on the - * clock_mode. - * - gettimeofday_fallback(): fallback for gettimeofday. - * - clock_gettime_fallback(): fallback for clock_gettime. - * - clock_getres_fallback(): fallback for clock_getres. 
- */ -#ifdef ENABLE_COMPAT_VDSO -#include <asm/vdso/compat_gettimeofday.h> +#else /* !__ASSEMBLY__ */ + +#ifdef CONFIG_VDSO_GETRANDOM +#define __vdso_u_rng_data PROVIDE(vdso_u_rng_data = vdso_u_data + 2 * PAGE_SIZE); +#else +#define __vdso_u_rng_data +#endif + +#ifdef CONFIG_ARCH_HAS_VDSO_ARCH_DATA +#define __vdso_u_arch_data PROVIDE(vdso_u_arch_data = vdso_u_data + 3 * PAGE_SIZE); #else -#include <asm/vdso/gettimeofday.h> -#endif /* ENABLE_COMPAT_VDSO */ +#define __vdso_u_arch_data +#endif + +#define VDSO_VVAR_SYMS \ + PROVIDE(vdso_u_data = . - __VDSO_PAGES * PAGE_SIZE); \ + PROVIDE(vdso_u_time_data = vdso_u_data); \ + __vdso_u_rng_data \ + __vdso_u_arch_data \ + #endif /* !__ASSEMBLY__ */ diff --git a/include/vdso/gettime.h b/include/vdso/gettime.h index c50d152e7b3e..16a0a0556b86 100644 --- a/include/vdso/gettime.h +++ b/include/vdso/gettime.h @@ -5,6 +5,7 @@ #include <linux/types.h> struct __kernel_timespec; +struct __kernel_old_timeval; struct timezone; #if !defined(CONFIG_64BIT) || defined(BUILD_VDSO32_64) @@ -19,5 +20,6 @@ int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); __kernel_old_time_t __vdso_time(__kernel_old_time_t *t); int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts); +int __vdso_clock_getres_time64(clockid_t clock, struct __kernel_timespec *ts); #endif diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h index 3ddb03bb05cb..a3bf4f1c0d37 100644 --- a/include/vdso/helpers.h +++ b/include/vdso/helpers.h @@ -6,50 +6,109 @@ #include <asm/barrier.h> #include <vdso/datapage.h> +#include <vdso/processor.h> +#include <vdso/clocksource.h> -static __always_inline u32 vdso_read_begin(const struct vdso_data *vd) +static __always_inline bool vdso_is_timens_clock(const struct vdso_clock *vc) +{ + return IS_ENABLED(CONFIG_TIME_NS) && vc->clock_mode == VDSO_CLOCKMODE_TIMENS; +} + +static __always_inline u32 vdso_read_begin(const 
struct vdso_clock *vc) { u32 seq; - while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) + while (unlikely((seq = READ_ONCE(vc->seq)) & 1)) cpu_relax(); smp_rmb(); return seq; } -static __always_inline u32 vdso_read_retry(const struct vdso_data *vd, +/* + * Variant of vdso_read_begin() to handle VDSO_CLOCKMODE_TIMENS. + * + * Time namespace enabled tasks have a special VVAR page installed which has + * vc->seq set to 1 and vc->clock_mode set to VDSO_CLOCKMODE_TIMENS. For non + * time namespace affected tasks this does not affect performance because if + * vc->seq is odd, i.e. a concurrent update is in progress the extra check for + * vc->clock_mode is just a few extra instructions while spin waiting for + * vc->seq to become even again. + */ +static __always_inline bool vdso_read_begin_timens(const struct vdso_clock *vc, u32 *seq) +{ + while (unlikely((*seq = READ_ONCE(vc->seq)) & 1)) { + if (vdso_is_timens_clock(vc)) + return true; + cpu_relax(); + } + smp_rmb(); + + return false; +} + +static __always_inline u32 vdso_read_retry(const struct vdso_clock *vc, u32 start) { u32 seq; smp_rmb(); - seq = READ_ONCE(vd->seq); - return seq != start; + seq = READ_ONCE(vc->seq); + return unlikely(seq != start); } -static __always_inline void vdso_write_begin(struct vdso_data *vd) +static __always_inline void vdso_write_seq_begin(struct vdso_clock *vc) { /* * WRITE_ONCE() is required otherwise the compiler can validly tear - * updates to vd[x].seq and it is possible that the value seen by the + * updates to vc->seq and it is possible that the value seen by the * reader is inconsistent. 
*/ - WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1); - WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1); - smp_wmb(); + WRITE_ONCE(vc->seq, vc->seq + 1); } -static __always_inline void vdso_write_end(struct vdso_data *vd) +static __always_inline void vdso_write_seq_end(struct vdso_clock *vc) { - smp_wmb(); /* * WRITE_ONCE() is required otherwise the compiler can validly tear - * updates to vd[x].seq and it is possible that the value seen by the + * updates to vc->seq and it is possible that the value seen by the * reader is inconsistent. */ - WRITE_ONCE(vd[CS_HRES_COARSE].seq, vd[CS_HRES_COARSE].seq + 1); - WRITE_ONCE(vd[CS_RAW].seq, vd[CS_RAW].seq + 1); + WRITE_ONCE(vc->seq, vc->seq + 1); +} + +static __always_inline void vdso_write_begin_clock(struct vdso_clock *vc) +{ + vdso_write_seq_begin(vc); + /* Ensure the sequence invalidation is visible before data is modified */ + smp_wmb(); +} + +static __always_inline void vdso_write_end_clock(struct vdso_clock *vc) +{ + /* Ensure the data update is visible before the sequence is set valid again */ + smp_wmb(); + vdso_write_seq_end(vc); +} + +static __always_inline void vdso_write_begin(struct vdso_time_data *vd) +{ + struct vdso_clock *vc = vd->clock_data; + + vdso_write_seq_begin(&vc[CS_HRES_COARSE]); + vdso_write_seq_begin(&vc[CS_RAW]); + /* Ensure the sequence invalidation is visible before data is modified */ + smp_wmb(); +} + +static __always_inline void vdso_write_end(struct vdso_time_data *vd) +{ + struct vdso_clock *vc = vd->clock_data; + + /* Ensure the data update is visible before the sequence is set valid again */ + smp_wmb(); + vdso_write_seq_end(&vc[CS_HRES_COARSE]); + vdso_write_seq_end(&vc[CS_RAW]); } #endif /* !__ASSEMBLY__ */ diff --git a/include/vdso/jiffies.h b/include/vdso/jiffies.h index 2f9d596c8b29..8ca04a141412 100644 --- a/include/vdso/jiffies.h +++ b/include/vdso/jiffies.h @@ -5,7 +5,7 @@ #include <asm/param.h> /* for HZ */ #include <vdso/time64.h> -/* TICK_NSEC is the time 
between ticks in nsec assuming SHIFTED_HZ */ +/* TICK_NSEC is the time between ticks in nsec */ #define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ) #endif /* __VDSO_JIFFIES_H */ diff --git a/include/vdso/unaligned.h b/include/vdso/unaligned.h index eee3d2a4dbe4..9076483c9fbb 100644 --- a/include/vdso/unaligned.h +++ b/include/vdso/unaligned.h @@ -2,14 +2,43 @@ #ifndef __VDSO_UNALIGNED_H #define __VDSO_UNALIGNED_H -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ +#include <linux/compiler_types.h> + +/** + * __get_unaligned_t - read an unaligned value from memory. + * @type: the type to load from the pointer. + * @ptr: the pointer to load from. + * + * Use memcpy to effect an unaligned type sized load avoiding undefined behavior + * from approaches like type punning that require -fno-strict-aliasing in order + * to be correct. As type may be const, use __unqual_scalar_typeof to map to a + * non-const type - you can't memcpy into a const type. The + * __get_unaligned_ctrl_type gives __unqual_scalar_typeof its required + * expression rather than type, a pointer is used to avoid warnings about mixing + * the use of 0 and NULL. The void* cast silences ubsan warnings. + */ +#define __get_unaligned_t(type, ptr) ({ \ + type *__get_unaligned_ctrl_type __always_unused = NULL; \ + __unqual_scalar_typeof(*__get_unaligned_ctrl_type) __get_unaligned_val; \ + __builtin_memcpy(&__get_unaligned_val, (void *)(ptr), \ + sizeof(__get_unaligned_val)); \ + __get_unaligned_val; \ }) -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ +/** + * __put_unaligned_t - write an unaligned value to memory. + * @type: the type of the value to store. + * @val: the value to store. + * @ptr: the pointer to store to. 
+ * + * Use memcpy to effect an unaligned type sized store avoiding undefined + * behavior from approaches like type punning that require -fno-strict-aliasing + * in order to be correct. The void* cast silences ubsan warnings. + */ +#define __put_unaligned_t(type, val, ptr) do { \ + type __put_unaligned_val = (val); \ + __builtin_memcpy((void *)(ptr), &__put_unaligned_val, \ + sizeof(__put_unaligned_val)); \ } while (0) #endif /* __VDSO_UNALIGNED_H */ |
