diff options
Diffstat (limited to 'arch/s390')
28 files changed, 204 insertions, 364 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9c9ec08d78c7..7ed6f229250c 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -166,6 +166,7 @@ config S390 select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD select GENERIC_TIME_VSYSCALL + select GENERIC_VDSO_DATA_STORE select GENERIC_VDSO_TIME_NS select GENERIC_IOREMAP if PCI select HAVE_ALIGNED_STRUCT_PAGE diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 80bdfbae6e5b..26aa455d4068 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -815,9 +815,6 @@ CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CORDIC=m CONFIG_CRYPTO_LIB_CURVE25519=m CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m CONFIG_RANDOM32_SELFTEST=y CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 449a0e996b96..8392f8a5ad6d 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -803,9 +803,6 @@ CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m CONFIG_CRYPTO_LIB_CURVE25519=m CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m -CONFIG_CRC4=m -CONFIG_CRC7=m -CONFIG_CRC8=m CONFIG_XZ_DEC_MICROLZMA=y CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=0 diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 4e73ef46d4b2..9f2814d0e1e9 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -139,7 +139,6 @@ int s390_replace_asce(struct gmap *gmap); void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool interruptible); -int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split); unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level); /** diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9a367866cab0..424f899d8163 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1056,7 +1056,6 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu); extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); -static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index b11f5b6d0bd1..46fb0ef6f984 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -631,7 +631,7 @@ int uv_pin_shared(unsigned long paddr); int uv_destroy_folio(struct folio *folio); int uv_destroy_pte(pte_t pte); int uv_convert_from_secure_pte(pte_t pte); -int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb); +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb); int uv_convert_from_secure(unsigned long paddr); int uv_convert_from_secure_folio(struct folio *folio); diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 92c73e4d97a9..420a073fdde5 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -6,13 +6,11 @@ #ifndef __ASSEMBLY__ -extern struct vdso_data *vdso_data; - int vdso_getcpu_init(void); #endif /* __ASSEMBLY__ */ -#define __VVAR_PAGES 2 +#define __VDSO_PAGES 4 #define VDSO_VERSION_STRING LINUX_2.6.29 diff --git a/arch/s390/include/asm/vdso/getrandom.h b/arch/s390/include/asm/vdso/getrandom.h index 36355af7160b..f8713ce39bb2 100644 --- a/arch/s390/include/asm/vdso/getrandom.h +++ b/arch/s390/include/asm/vdso/getrandom.h @@ -23,18 +23,6 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags); } -static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) -{ - /* - * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace - * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ - * PAGE_OFFSET points to the real VVAR page. - */ - if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) - return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - return &_vdso_rng_data; -} - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/s390/include/asm/vdso/gettimeofday.h b/arch/s390/include/asm/vdso/gettimeofday.h index 7937765ccfa5..fb4564308e9d 100644 --- a/arch/s390/include/asm/vdso/gettimeofday.h +++ b/arch/s390/include/asm/vdso/gettimeofday.h @@ -14,12 +14,7 @@ #include <linux/compiler.h> -static __always_inline const struct vdso_data *__arch_get_vdso_data(void) -{ - return _vdso_data; -} - -static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_data *vd) +static inline u64 __arch_get_hw_counter(s32 clock_mode, const struct vdso_time_data *vd) { u64 adj, now; @@ -49,12 +44,4 @@ long clock_getres_fallback(clockid_t clkid, struct __kernel_timespec *ts) return syscall2(__NR_clock_getres, (long)clkid, (long)ts); } -#ifdef CONFIG_TIME_NS -static __always_inline -const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) -{ - return _timens_data; -} -#endif - #endif diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h index 3eb576ecd3bd..d346ebe51301 100644 --- a/arch/s390/include/asm/vdso/vsyscall.h +++ b/arch/s390/include/asm/vdso/vsyscall.h @@ -2,32 +2,12 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H -#define __VDSO_RND_DATA_OFFSET 768 - #ifndef __ASSEMBLY__ #include <linux/hrtimer.h> #include <vdso/datapage.h> #include <asm/vdso.h> -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES -}; - -static __always_inline struct vdso_data *__s390_get_k_vdso_data(void) -{ - return vdso_data; -} -#define __arch_get_k_vdso_data __s390_get_k_vdso_data - -static __always_inline struct vdso_rng_data *__s390_get_k_vdso_rnd_data(void) -{ - return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; -} -#define __arch_get_k_vdso_rng_data __s390_get_k_vdso_rnd_data - /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 276cb4c1e11b..4a981266b483 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -246,15 +246,6 @@ bool is_kdump_kernel(void) } EXPORT_SYMBOL_GPL(is_kdump_kernel); -static const char *nt_name(Elf64_Word type) -{ - const char *name = "LINUX"; - - if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) - name = KEXEC_CORE_NOTE_NAME; - return name; -} - /* * Initialize ELF note */ @@ -279,10 +270,8 @@ static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, return PTR_ADD(buf, len); } -static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) -{ - return nt_init_name(buf, type, desc, d_len, nt_name(type)); -} +#define nt_init(buf, type, desc) \ + nt_init_name(buf, NT_ ## type, &(desc), sizeof(desc), NN_ ## type) /* * Calculate the size of ELF note @@ -298,10 +287,7 @@ static size_t nt_size_name(int d_len, const char *name) return size; } -static inline size_t nt_size(Elf64_Word type, int d_len) -{ - return nt_size_name(d_len, nt_name(type)); -} +#define nt_size(type, desc) nt_size_name(sizeof(desc), NN_ ## type) /* * Fill ELF notes for one CPU with save area registers @@ -322,18 +308,16 @@ static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); /* Create ELF notes for the CPU */ - ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); - ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); - ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); - ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); - ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); - ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); - ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); + ptr = nt_init(ptr, PRSTATUS, nt_prstatus); + ptr = nt_init(ptr, PRFPREG, nt_fpregset); + ptr = nt_init(ptr, S390_TIMER, sa->timer); + ptr = nt_init(ptr, S390_TODCMP, sa->todcmp); + ptr = nt_init(ptr, S390_TODPREG, sa->todpreg); + ptr = nt_init(ptr, S390_CTRS, sa->ctrs); + ptr = nt_init(ptr, S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - ptr = nt_init(ptr, NT_S390_VXRS_HIGH, - &sa->vxrs_high, sizeof(sa->vxrs_high)); - ptr = nt_init(ptr, NT_S390_VXRS_LOW, - &sa->vxrs_low, sizeof(sa->vxrs_low)); + ptr = nt_init(ptr, S390_VXRS_HIGH, sa->vxrs_high); + ptr = nt_init(ptr, S390_VXRS_LOW, sa->vxrs_low); } return ptr; } @@ -346,16 +330,16 @@ static size_t get_cpu_elf_notes_size(void) struct save_area *sa = NULL; size_t size; - size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); - size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); - size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); - size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); - size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); - size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); - size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + size = nt_size(PRSTATUS, struct elf_prstatus); + size += nt_size(PRFPREG, elf_fpregset_t); + size += nt_size(S390_TIMER, sa->timer); + size += nt_size(S390_TODCMP, sa->todcmp); + size += nt_size(S390_TODPREG, sa->todpreg); + size += nt_size(S390_CTRS, sa->ctrs); + size += nt_size(S390_PREFIX, sa->prefix); if (cpu_has_vx()) { - size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); - size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + size += nt_size(S390_VXRS_HIGH, sa->vxrs_high); + size += nt_size(S390_VXRS_LOW, sa->vxrs_low); } return size; @@ -371,7 +355,7 @@ static void *nt_prpsinfo(void *ptr) memset(&prpsinfo, 0, sizeof(prpsinfo)); prpsinfo.pr_sname = 'R'; strcpy(prpsinfo.pr_fname, "vmlinux"); - return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); + return nt_init(ptr, PRPSINFO, prpsinfo); } /* @@ -610,7 +594,7 @@ static size_t get_elfcorehdr_size(int phdr_count) /* PT_NOTES */ size += sizeof(Elf64_Phdr); /* nt_prpsinfo */ - size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + size += nt_size(PRPSINFO, struct elf_prpsinfo); /* regsets */ size += get_cpu_cnt() * get_cpu_elf_notes_size(); /* nt_vmcoreinfo */ diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 1ecd0580561f..911b95cd57e5 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -198,13 +198,8 @@ void __noreturn die(struct pt_regs *regs, const char *str) console_verbose(); spin_lock_irq(&die_lock); bust_spinlocks(1); - printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, + printk("%s: %04x ilc:%d [#%d]", str, regs->int_code & 0xffff, regs->int_code >> 17, ++die_counter); -#ifdef CONFIG_PREEMPT - pr_cont("PREEMPT "); -#elif defined(CONFIG_PREEMPT_RT) - pr_cont("PREEMPT_RT "); -#endif pr_cont("SMP "); if (debug_pagealloc_enabled()) pr_cont("DEBUG_PAGEALLOC"); diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 10725f5a6f0f..63875270941b 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -518,7 +518,8 @@ static void paicrypt_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event CRYPTO_ALL is allowed. */ -static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index a8f0bad99cf0..fd14d5ebccbc 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -542,7 +542,8 @@ static void paiext_have_samples(void) /* Called on schedule-in and schedule-out. No access to event structure, * but for sampling only event NNPA_ALL is allowed. */ -static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, + struct task_struct *task, bool sched_in) { /* We started with a clean page on event installation. So read out * results on schedule_out and if page was dirty, save old values. diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 5ce9a795a0fe..745649ad9779 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -72,7 +72,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) this_cpu = smp_processor_id(); if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { __this_cpu_write(cpu_relax_retry, 0); - cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + cpu = cpumask_next_wrap(this_cpu, cpumask); if (cpu >= nr_cpu_ids) return; if (arch_vcpu_is_preempted(cpu)) diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e9115b4d8b63..a4569b96ef06 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e9f47c3a6197..699a18f1c54e 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -79,12 +79,10 @@ void __init time_early_init(void) { struct ptff_qto qto; struct ptff_qui qui; - int cs; /* Initialize TOD steering parameters */ tod_steering_end = tod_clock_base.tod; - for (cs = 0; cs < CS_BASES; cs++) - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; if (!test_facility(28)) return; @@ -373,7 +371,6 @@ static void clock_sync_global(long delta) { unsigned long now, adj; struct ptff_qto qto; - int cs; /* Fixup the monotonic sched clock. */ tod_clock_base.eitod += delta; @@ -389,10 +386,8 @@ static void clock_sync_global(long delta) panic("TOD clock sync offset %li is too large to drift\n", tod_steering_delta); tod_steering_end = now + (abs(tod_steering_delta) << 15); - for (cs = 0; cs < CS_BASES; cs++) { - vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; - vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta; - } + vdso_k_time_data->arch_data.tod_steering_end = tod_steering_end; + vdso_k_time_data->arch_data.tod_steering_delta = tod_steering_delta; /* Update LPAR offset. */ if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 9f05df2da2f7..9a5d5be8acf4 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -206,6 +206,39 @@ int uv_convert_from_secure_pte(pte_t pte) return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); } +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + /* * Calculate the expected ref_count for a folio that would otherwise have no * further pins. This was cribbed from similar functions in other places in @@ -228,7 +261,7 @@ static int expected_folio_refs(struct folio *folio) } /** - * make_folio_secure() - make a folio secure + * __make_folio_secure() - make a folio secure * @folio: the folio to make secure * @uvcb: the uvcb that describes the UVC to be used * @@ -237,20 +270,18 @@ static int expected_folio_refs(struct folio *folio) * * Return: 0 on success; * -EBUSY if the folio is in writeback or has too many references; - * -E2BIG if the folio is large; * -EAGAIN if the UVC needs to be attempted again; * -ENXIO if the address is not mapped; * -EINVAL if the UVC failed for other reasons. * * Context: The caller must hold exactly one extra reference on the folio - * (it's the same logic as split_folio()) + * (it's the same logic as split_folio()), and the folio must be + * locked. */ -int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; - if (folio_test_large(folio)) - return -E2BIG; if (folio_test_writeback(folio)) return -EBUSY; expected = expected_folio_refs(folio) + 1; @@ -277,7 +308,98 @@ int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return -EAGAIN; return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } -EXPORT_SYMBOL_GPL(make_folio_secure); + +static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb) +{ + int rc; + + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = __make_folio_secure(folio, uvcb); + folio_unlock(folio); + + return rc; +} + +/** + * s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split. + * @mm: the mm containing the folio to work on + * @folio: the folio + * @split: whether to split a large folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + * Return: 0 if the folio was split successfully; + * -EAGAIN if the folio was not split successfully but another attempt + * can be made, or if @split was set to false; + * -EINVAL in case of other errors. See split_folio(). + */ +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) +{ + int rc; + + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); + if (split) { + folio_lock(folio); + rc = split_folio(folio); + folio_unlock(folio); + + if (rc != -EBUSY) + return rc; + } + return -EAGAIN; +} + +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) +{ + struct vm_area_struct *vma; + struct folio_walk fw; + struct folio *folio; + int rc; + + mmap_read_lock(mm); + vma = vma_lookup(mm, hva); + if (!vma) { + mmap_read_unlock(mm); + return -EFAULT; + } + folio = folio_walk_start(&fw, vma, hva, 0); + if (!folio) { + mmap_read_unlock(mm); + return -ENXIO; + } + + folio_get(folio); + /* + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. + */ + if (folio_test_hugetlb(folio)) + rc = -EFAULT; + else if (folio_test_large(folio)) + rc = -E2BIG; + else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID)) + rc = -ENXIO; + else + rc = make_folio_secure(mm, folio, uvcb); + folio_walk_end(&fw, vma); + mmap_read_unlock(mm); + + if (rc == -E2BIG || rc == -EBUSY) + rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG); + folio_put(folio); + + return rc; +} +EXPORT_SYMBOL_GPL(make_hva_secure); /* * To be called with the folio locked or with an extra reference! This will diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 598b512cde01..70c8f9ad13cd 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -16,8 +16,8 @@ #include <linux/mm.h> #include <linux/slab.h> #include <linux/smp.h> -#include <linux/time_namespace.h> #include <linux/random.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <asm/vdso/vsyscall.h> #include <asm/alternative.h> @@ -26,85 +26,6 @@ extern char vdso64_start[], vdso64_end[]; extern char vdso32_start[], vdso32_end[]; -static struct vm_special_mapping vvar_mapping; - -static union vdso_data_store vdso_data_store __page_aligned_data; - -struct vdso_data *vdso_data = vdso_data_store.data; - -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return (struct vdso_data *)(vvar_page); -} - -/* - * The VVAR page layout depends on whether a task belongs to the root or - * non-root time namespace. Whenever a task changes its namespace, the VVAR - * page tables are cleared and then they will be re-faulted with a - * corresponding layout. - * See also the comment near timens_setup_vdso_data() for details. - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (!vma_is_special_mapping(vma, &vvar_mapping)) - continue; - zap_vma_pages(vma); - break; - } - mmap_read_unlock(mm); - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long addr, pfn; - vm_fault_t err; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - pfn = virt_to_pfn(vdso_data); - if (timens_page) { - /* - * Fault in VVAR page too, since it will be accessed - * to get clock data anyway. - */ - addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; - err = vmf_insert_pfn(vma, addr, pfn); - if (unlikely(err & VM_FAULT_ERROR)) - return err; - pfn = page_to_pfn(timens_page); - } - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - return vmf_insert_pfn(vma, vmf->address, pfn); -} - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *vma) { @@ -112,11 +33,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, return 0; } -static struct vm_special_mapping vvar_mapping = { - .name = "[vvar]", - .fault = vvar_fault, -}; - static struct vm_special_mapping vdso64_mapping = { .name = "[vdso]", .mremap = vdso_mremap, @@ -142,7 +58,7 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) struct vm_area_struct *vma; int rc; - BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + BUILD_BUG_ON(VDSO_NR_PAGES != __VDSO_PAGES); if (mmap_write_lock_killable(mm)) return -EINTR; @@ -157,14 +73,11 @@ static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) rc = vvar_start; if (IS_ERR_VALUE(vvar_start)) goto out; - vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, - VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| - VM_PFNMAP, - &vvar_mapping); + vma = vdso_install_vvar_mapping(mm, vvar_start); rc = PTR_ERR(vma); if (IS_ERR(vma)) goto out; - vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; + vdso_text_start = vvar_start + VDSO_NR_PAGES * PAGE_SIZE; /* VM_MAYWRITE for COW so gdb can set breakpoints */ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, VM_READ|VM_EXEC| @@ -220,7 +133,7 @@ unsigned long vdso_text_size(void) unsigned long vdso_size(void) { - return vdso_text_size() + VVAR_NR_PAGES * PAGE_SIZE; + return vdso_text_size() + VDSO_NR_PAGES * PAGE_SIZE; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 2c5afb88d298..1e4ddd1a683f 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = vdso_user_wrapper-32.o note-32.o # Build rules diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index c916c4f73f76..9630d58c2080 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -6,16 +6,15 @@ #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") OUTPUT_ARCH(s390:31-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index ad206f2068d8..d8f0df742809 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index ec42b7d9cb53..e4f6551ae898 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -7,17 +7,15 @@ #include <asm/vdso/vsyscall.h> #include <asm/page.h> #include <asm/vdso.h> +#include <vdso/datapage.h> OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") OUTPUT_ARCH(s390:64-bit) SECTIONS { - PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); - PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); -#ifdef CONFIG_TIME_NS - PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); -#endif + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c index 02adf151d4de..6d8944d1b4a0 100644 --- a/arch/s390/kvm/gmap.c +++ b/arch/s390/kvm/gmap.c @@ -23,116 +23,25 @@ #include "gmap.h" /** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) -{ - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} - -static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb) -{ - struct folio *folio = page_folio(page); - int rc; - - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. The guest is thus never reaching secure mode. - * If userspace plays dirty tricks and decides to map huge pages at a - * later point in time, it will receive a segmentation fault or - * KVM_RUN will return -EFAULT. - */ - if (folio_test_hugetlb(folio)) - return -EFAULT; - if (folio_test_large(folio)) { - mmap_read_unlock(gmap->mm); - rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true); - mmap_read_lock(gmap->mm); - if (rc) - return rc; - folio = page_folio(page); - } - - if (!folio_trylock(folio)) - return -EAGAIN; - if (should_export_before_import(uvcb, gmap->mm)) - uv_convert_from_secure(folio_to_phys(folio)); - rc = make_folio_secure(folio, uvcb); - folio_unlock(folio); - - /* - * In theory a race is possible and the folio might have become - * large again before the folio_trylock() above. In that case, no - * action is performed and -EAGAIN is returned; the callers will - * have to try again later. - * In most cases this implies running the VM again, getting the same - * exception again, and make another attempt in this function. - * This is expected to happen extremely rarely. - */ - if (rc == -E2BIG) - return -EAGAIN; - /* The folio has too many references, try to shake some off */ - if (rc == -EBUSY) { - mmap_read_unlock(gmap->mm); - kvm_s390_wiggle_split_folio(gmap->mm, folio, false); - mmap_read_lock(gmap->mm); - return -EAGAIN; - } - - return rc; -} - -/** * gmap_make_secure() - make one guest page secure * @gmap: the guest gmap * @gaddr: the guest address that needs to be made secure * @uvcb: the UVCB specifying which operation needs to be performed * * Context: needs to be called with kvm->srcu held. - * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()). + * Return: 0 on success, < 0 in case of error. */ int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) { struct kvm *kvm = gmap->private; - struct page *page; - int rc = 0; + unsigned long vmaddr; lockdep_assert_held(&kvm->srcu); - page = gfn_to_page(kvm, gpa_to_gfn(gaddr)); - mmap_read_lock(gmap->mm); - if (page) - rc = __gmap_make_secure(gmap, page, uvcb); - kvm_release_page_clean(page); - mmap_read_unlock(gmap->mm); - - return rc; + vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr)); + if (kvm_is_error_hva(vmaddr)) + return -EFAULT; + return make_hva_secure(gmap->mm, vmaddr, uvcb); } int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 07ff0e10cb7f..0f00f8e85fee 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3174,8 +3174,7 @@ void kvm_s390_gisa_init(struct kvm *kvm) gi->alert.mask = 0; spin_lock_init(&gi->alert.ref_lock); gi->expires = 50 * 1000; /* 50 usec */ - hrtimer_init(&gi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - gi->timer.function = gisa_vcpu_kicker; + hrtimer_setup(&gi->timer, gisa_vcpu_kicker, CLOCK_MONOTONIC, HRTIMER_MODE_REL); memset(gi->origin, 0, sizeof(struct kvm_s390_gisa)); gi->origin->next_alert = (u32)virt_to_phys(gi->origin); VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ebecb96bacce..48104e59648b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3943,8 +3943,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) if (rc) return rc; } - hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; + hrtimer_setup(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); vcpu->arch.sie_block->hpid = HPID_KVM; @@ -4952,6 +4952,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { unsigned int flags = 0; unsigned long gaddr; + int rc; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) @@ -4961,16 +4962,6 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case 0: vcpu->stat.exit_null++; break; - case PGM_NON_SECURE_STORAGE_ACCESS: - kvm_s390_assert_primary_as(vcpu); - /* - * This is normal operation; a page belonging to a protected - * guest has not been imported yet. Try to import the page into - * the protected guest. - */ - if (gmap_convert_to_secure(vcpu->arch.gmap, gaddr) == -EINVAL) - send_sig(SIGSEGV, current, 0); - break; case PGM_SECURE_STORAGE_ACCESS: case PGM_SECURE_STORAGE_VIOLATION: kvm_s390_assert_primary_as(vcpu); @@ -4995,6 +4986,20 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) send_sig(SIGSEGV, current, 0); } break; + case PGM_NON_SECURE_STORAGE_ACCESS: + kvm_s390_assert_primary_as(vcpu); + /* + * This is normal operation; a page belonging to a protected + * guest has not been imported yet. Try to import the page into + * the protected guest. + */ + rc = gmap_convert_to_secure(vcpu->arch.gmap, gaddr); + if (rc == -EINVAL) + send_sig(SIGSEGV, current, 0); + if (rc != -ENXIO) + break; + flags = FAULT_FLAG_WRITE; + fallthrough; case PGM_PROTECTION: case PGM_SEGMENT_TRANSLATION: case PGM_PAGE_TRANSLATION: diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c index 137080e61f90..124214a27340 100644 --- a/arch/s390/lib/crc32-glue.c +++ b/arch/s390/lib/crc32-glue.c @@ -62,7 +62,7 @@ static DEFINE_STATIC_KEY_FALSE(have_vxrs); DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) -DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base) +DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base) static int __init crc32_s390_init(void) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 94d927785800..d14b488e7a1f 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2626,31 +2626,3 @@ int s390_replace_asce(struct gmap *gmap) return 0; } EXPORT_SYMBOL_GPL(s390_replace_asce); - -/** - * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split - * @mm: the mm containing the folio to work on - * @folio: the folio - * @split: whether to split a large folio - * - * Context: Must be called while holding an extra reference to the folio; - * the mm lock should not be held. - */ -int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) -{ - int rc; - - lockdep_assert_not_held(&mm->mmap_lock); - folio_wait_writeback(folio); - lru_add_drain_all(); - if (split) { - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - - if (rc != -EBUSY) - return rc; - } - return -EAGAIN; -} -EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio); |