-rw-r--r--	arch/s390/crypto/chacha-glue.c		2
-rw-r--r--	arch/s390/crypto/crc32-vx.c		2
-rw-r--r--	arch/s390/include/asm/fpu-types.h	30
-rw-r--r--	arch/s390/include/asm/fpu.h		48
-rw-r--r--	arch/s390/kernel/fpu.c			48
-rw-r--r--	arch/s390/kernel/sysinfo.c		2
-rw-r--r--	arch/s390/kvm/kvm-s390.c		2
-rw-r--r--	lib/raid6/s390vx.uc			4
8 files changed, 96 insertions, 42 deletions
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c
index 97098add2079..f8b0c52e77a4 100644
--- a/arch/s390/crypto/chacha-glue.c
+++ b/arch/s390/crypto/chacha-glue.c
@@ -22,7 +22,7 @@ static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
 			    unsigned int nbytes, const u32 *key,
 			    u32 *counter)
 {
-	DECLARE_KERNEL_FPU_ONSTACK(vxstate);
+	DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
 
 	kernel_fpu_begin(&vxstate, KERNEL_VXR);
 	chacha20_vx(dst, src, nbytes, key, counter);
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index dc2997f18e30..d9f1fdb66691 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -50,7 +50,7 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
 		unsigned char const *data, size_t datalen)		\
 	{								\
 		unsigned long prealign, aligned, remaining;		\
-		DECLARE_KERNEL_FPU_ONSTACK(vxstate);			\
+		DECLARE_KERNEL_FPU_ONSTACK16(vxstate);			\
 									\
 		if (datalen < VX_MIN_LEN + VX_ALIGN_MASK)		\
 			return ___crc32_sw(crc, data, datalen);		\
diff --git a/arch/s390/include/asm/fpu-types.h b/arch/s390/include/asm/fpu-types.h
index 8e6927c23bdc..04c32b9fc849 100644
--- a/arch/s390/include/asm/fpu-types.h
+++ b/arch/s390/include/asm/fpu-types.h
@@ -16,14 +16,32 @@ struct fpu {
 	__vector128 vxrs[__NUM_VXRS] __aligned(8);
 };
 
-/* In-kernel FPU state structure */
+struct kernel_fpu_hdr {
+	int mask;
+	u32 fpc;
+};
+
 struct kernel_fpu {
-	int mask;
-	u32 fpc;
-	__vector128 vxrs[__NUM_VXRS] __aligned(8);
+	struct kernel_fpu_hdr hdr;
+	__vector128 vxrs[] __aligned(8);
 };
 
-#define DECLARE_KERNEL_FPU_ONSTACK(name) \
-	struct kernel_fpu name __uninitialized
+#define KERNEL_FPU_STRUCT(vxr_size)				\
+struct kernel_fpu_##vxr_size {					\
+	struct kernel_fpu_hdr hdr;				\
+	__vector128 vxrs[vxr_size] __aligned(8);		\
+}
+
+KERNEL_FPU_STRUCT(16);
+KERNEL_FPU_STRUCT(32);
+
+#define DECLARE_KERNEL_FPU_ONSTACK(vxr_size, name)		\
+	struct kernel_fpu_##vxr_size name __uninitialized
+
+#define DECLARE_KERNEL_FPU_ONSTACK16(name)			\
+	DECLARE_KERNEL_FPU_ONSTACK(16, name)
+
+#define DECLARE_KERNEL_FPU_ONSTACK32(name)			\
+	DECLARE_KERNEL_FPU_ONSTACK(32, name)
 
 #endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h
index e706af26c5d0..c1b3920092a1 100644
--- a/arch/s390/include/asm/fpu.h
+++ b/arch/s390/include/asm/fpu.h
@@ -162,28 +162,64 @@ static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
 	__load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
 }
 
-static inline void kernel_fpu_begin(struct kernel_fpu *state, int flags)
+static inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
 {
-	state->mask = READ_ONCE(current->thread.kfpu_flags);
+	state->hdr.mask = READ_ONCE(current->thread.kfpu_flags);
 	if (!test_thread_flag(TIF_FPU)) {
 		/* Save user space FPU state and register contents */
 		save_user_fpu_regs();
-	} else if (state->mask & flags) {
+	} else if (state->hdr.mask & flags) {
 		/* Save FPU/vector register in-use by the kernel */
 		__kernel_fpu_begin(state, flags);
 	}
 	__atomic_or(flags, &current->thread.kfpu_flags);
 }
 
-static inline void kernel_fpu_end(struct kernel_fpu *state, int flags)
+static inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
 {
-	WRITE_ONCE(current->thread.kfpu_flags, state->mask);
-	if (state->mask & flags) {
+	WRITE_ONCE(current->thread.kfpu_flags, state->hdr.mask);
+	if (state->hdr.mask & flags) {
 		/* Restore FPU/vector register in-use by the kernel */
 		__kernel_fpu_end(state, flags);
 	}
 }
 
+void __kernel_fpu_invalid_size(void);
+
+static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)
+{
+	unsigned int cnt = 0;
+
+	if (flags & KERNEL_VXR_V0V7)
+		cnt += 8;
+	if (flags & KERNEL_VXR_V8V15)
+		cnt += 8;
+	if (flags & KERNEL_VXR_V16V23)
+		cnt += 8;
+	if (flags & KERNEL_VXR_V24V31)
+		cnt += 8;
+	if (cnt != size)
+		__kernel_fpu_invalid_size();
+}
+
+#define kernel_fpu_begin(state, flags)					\
+{									\
+	typeof(state) s = (state);					\
+	int _flags = (flags);						\
+									\
+	kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs));		\
+	_kernel_fpu_begin((struct kernel_fpu *)s, _flags);		\
+}
+
+#define kernel_fpu_end(state, flags)					\
+{									\
+	typeof(state) s = (state);					\
+	int _flags = (flags);						\
+									\
+	kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs));		\
+	_kernel_fpu_end((struct kernel_fpu *)s, _flags);		\
+}
+
 static inline void save_kernel_fpu_regs(struct thread_struct *thread)
 {
 	struct fpu *state = &thread->kfpu;
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 733e188951b7..62e9befe7890 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -19,41 +19,41 @@ void __kernel_fpu_begin(struct kernel_fpu *state, int flags)
 	 * Limit the save to the FPU/vector registers already
 	 * in use by the previous context.
 	 */
-	flags &= state->mask;
+	flags &= state->hdr.mask;
 	if (flags & KERNEL_FPC)
-		fpu_stfpc(&state->fpc);
+		fpu_stfpc(&state->hdr.fpc);
 	if (!cpu_has_vx()) {
 		if (flags & KERNEL_VXR_LOW)
-			save_fp_regs_vx(state->vxrs);
+			save_fp_regs_vx(vxrs);
 		return;
 	}
 	mask = flags & KERNEL_VXR;
 	if (mask == KERNEL_VXR) {
-		fpu_vstm(0, 15, &vxrs[0]);
-		fpu_vstm(16, 31, &vxrs[16]);
+		vxrs += fpu_vstm(0, 15, vxrs);
+		vxrs += fpu_vstm(16, 31, vxrs);
 		return;
 	}
 	if (mask == KERNEL_VXR_MID) {
-		fpu_vstm(8, 23, &vxrs[8]);
+		vxrs += fpu_vstm(8, 23, vxrs);
 		return;
 	}
 	mask = flags & KERNEL_VXR_LOW;
 	if (mask) {
 		if (mask == KERNEL_VXR_LOW)
-			fpu_vstm(0, 15, &vxrs[0]);
+			vxrs += fpu_vstm(0, 15, vxrs);
 		else if (mask == KERNEL_VXR_V0V7)
-			fpu_vstm(0, 7, &vxrs[0]);
+			vxrs += fpu_vstm(0, 7, vxrs);
 		else
-			fpu_vstm(8, 15, &vxrs[8]);
+			vxrs += fpu_vstm(8, 15, vxrs);
 	}
 	mask = flags & KERNEL_VXR_HIGH;
 	if (mask) {
 		if (mask == KERNEL_VXR_HIGH)
-			fpu_vstm(16, 31, &vxrs[16]);
+			vxrs += fpu_vstm(16, 31, vxrs);
 		else if (mask == KERNEL_VXR_V16V23)
-			fpu_vstm(16, 23, &vxrs[16]);
+			vxrs += fpu_vstm(16, 23, vxrs);
 		else
-			fpu_vstm(24, 31, &vxrs[24]);
+			vxrs += fpu_vstm(24, 31, vxrs);
 	}
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -68,41 +68,41 @@ void __kernel_fpu_end(struct kernel_fpu *state, int flags)
 	 * previous context that have been overwritten by the
 	 * current context.
 	 */
-	flags &= state->mask;
+	flags &= state->hdr.mask;
 	if (flags & KERNEL_FPC)
-		fpu_lfpc(&state->fpc);
+		fpu_lfpc(&state->hdr.fpc);
 	if (!cpu_has_vx()) {
 		if (flags & KERNEL_VXR_LOW)
-			load_fp_regs_vx(state->vxrs);
+			load_fp_regs_vx(vxrs);
 		return;
 	}
 	mask = flags & KERNEL_VXR;
 	if (mask == KERNEL_VXR) {
-		fpu_vlm(0, 15, &vxrs[0]);
-		fpu_vlm(16, 31, &vxrs[16]);
+		vxrs += fpu_vlm(0, 15, vxrs);
+		vxrs += fpu_vlm(16, 31, vxrs);
 		return;
 	}
 	if (mask == KERNEL_VXR_MID) {
-		fpu_vlm(8, 23, &vxrs[8]);
+		vxrs += fpu_vlm(8, 23, vxrs);
 		return;
 	}
 	mask = flags & KERNEL_VXR_LOW;
 	if (mask) {
 		if (mask == KERNEL_VXR_LOW)
-			fpu_vlm(0, 15, &vxrs[0]);
+			vxrs += fpu_vlm(0, 15, vxrs);
 		else if (mask == KERNEL_VXR_V0V7)
-			fpu_vlm(0, 7, &vxrs[0]);
+			vxrs += fpu_vlm(0, 7, vxrs);
 		else
-			fpu_vlm(8, 15, &vxrs[8]);
+			vxrs += fpu_vlm(8, 15, vxrs);
 	}
 	mask = flags & KERNEL_VXR_HIGH;
 	if (mask) {
 		if (mask == KERNEL_VXR_HIGH)
-			fpu_vlm(16, 31, &vxrs[16]);
+			vxrs += fpu_vlm(16, 31, vxrs);
 		else if (mask == KERNEL_VXR_V16V23)
-			fpu_vlm(16, 23, &vxrs[16]);
+			vxrs += fpu_vlm(16, 23, vxrs);
 		else
-			fpu_vlm(24, 31, &vxrs[24]);
+			vxrs += fpu_vlm(24, 31, vxrs);
 	}
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 061d45cf0261..4cd6428bfab2 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -426,7 +426,7 @@ subsys_initcall(create_proc_service_level);
  */
 void s390_adjust_jiffies(void)
 {
-	DECLARE_KERNEL_FPU_ONSTACK(fpu);
+	DECLARE_KERNEL_FPU_ONSTACK16(fpu);
 	struct sysinfo_1_2_2 *info;
 	unsigned long capability;
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8467945344b5..8c222b0dfbf2 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -5026,7 +5026,7 @@ static void store_regs(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *kvm_run = vcpu->run;
-	DECLARE_KERNEL_FPU_ONSTACK(fpu);
+	DECLARE_KERNEL_FPU_ONSTACK32(fpu);
 	int rc;
 
 	/*
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
index bc2f4fbe5a82..92c05b7596bc 100644
--- a/lib/raid6/s390vx.uc
+++ b/lib/raid6/s390vx.uc
@@ -80,7 +80,7 @@ static inline void COPY_VEC(int x, int y)
 
 static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
 {
-	DECLARE_KERNEL_FPU_ONSTACK(vxstate);
+	DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
 	u8 **dptr, *p, *q;
 	int d, z, z0;
 
@@ -113,7 +113,7 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
 static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
 					size_t bytes, void **ptrs)
 {
-	DECLARE_KERNEL_FPU_ONSTACK(vxstate);
+	DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
 	u8 **dptr, *p, *q;
 	int d, z, z0;
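
For context (not part of the patch): with the sized structures from fpu-types.h, a caller declares the smallest on-stack save area that covers the register ranges it later passes to kernel_fpu_begin(). The kernel_fpu_begin()/kernel_fpu_end() macros call kernel_fpu_check_size(), which relies on __kernel_fpu_invalid_size() having no definition, so a flags/size mismatch that the compiler cannot prove impossible ends up as a link error (the usual build-time assertion pattern). Below is a minimal, hypothetical caller sketched against the <asm/fpu.h> interface shown above; the function name is illustrative only.

/* Hypothetical illustration, not part of this commit. */
#include <asm/fpu.h>

static void example_vector_user(void)
{
	/* Room for 16 vector registers: matches KERNEL_VXR_LOW (V0-V15). */
	DECLARE_KERNEL_FPU_ONSTACK16(vxstate);

	/*
	 * kernel_fpu_begin() is now a macro: it compares ARRAY_SIZE(vxstate.vxrs)
	 * against the KERNEL_VXR_* flags (8 registers per flag) via
	 * kernel_fpu_check_size(), then calls _kernel_fpu_begin() through a
	 * generic struct kernel_fpu pointer.
	 */
	kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW | KERNEL_FPC);

	/* ... code that uses vector registers V0-V15 and the FPC ... */

	kernel_fpu_end(&vxstate, KERNEL_VXR_LOW | KERNEL_FPC);
}

Requesting KERNEL_VXR (all 32 vector registers) with the 16-entry variant would leave the __kernel_fpu_invalid_size() call in place and fail the build; that is the mismatch the chacha, KVM, and RAID6 callers avoid by switching to DECLARE_KERNEL_FPU_ONSTACK32.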