Diffstat (limited to 'arch/arm64/kernel/fpsimd.c')
-rw-r--r--  arch/arm64/kernel/fpsimd.c  |  73
1 file changed, 38 insertions(+), 35 deletions(-)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 7a1aeb95d7c3..91e44ac7150f 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -679,7 +679,7 @@ static void fpsimd_to_sve(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -705,7 +705,7 @@ static void sve_to_fpsimd(struct task_struct *task)
unsigned int i;
__uint128_t const *p;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
vl = thread_get_cur_vl(&task->thread);
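
Both conversion helpers now also run when only SME is implemented: streaming SVE uses the SVE register layout, so on an SME-only system the FPSIMD V registers still have to be scattered into the Z-register slots of sve_state and gathered back again. The userspace sketch below illustrates just that layout mapping; the helper names and the flat buffer are illustrative and ignore the byte-swapping the kernel does on big-endian.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NUM_ZREGS	32
#define VQ_BYTES	16	/* one SVE quadword (128 bits) */

/* Byte offset of Zn inside a flat buffer laid out Z0..Z31, each vq*16 bytes. */
static size_t zreg_off(unsigned int vq, unsigned int n)
{
	return (size_t)n * vq * VQ_BYTES;
}

/* fpsimd_to_sve direction: Vn becomes the low 128 bits of Zn. */
static void v_to_z(uint8_t *zbuf, uint8_t v[NUM_ZREGS][16], unsigned int vq)
{
	for (unsigned int i = 0; i < NUM_ZREGS; i++)
		memcpy(zbuf + zreg_off(vq, i), v[i], 16);
}

/* sve_to_fpsimd direction: Vn is read back from the low 128 bits of Zn. */
static void z_to_v(uint8_t v[NUM_ZREGS][16], uint8_t *zbuf, unsigned int vq)
{
	for (unsigned int i = 0; i < NUM_ZREGS; i++)
		memcpy(v[i], zbuf + zreg_off(vq, i), 16);
}

int main(void)
{
	enum { VQ = 4 };			/* pretend the current VL is 512 bits */
	uint8_t v[NUM_ZREGS][16] = { { 0xaa }, { 0xbb } };
	uint8_t zbuf[NUM_ZREGS * VQ * VQ_BYTES] = { 0 };
	uint8_t back[NUM_ZREGS][16];

	v_to_z(zbuf, v, VQ);
	z_to_v(back, zbuf, VQ);
	printf("round trip matches: %s\n", memcmp(v, back, sizeof(v)) ? "no" : "yes");
	return 0;
}

In the kernel the quadword count comes from sve_vq_from_vl() for the task's current vector length, as in the hunks above.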
@@ -835,7 +835,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!test_tsk_thread_flag(task, TIF_SVE))
+ if (!test_tsk_thread_flag(task, TIF_SVE) &&
+ !thread_sm_enabled(&task->thread))
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
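
The extra thread_sm_enabled() check covers tasks that are in streaming mode without TIF_SVE set: their register state also lives in the SVE layout, so the zero-padding sync from the FPSIMD view has to happen for them too. For reference, the predicate is roughly the following (paraphrased from asm/processor.h from memory, not part of this diff):

static inline bool thread_sm_enabled(struct thread_struct *thread)
{
	/* Streaming mode is active for this thread iff its saved SVCR.SM is set. */
	return system_supports_sme() && (thread->svcr & SVCR_SM_MASK);
}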
@@ -847,6 +848,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags)
{
+ bool free_sme = false;
+
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC))
return -EINVAL;
@@ -897,24 +900,39 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
task->thread.fp_type = FP_STATE_FPSIMD;
}
- if (system_supports_sme() && type == ARM64_VEC_SME) {
- task->thread.svcr &= ~(SVCR_SM_MASK |
- SVCR_ZA_MASK);
- clear_thread_flag(TIF_SME);
+ if (system_supports_sme()) {
+ if (type == ARM64_VEC_SME ||
+ !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
+ /*
+ * We are changing the SME VL or weren't using
+ * SME anyway; discard the state and force a
+ * reallocation.
+ */
+ task->thread.svcr &= ~(SVCR_SM_MASK |
+ SVCR_ZA_MASK);
+ clear_tsk_thread_flag(task, TIF_SME);
+ free_sme = true;
+ }
}
if (task == current)
put_cpu_fpsimd_context();
+ task_set_vl(task, type, vl);
+
/*
- * Force reallocation of task SVE and SME state to the correct
- * size on next use:
+ * Free the changed states if they are not in use; SME will be
+ * reallocated to the correct size on next use, and we just
+ * allocate SVE now in case it is needed for use in streaming
+ * mode.
*/
- sve_free(task);
- if (system_supports_sme() && type == ARM64_VEC_SME)
- sme_free(task);
+ if (system_supports_sve()) {
+ sve_free(task);
+ sve_alloc(task, true);
+ }
- task_set_vl(task, type, vl);
+ if (free_sme)
+ sme_free(task);
out:
update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
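
Note the ordering: task_set_vl() is now called before the buffers are freed and reallocated, because the allocation helpers size sve_state from the VL recorded for the task. A rough paraphrase of sve_alloc() (from memory, not part of this diff) shows why the new VL has to be visible first:

void sve_alloc(struct task_struct *task, bool flush)
{
	if (task->thread.sve_state) {
		if (flush)
			memset(task->thread.sve_state, 0,
			       sve_state_size(task));
		return;
	}

	/* sve_state_size() is derived from the VL just recorded by task_set_vl(). */
	task->thread.sve_state =
		kzalloc(sve_state_size(task), GFP_KERNEL);
}

SME state, by contrast, is only freed here when free_sme was set above and is reallocated lazily the next time the task uses SME.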
@@ -1161,9 +1179,6 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
*/
u64 read_zcr_features(void)
{
- u64 zcr;
- unsigned int vq_max;
-
/*
* Set the maximum possible VL, and write zeroes to all other
* bits to see if they stick.
@@ -1171,12 +1186,8 @@ u64 read_zcr_features(void)
sve_kernel_enable(NULL);
write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
- zcr = read_sysreg_s(SYS_ZCR_EL1);
- zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
- vq_max = sve_vq_from_vl(sve_get_vl());
- zcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
- return zcr;
+ /* Return LEN value that would be written to get the maximum VL */
+ return sve_vq_from_vl(sve_get_vl()) - 1;
}
void __init sve_setup(void)
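
The simplified return value relies on the LEN field encoding: ZCR_ELx.LEN (and SMCR_ELx.LEN in the matching change below) holds the vector length as a count of 128-bit quadwords minus one, i.e. VL = 16 * (LEN + 1) bytes, so sve_vq_from_vl(sve_get_vl()) - 1 is exactly the value that would be written to select the maximum VL. A small standalone sketch of that encoding (illustrative helpers, not the kernel's):

#include <stdio.h>

#define QUADWORD_BYTES	16	/* 128 bits, the SVE length granule */

/* VL in bytes -> quadword count, mirroring what sve_vq_from_vl() computes. */
static unsigned int vq_from_vl(unsigned int vl_bytes)
{
	return vl_bytes / QUADWORD_BYTES;
}

/* ZCR_ELx.LEN / SMCR_ELx.LEN field value -> VL in bytes. */
static unsigned int vl_from_len(unsigned int len)
{
	return (len + 1) * QUADWORD_BYTES;
}

int main(void)
{
	unsigned int vl = 256;			/* say the hardware maximum is 2048 bits */
	unsigned int len = vq_from_vl(vl) - 1;	/* what read_zcr_features() now returns */

	printf("VL %u bytes -> LEN %u -> VL %u bytes\n", vl, len, vl_from_len(len));
	return 0;
}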
@@ -1267,9 +1278,9 @@ void fpsimd_release_task(struct task_struct *dead_task)
* the interest of testability and predictability, the architecture
* guarantees that when ZA is enabled it will be zeroed.
*/
-void sme_alloc(struct task_struct *task)
+void sme_alloc(struct task_struct *task, bool flush)
{
- if (task->thread.sme_state) {
+ if (task->thread.sme_state && flush) {
memset(task->thread.sme_state, 0, sme_state_size(task));
return;
}
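
The new flush argument lets a caller choose whether an already-allocated sme_state buffer is zeroed: the SME trap path below keeps the old behaviour by passing true, while a caller that is about to overwrite the storage with data it supplies itself can pass false and keep the existing contents. A hedged usage sketch (the second call site is illustrative, not taken from this diff):

	/* First SME trap: ZA storage must exist and be seen as zeroed. */
	sme_alloc(current, true);

	/*
	 * About to copy a complete ZA image over the buffer anyway
	 * (ptrace/signal-restore style caller): allocate if needed but
	 * don't wipe what is about to be overwritten.
	 */
	sme_alloc(target, false);	/* 'target' is a hypothetical task pointer */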
@@ -1331,9 +1342,6 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
*/
u64 read_smcr_features(void)
{
- u64 smcr;
- unsigned int vq_max;
-
sme_kernel_enable(NULL);
/*
@@ -1342,12 +1350,8 @@ u64 read_smcr_features(void)
write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
SYS_SMCR_EL1);
- smcr = read_sysreg_s(SYS_SMCR_EL1);
- smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
- vq_max = sve_vq_from_vl(sme_get_vl());
- smcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
- return smcr;
+ /* Return LEN value that would be written to get the maximum VL */
+ return sve_vq_from_vl(sme_get_vl()) - 1;
}
void __init sme_setup(void)
@@ -1497,7 +1501,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
}
sve_alloc(current, false);
- sme_alloc(current);
+ sme_alloc(current, true);
if (!current->thread.sve_state || !current->thread.sme_state) {
force_sig(SIGKILL);
return;
@@ -1649,7 +1653,6 @@ void fpsimd_flush_thread(void)
fpsimd_flush_thread_vl(ARM64_VEC_SME);
current->thread.svcr = 0;
- sme_smstop();
}
current->thread.fp_type = FP_STATE_FPSIMD;