diff options
Diffstat (limited to 'arch/arm64/kernel/fpsimd.c')
-rw-r--r-- | arch/arm64/kernel/fpsimd.c | 73 |
1 files changed, 38 insertions, 35 deletions
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 7a1aeb95d7c3..91e44ac7150f 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -679,7 +679,7 @@ static void fpsimd_to_sve(struct task_struct *task) void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; - if (!system_supports_sve()) + if (!system_supports_sve() && !system_supports_sme()) return; vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); @@ -705,7 +705,7 @@ static void sve_to_fpsimd(struct task_struct *task) unsigned int i; __uint128_t const *p; - if (!system_supports_sve()) + if (!system_supports_sve() && !system_supports_sme()) return; vl = thread_get_cur_vl(&task->thread); @@ -835,7 +835,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; - if (!test_tsk_thread_flag(task, TIF_SVE)) + if (!test_tsk_thread_flag(task, TIF_SVE) && + !thread_sm_enabled(&task->thread)) return; vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); @@ -847,6 +848,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) int vec_set_vector_length(struct task_struct *task, enum vec_type type, unsigned long vl, unsigned long flags) { + bool free_sme = false; + if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT | PR_SVE_SET_VL_ONEXEC)) return -EINVAL; @@ -897,24 +900,39 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, task->thread.fp_type = FP_STATE_FPSIMD; } - if (system_supports_sme() && type == ARM64_VEC_SME) { - task->thread.svcr &= ~(SVCR_SM_MASK | - SVCR_ZA_MASK); - clear_thread_flag(TIF_SME); + if (system_supports_sme()) { + if (type == ARM64_VEC_SME || + !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) { + /* + * We are changing the SME VL or weren't using + * SME anyway, discard the state and force a + * reallocation. + */ + task->thread.svcr &= ~(SVCR_SM_MASK | + SVCR_ZA_MASK); + clear_tsk_thread_flag(task, TIF_SME); + free_sme = true; + } } if (task == current) put_cpu_fpsimd_context(); + task_set_vl(task, type, vl); + /* - * Force reallocation of task SVE and SME state to the correct - * size on next use: + * Free the changed states if they are not in use, SME will be + * reallocated to the correct size on next use and we just + * allocate SVE now in case it is needed for use in streaming + * mode. */ - sve_free(task); - if (system_supports_sme() && type == ARM64_VEC_SME) - sme_free(task); + if (system_supports_sve()) { + sve_free(task); + sve_alloc(task, true); + } - task_set_vl(task, type, vl); + if (free_sme) + sme_free(task); out: update_tsk_thread_flag(task, vec_vl_inherit_flag(type), @@ -1161,9 +1179,6 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) */ u64 read_zcr_features(void) { - u64 zcr; - unsigned int vq_max; - /* * Set the maximum possible VL, and write zeroes to all other * bits to see if they stick. @@ -1171,12 +1186,8 @@ u64 read_zcr_features(void) sve_kernel_enable(NULL); write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1); - zcr = read_sysreg_s(SYS_ZCR_EL1); - zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */ - vq_max = sve_vq_from_vl(sve_get_vl()); - zcr |= vq_max - 1; /* set LEN field to maximum effective value */ - - return zcr; + /* Return LEN value that would be written to get the maximum VL */ + return sve_vq_from_vl(sve_get_vl()) - 1; } void __init sve_setup(void) @@ -1267,9 +1278,9 @@ void fpsimd_release_task(struct task_struct *dead_task) * the interest of testability and predictability, the architecture * guarantees that when ZA is enabled it will be zeroed. */ -void sme_alloc(struct task_struct *task) +void sme_alloc(struct task_struct *task, bool flush) { - if (task->thread.sme_state) { + if (task->thread.sme_state && flush) { memset(task->thread.sme_state, 0, sme_state_size(task)); return; } @@ -1331,9 +1342,6 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) */ u64 read_smcr_features(void) { - u64 smcr; - unsigned int vq_max; - sme_kernel_enable(NULL); /* @@ -1342,12 +1350,8 @@ u64 read_smcr_features(void) write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK, SYS_SMCR_EL1); - smcr = read_sysreg_s(SYS_SMCR_EL1); - smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */ - vq_max = sve_vq_from_vl(sme_get_vl()); - smcr |= vq_max - 1; /* set LEN field to maximum effective value */ - - return smcr; + /* Return LEN value that would be written to get the maximum VL */ + return sve_vq_from_vl(sme_get_vl()) - 1; } void __init sme_setup(void) @@ -1497,7 +1501,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) } sve_alloc(current, false); - sme_alloc(current); + sme_alloc(current, true); if (!current->thread.sve_state || !current->thread.sme_state) { force_sig(SIGKILL); return; @@ -1649,7 +1653,6 @@ void fpsimd_flush_thread(void) fpsimd_flush_thread_vl(ARM64_VEC_SME); current->thread.svcr = 0; - sme_smstop(); } current->thread.fp_type = FP_STATE_FPSIMD; |