summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/mips/Kconfig9
-rw-r--r--arch/mips/include/asm/asmmacro.h70
-rw-r--r--arch/mips/include/asm/msa.h28
-rw-r--r--arch/mips/include/asm/processor.h9
-rw-r--r--arch/mips/include/asm/switch_to.h22
-rw-r--r--arch/mips/include/asm/thread_info.h4
-rw-r--r--arch/mips/kernel/genex.S1
-rw-r--r--arch/mips/kernel/process.c7
-rw-r--r--arch/mips/kernel/r4k_switch.S58
-rw-r--r--arch/mips/kernel/traps.c101
10 files changed, 275 insertions, 34 deletions
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f627739eca49..50d854ce7f59 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2090,10 +2090,11 @@ config CPU_HAS_MSA
help
MIPS SIMD Architecture (MSA) introduces 128 bit wide vector registers
and a set of SIMD instructions to operate on them. When this option
- is enabled the kernel will support detection of the MSA ASE. If you
- know that your kernel will only be running on CPUs which do not
- support MSA then you may wish to say N here to reduce the size of
- your kernel.
+ is enabled the kernel will support allocating & switching MSA
+ vector register contexts. If you know that your kernel will only be
+ running on CPUs which do not support MSA or that your userland will
+ not be making use of it then you may wish to say N here to reduce
+ the size & complexity of your kernel.
If unsure, say Y.
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index f571db371b3e..fe3b03c793e5 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -328,4 +328,74 @@
.endm
#endif
+ .macro msa_save_all thread
+ st_d 0, THREAD_FPR0, \thread
+ st_d 1, THREAD_FPR1, \thread
+ st_d 2, THREAD_FPR2, \thread
+ st_d 3, THREAD_FPR3, \thread
+ st_d 4, THREAD_FPR4, \thread
+ st_d 5, THREAD_FPR5, \thread
+ st_d 6, THREAD_FPR6, \thread
+ st_d 7, THREAD_FPR7, \thread
+ st_d 8, THREAD_FPR8, \thread
+ st_d 9, THREAD_FPR9, \thread
+ st_d 10, THREAD_FPR10, \thread
+ st_d 11, THREAD_FPR11, \thread
+ st_d 12, THREAD_FPR12, \thread
+ st_d 13, THREAD_FPR13, \thread
+ st_d 14, THREAD_FPR14, \thread
+ st_d 15, THREAD_FPR15, \thread
+ st_d 16, THREAD_FPR16, \thread
+ st_d 17, THREAD_FPR17, \thread
+ st_d 18, THREAD_FPR18, \thread
+ st_d 19, THREAD_FPR19, \thread
+ st_d 20, THREAD_FPR20, \thread
+ st_d 21, THREAD_FPR21, \thread
+ st_d 22, THREAD_FPR22, \thread
+ st_d 23, THREAD_FPR23, \thread
+ st_d 24, THREAD_FPR24, \thread
+ st_d 25, THREAD_FPR25, \thread
+ st_d 26, THREAD_FPR26, \thread
+ st_d 27, THREAD_FPR27, \thread
+ st_d 28, THREAD_FPR28, \thread
+ st_d 29, THREAD_FPR29, \thread
+ st_d 30, THREAD_FPR30, \thread
+ st_d 31, THREAD_FPR31, \thread
+ .endm
+
+ .macro msa_restore_all thread
+ ld_d 0, THREAD_FPR0, \thread
+ ld_d 1, THREAD_FPR1, \thread
+ ld_d 2, THREAD_FPR2, \thread
+ ld_d 3, THREAD_FPR3, \thread
+ ld_d 4, THREAD_FPR4, \thread
+ ld_d 5, THREAD_FPR5, \thread
+ ld_d 6, THREAD_FPR6, \thread
+ ld_d 7, THREAD_FPR7, \thread
+ ld_d 8, THREAD_FPR8, \thread
+ ld_d 9, THREAD_FPR9, \thread
+ ld_d 10, THREAD_FPR10, \thread
+ ld_d 11, THREAD_FPR11, \thread
+ ld_d 12, THREAD_FPR12, \thread
+ ld_d 13, THREAD_FPR13, \thread
+ ld_d 14, THREAD_FPR14, \thread
+ ld_d 15, THREAD_FPR15, \thread
+ ld_d 16, THREAD_FPR16, \thread
+ ld_d 17, THREAD_FPR17, \thread
+ ld_d 18, THREAD_FPR18, \thread
+ ld_d 19, THREAD_FPR19, \thread
+ ld_d 20, THREAD_FPR20, \thread
+ ld_d 21, THREAD_FPR21, \thread
+ ld_d 22, THREAD_FPR22, \thread
+ ld_d 23, THREAD_FPR23, \thread
+ ld_d 24, THREAD_FPR24, \thread
+ ld_d 25, THREAD_FPR25, \thread
+ ld_d 26, THREAD_FPR26, \thread
+ ld_d 27, THREAD_FPR27, \thread
+ ld_d 28, THREAD_FPR28, \thread
+ ld_d 29, THREAD_FPR29, \thread
+ ld_d 30, THREAD_FPR30, \thread
+ ld_d 31, THREAD_FPR31, \thread
+ .endm
+
#endif /* _ASM_ASMMACRO_H */
diff --git a/arch/mips/include/asm/msa.h b/arch/mips/include/asm/msa.h
index 0a614fb83348..a2aba6c3ec05 100644
--- a/arch/mips/include/asm/msa.h
+++ b/arch/mips/include/asm/msa.h
@@ -12,6 +12,9 @@
#include <asm/mipsregs.h>
+extern void _save_msa(struct task_struct *);
+extern void _restore_msa(struct task_struct *);
+
static inline void enable_msa(void)
{
if (cpu_has_msa) {
@@ -36,6 +39,31 @@ static inline int is_msa_enabled(void)
return read_c0_config5() & MIPS_CONF5_MSAEN;
}
+static inline int thread_msa_context_live(void)
+{
+ /*
+ * Check cpu_has_msa only if it's a constant. This will allow the
+ * compiler to optimise out code for CPUs without MSA without adding
+ * an extra redundant check for CPUs with MSA.
+ */
+ if (__builtin_constant_p(cpu_has_msa) && !cpu_has_msa)
+ return 0;
+
+ return test_thread_flag(TIF_MSA_CTX_LIVE);
+}
+
+static inline void save_msa(struct task_struct *t)
+{
+ if (cpu_has_msa)
+ _save_msa(t);
+}
+
+static inline void restore_msa(struct task_struct *t)
+{
+ if (cpu_has_msa)
+ _restore_msa(t);
+}
+
#ifdef TOOLCHAIN_SUPPORTS_MSA
#define __BUILD_MSA_CTL_REG(name, cs) \
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 50cf4c343118..ad70cba8daff 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -96,7 +96,12 @@ extern unsigned int vced_count, vcei_count;
#define NUM_FPU_REGS 32
-#define FPU_REG_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_MSA
+# define FPU_REG_WIDTH 128
+#else
+# define FPU_REG_WIDTH 64
+#endif
union fpureg {
__u32 val32[FPU_REG_WIDTH / 32];
@@ -133,6 +138,7 @@ BUILD_FPR_ACCESS(64)
struct mips_fpu_struct {
union fpureg fpr[NUM_FPU_REGS];
unsigned int fcr31;
+ unsigned int msacsr;
};
#define NUM_DSP_REGS 6
@@ -310,6 +316,7 @@ struct thread_struct {
.fpu = { \
.fpr = {{{0,},},}, \
.fcr31 = 0, \
+ .msacsr = 0, \
}, \
/* \
* FPU affinity state (null if not FPAFF) \
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index 278d45a09728..495c1041a2cc 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -16,22 +16,29 @@
#include <asm/watch.h>
#include <asm/dsp.h>
#include <asm/cop2.h>
+#include <asm/msa.h>
struct task_struct;
+enum {
+ FP_SAVE_NONE = 0,
+ FP_SAVE_VECTOR = -1,
+ FP_SAVE_SCALAR = 1,
+};
+
/**
* resume - resume execution of a task
* @prev: The task previously executed.
* @next: The task to begin executing.
* @next_ti: task_thread_info(next).
- * @usedfpu: Non-zero if prev's FP context should be saved.
+ * @fp_save: Which, if any, FP context to save for prev.
*
* This function is used whilst scheduling to save the context of prev & load
* the context of next. Returns prev.
*/
extern asmlinkage struct task_struct *resume(struct task_struct *prev,
struct task_struct *next, struct thread_info *next_ti,
- u32 usedfpu);
+ s32 fp_save);
extern unsigned int ll_bit;
extern struct task_struct *ll_task;
@@ -75,7 +82,8 @@ do { \
#define switch_to(prev, next, last) \
do { \
- u32 __usedfpu, __c0_stat; \
+ u32 __c0_stat; \
+ s32 __fpsave = FP_SAVE_NONE; \
__mips_mt_fpaff_switch_to(prev); \
if (cpu_has_dsp) \
__save_dsp(prev); \
@@ -88,8 +96,12 @@ do { \
write_c0_status(__c0_stat & ~ST0_CU2); \
} \
__clear_software_ll_bit(); \
- __usedfpu = test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU); \
- (last) = resume(prev, next, task_thread_info(next), __usedfpu); \
+ if (test_and_clear_tsk_thread_flag(prev, TIF_USEDFPU)) \
+ __fpsave = FP_SAVE_SCALAR; \
+ if (test_and_clear_tsk_thread_flag(prev, TIF_USEDMSA)) \
+ __fpsave = FP_SAVE_VECTOR; \
+ (last) = resume(prev, next, task_thread_info(next), __fpsave); \
+ disable_msa(); \
} while (0)
#define finish_arch_switch(prev) \
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index e80ae50cae80..d2d961d6cb86 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -116,6 +116,8 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_LOAD_WATCH 25 /* If set, load watch registers */
#define TIF_SYSCALL_TRACEPOINT 26 /* syscall tracepoint instrumentation */
#define TIF_32BIT_FPREGS 27 /* 32-bit floating point registers */
+#define TIF_USEDMSA 29 /* MSA has been used this quantum */
+#define TIF_MSA_CTX_LIVE 30 /* MSA context must be preserved */
#define TIF_SYSCALL_TRACE 31 /* syscall trace active */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -133,6 +135,8 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_FPUBOUND (1<<TIF_FPUBOUND)
#define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH)
#define _TIF_32BIT_FPREGS (1<<TIF_32BIT_FPREGS)
+#define _TIF_USEDMSA (1<<TIF_USEDMSA)
+#define _TIF_MSA_CTX_LIVE (1<<TIF_MSA_CTX_LIVE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_WORK_SYSCALL_ENTRY (_TIF_NOHZ | _TIF_SYSCALL_TRACE | \
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index d84f6a509502..278a49bd93b6 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -477,6 +477,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
BUILD_HANDLER tr tr sti silent /* #13 */
BUILD_HANDLER fpe fpe fpe silent /* #15 */
BUILD_HANDLER ftlb ftlb none silent /* #16 */
+ BUILD_HANDLER msa msa sti silent /* #21 */
BUILD_HANDLER mdmx mdmx sti silent /* #22 */
#ifdef CONFIG_HARDWARE_WATCHPOINTS
/*
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 2f01f3d48d75..60e39dc7f1eb 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -32,6 +32,7 @@
#include <asm/cpu.h>
#include <asm/dsp.h>
#include <asm/fpu.h>
+#include <asm/msa.h>
#include <asm/pgtable.h>
#include <asm/mipsregs.h>
#include <asm/processor.h>
@@ -65,6 +66,8 @@ void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
clear_used_math();
clear_fpu_owner();
init_dsp();
+ clear_thread_flag(TIF_MSA_CTX_LIVE);
+ disable_msa();
regs->cp0_epc = pc;
regs->regs[29] = sp;
}
@@ -89,7 +92,9 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
preempt_disable();
- if (is_fpu_owner())
+ if (is_msa_enabled())
+ save_msa(p);
+ else if (is_fpu_owner())
save_fp(p);
if (cpu_has_dsp)
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index cc78dd9a17c7..f938ecd22af0 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -29,18 +29,8 @@
#define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS)
/*
- * FPU context is saved iff the process has used it's FPU in the current
- * time slice as indicated by _TIF_USEDFPU. In any case, the CU1 bit for user
- * space STATUS register should be 0, so that a process *always* starts its
- * userland with FPU disabled after each context switch.
- *
- * FPU will be enabled as soon as the process accesses FPU again, through
- * do_cpu() trap.
- */
-
-/*
* task_struct *resume(task_struct *prev, task_struct *next,
- * struct thread_info *next_ti, int usedfpu)
+ * struct thread_info *next_ti, s32 fp_save)
*/
.align 5
LEAF(resume)
@@ -50,23 +40,37 @@
LONG_S ra, THREAD_REG31(a0)
/*
- * check if we need to save FPU registers
+ * Check whether we need to save any FP context. FP context is saved
+ * iff the process has used the context with the scalar FPU or the MSA
+ * ASE in the current time slice, as indicated by _TIF_USEDFPU and
+ * _TIF_USEDMSA respectively. switch_to will have set fp_save
+ * accordingly to an FP_SAVE_ enum value.
*/
+ beqz a3, 2f
- beqz a3, 1f
-
- PTR_L t3, TASK_THREAD_INFO(a0)
/*
- * clear saved user stack CU1 bit
+ * We do. Clear the saved CU1 bit for prev, such that next time it is
+ * scheduled it will start in userland with the FPU disabled. If the
+ * task uses the FPU then it will be enabled again via the do_cpu trap.
+ * This allows us to lazily restore the FP context.
*/
+ PTR_L t3, TASK_THREAD_INFO(a0)
LONG_L t0, ST_OFF(t3)
li t1, ~ST0_CU1
and t0, t0, t1
LONG_S t0, ST_OFF(t3)
+ /* Check whether we're saving scalar or vector context. */
+ bgtz a3, 1f
+
+ /* Save 128b MSA vector context. */
+ msa_save_all a0
+ b 2f
+
+1: /* Save 32b/64b scalar FP context. */
fpu_save_double a0 t0 t1 # c0_status passed in t0
# clobbers t1
-1:
+2:
#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
PTR_LA t8, __stack_chk_guard
@@ -141,6 +145,26 @@ LEAF(_restore_fp)
jr ra
END(_restore_fp)
+#ifdef CONFIG_CPU_HAS_MSA
+
+/*
+ * Save a thread's MSA vector context.
+ */
+LEAF(_save_msa)
+ msa_save_all a0
+ jr ra
+ END(_save_msa)
+
+/*
+ * Restore a thread's MSA vector context.
+ */
+LEAF(_restore_msa)
+ msa_restore_all a0
+ jr ra
+ END(_restore_msa)
+
+#endif
+
/*
* Load the FPU with signalling NANS. This bit pattern we're using has
* the property that no matter whether considered as single or as double
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index b0c7f80e05e5..4f27cbfad1e5 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -47,6 +47,7 @@
#include <asm/mipsregs.h>
#include <asm/mipsmtregs.h>
#include <asm/module.h>
+#include <asm/msa.h>
#include <asm/pgtable.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
@@ -79,6 +80,7 @@ extern asmlinkage void handle_ov(void);
extern asmlinkage void handle_tr(void);
extern asmlinkage void handle_fpe(void);
extern asmlinkage void handle_ftlb(void);
+extern asmlinkage void handle_msa(void);
extern asmlinkage void handle_mdmx(void);
extern asmlinkage void handle_watch(void);
extern asmlinkage void handle_mt(void);
@@ -1074,6 +1076,76 @@ static int default_cu2_call(struct notifier_block *nfb, unsigned long action,
return NOTIFY_OK;
}
+static int enable_restore_fp_context(int msa)
+{
+ int err, was_fpu_owner;
+
+ if (!used_math()) {
+ /* First time FP context user. */
+ err = init_fpu();
+ if (msa && !err)
+ enable_msa();
+ if (!err)
+ set_used_math();
+ return err;
+ }
+
+ /*
+ * This task has formerly used the FP context.
+ *
+ * If this thread has no live MSA vector context then we can simply
+ * restore the scalar FP context. If it has live MSA vector context
+ * (that is, it has or may have used MSA since last performing a
+ * function call) then we'll need to restore the vector context. This
+ * applies even if we're currently only executing a scalar FP
+ * instruction. This is because if we were to later execute an MSA
+ * instruction then we'd either have to:
+ *
+ * - Restore the vector context & clobber any registers modified by
+ * scalar FP instructions between now & then.
+ *
+ * or
+ *
+ * - Not restore the vector context & lose the most significant bits
+ * of all vector registers.
+ *
+ * Neither of those options is acceptable. We cannot restore the least
+ * significant bits of the registers now & only restore the most
+ * significant bits later because the most significant bits of any
+ * vector registers whose aliased FP register is modified now will have
+ * been zeroed. We'd have no way to know that when restoring the vector
+ * context & thus may load an outdated value for the most significant
+ * bits of a vector register.
+ */
+ if (!msa && !thread_msa_context_live())
+ return own_fpu(1);
+
+ /*
+ * This task is using or has previously used MSA. Thus we require
+ * that Status.FR == 1.
+ */
+ was_fpu_owner = is_fpu_owner();
+ err = own_fpu(0);
+ if (err)
+ return err;
+
+ enable_msa();
+ write_msa_csr(current->thread.fpu.msacsr);
+ set_thread_flag(TIF_USEDMSA);
+
+ /*
+ * If this is the first time that the task is using MSA and it has
+ * previously used scalar FP in this time slice then we already nave
+ * FP context which we shouldn't clobber.
+ */
+ if (!test_and_set_thread_flag(TIF_MSA_CTX_LIVE) && was_fpu_owner)
+ return 0;
+
+ /* We need to restore the vector context. */
+ restore_msa(current);
+ return 0;
+}
+
asmlinkage void do_cpu(struct pt_regs *regs)
{
enum ctx_state prev_state;
@@ -1153,12 +1225,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
/* Fall through. */
case 1:
- if (used_math()) /* Using the FPU again. */
- err = own_fpu(1);
- else { /* First time FPU user. */
- err = init_fpu();
- set_used_math();
- }
+ err = enable_restore_fp_context(0);
if (!raw_cpu_has_fpu || err) {
int sig;
@@ -1183,6 +1250,27 @@ out:
exception_exit(prev_state);
}
+asmlinkage void do_msa(struct pt_regs *regs)
+{
+ enum ctx_state prev_state;
+ int err;
+
+ prev_state = exception_enter();
+
+ if (!cpu_has_msa || test_thread_flag(TIF_32BIT_FPREGS)) {
+ force_sig(SIGILL, current);
+ goto out;
+ }
+
+ die_if_kernel("do_msa invoked from kernel context!", regs);
+
+ err = enable_restore_fp_context(1);
+ if (err)
+ force_sig(SIGILL, current);
+out:
+ exception_exit(prev_state);
+}
+
asmlinkage void do_mdmx(struct pt_regs *regs)
{
enum ctx_state prev_state;
@@ -2041,6 +2129,7 @@ void __init trap_init(void)
set_except_vector(15, handle_fpe);
set_except_vector(16, handle_ftlb);
+ set_except_vector(21, handle_msa);
set_except_vector(22, handle_mdmx);
if (cpu_has_mcheck)