nds32: nds32 FPU port
author Vincent Chen <vincentc@andestech.com>
Thu, 22 Nov 2018 03:14:34 +0000 (11:14 +0800)
committer Greentime Hu <greentime@andestech.com>
Thu, 22 Nov 2018 10:13:13 +0000 (18:13 +0800)
This patch set contains the basic components for supporting the nds32 FPU,
such as the exception handlers and context switching for the FPU registers.
The lazy FPU scheme is enabled by default and can be configured via
CONFIG_LAZY_FPU.
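
A minimal, architecture-neutral sketch of the lazy-FPU ownership model that
own_fpu()/lose_fpu() in arch/nds32/include/asm/fpu.h implement; fpu_owner,
fpu_save() and fpu_load() below are illustrative stand-ins for
last_task_used_math, save_fpu() and load_fpu(), not the actual nds32 symbols:

  /* Task whose register state currently lives in the FPU hardware. */
  static struct task_struct *fpu_owner;

  static void lazy_fpu_acquire(struct task_struct *tsk)
  {
          preempt_disable();
          if (fpu_owner != tsk) {
                  if (fpu_owner)
                          /* Spill the previous owner's registers to memory. */
                          fpu_save(fpu_owner);
                  /* Bring in this task's saved context. */
                  fpu_load(&tsk->thread.fpu);
                  fpu_owner = tsk;
          }
          /* If tsk already owned the FPU, no register copy happens at all. */
          preempt_enable();
  }

Compared with the eager scheme, which saves and restores the FPU registers on
every context switch, register state is only moved when a different task
actually uses the FPU.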

Signed-off-by: Vincent Chen <vincentc@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
17 files changed:
arch/nds32/Kconfig
arch/nds32/Kconfig.cpu
arch/nds32/Makefile
arch/nds32/include/asm/bitfield.h
arch/nds32/include/asm/fpu.h [new file with mode: 0644]
arch/nds32/include/asm/processor.h
arch/nds32/include/uapi/asm/sigcontext.h
arch/nds32/kernel/Makefile
arch/nds32/kernel/ex-entry.S
arch/nds32/kernel/ex-exit.S
arch/nds32/kernel/ex-scall.S
arch/nds32/kernel/fpu.c [new file with mode: 0644]
arch/nds32/kernel/process.c
arch/nds32/kernel/setup.c
arch/nds32/kernel/signal.c
arch/nds32/kernel/sleep.S
arch/nds32/kernel/traps.c

index 5a11772a514d6924e47ab327c497930a26346e5e..41cffe3de0c3f150ad51374a4ba6b637e280fa62 100644 (file)
@@ -29,6 +29,7 @@ config NDS32
        select HANDLE_DOMAIN_IRQ
        select HAVE_ARCH_TRACEHOOK
        select HAVE_DEBUG_KMEMLEAK
+       select HAVE_EXIT_THREAD
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_PERF_EVENTS
        select IRQ_DOMAIN
index b8c8984d145616c48083d1d0b63ae45ae1a177b9..bb06a1b7eef0c0914b1dc7013f7774a960088e7b 100644 (file)
@@ -7,6 +7,27 @@ config CPU_LITTLE_ENDIAN
        bool "Little endian"
        default y
 
+config FPU
+       bool "FPU support"
+       default n
+       help
+         If the FPU ISA is used in user space, say Y here to enable the
+         required kernel support, such as FPU context switching and the
+         FPU exception handler.
+
+         If the FPU ISA is not used in user space, say N.
+
+config LAZY_FPU
+       bool "lazy FPU support"
+       depends on FPU
+       default y
+       help
+         Say Y here to enable the lazy FPU scheme. The lazy FPU scheme can
+         enhance system performance by reducing the context switch
+         frequency of the FPU registers.
+
+         For the normal case, say Y.
+
 config HWZOL
        bool "hardware zero overhead loop support"
        depends on CPU_D10 || CPU_D15
index 9f525ed700498254a903c8b680f30f2bf08be321..6dc03206e3c94c7f7456b86a14ec4a244c4163b0 100644 (file)
@@ -5,10 +5,14 @@ KBUILD_DEFCONFIG := defconfig
 
 comma = ,
 
+
 ifdef CONFIG_FUNCTION_TRACER
 arch-y += -malways-save-lp -mno-relax
 endif
 
+# Avoid generating FPU instructions
+arch-y  += -mno-ext-fpu-sp -mno-ext-fpu-dp -mfloat-abi=soft
+
 KBUILD_CFLAGS  += $(call cc-option, -mno-sched-prolog-epilog)
 KBUILD_CFLAGS  += -mcmodel=large
 
index 19b2841219adfeeab42d37edcfb5c8ccd87d68d1..c1619730192adf8a854666b6f5bafec87c2f3cdb 100644 (file)
 #define ITYPE_mskSTYPE         ( 0xF  << ITYPE_offSTYPE )
 #define ITYPE_mskCPID          ( 0x3  << ITYPE_offCPID )
 
+/* Additional definitions of ITYPE register for FPU */
+#define FPU_DISABLE_EXCEPTION  (0x1  << ITYPE_offSTYPE)
+#define FPU_EXCEPTION          (0x2  << ITYPE_offSTYPE)
+#define FPU_CPID               0       /* FPU Co-Processor ID is 0 */
+
 #define NDS32_VECTOR_mskNONEXCEPTION   0x78
 #define NDS32_VECTOR_offEXCEPTION      8
 #define NDS32_VECTOR_offINTERRUPT      9
 #define FPCSR_mskDNIT           ( 0x1  << FPCSR_offDNIT )
 #define FPCSR_mskRIT           ( 0x1  << FPCSR_offRIT )
 #define FPCSR_mskALL           (FPCSR_mskIVO | FPCSR_mskDBZ | FPCSR_mskOVF | FPCSR_mskUDF | FPCSR_mskIEX)
+#define FPCSR_mskALLE_NO_UDFE  (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskIEXE)
 #define FPCSR_mskALLE          (FPCSR_mskIVOE | FPCSR_mskDBZE | FPCSR_mskOVFE | FPCSR_mskUDFE | FPCSR_mskIEXE)
 #define FPCSR_mskALLT           (FPCSR_mskIVOT | FPCSR_mskDBZT | FPCSR_mskOVFT | FPCSR_mskUDFT | FPCSR_mskIEXT |FPCSR_mskDNIT | FPCSR_mskRIT)
 
 #define FPCFG_mskIMVER         ( 0x1F  << FPCFG_offIMVER )
 #define FPCFG_mskAVER          ( 0x1F  << FPCFG_offAVER )
 
+/* 8 Single precision or 4 double precision registers are available */
+#define SP8_DP4_reg            0
+/* 16 Single precision or 8 double precision registers are available */
+#define SP16_DP8_reg           1
+/* 32 Single precision or 16 double precision registers are available */
+#define SP32_DP16_reg          2
+/* 32 Single precision or 32 double precision registers are available */
+#define SP32_DP32_reg          3
+
 /******************************************************************************
  * fucpr: FUCOP_CTL (FPU and Coprocessor Enable Control Register)
  *****************************************************************************/
diff --git a/arch/nds32/include/asm/fpu.h b/arch/nds32/include/asm/fpu.h
new file mode 100644 (file)
index 0000000..f7a7f6b
--- /dev/null
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __ASM_NDS32_FPU_H
+#define __ASM_NDS32_FPU_H
+
+#if IS_ENABLED(CONFIG_FPU)
+#ifndef __ASSEMBLY__
+#include <linux/sched/task_stack.h>
+#include <linux/preempt.h>
+#include <asm/ptrace.h>
+
+extern bool has_fpu;
+
+extern void save_fpu(struct task_struct *__tsk);
+extern void load_fpu(const struct fpu_struct *fpregs);
+extern bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs);
+
+#define test_tsk_fpu(regs)     ((regs)->fucop_ctl & FUCOP_CTL_mskCP0EN)
+
+/*
+ * Initially load the FPU with signalling NaNs.  This bit pattern
+ * has the property that no matter whether considered as single or as
+ * double precision, it still represents a signalling NaN.
+ */
+
+#define sNAN64    0xFFFFFFFFFFFFFFFFULL
+#define sNAN32    0xFFFFFFFFUL
+
+#define FPCSR_INIT  0x0UL
+
+extern const struct fpu_struct init_fpuregs;
+
+static inline void disable_ptreg_fpu(struct pt_regs *regs)
+{
+       regs->fucop_ctl &= ~FUCOP_CTL_mskCP0EN;
+}
+
+static inline void enable_ptreg_fpu(struct pt_regs *regs)
+{
+       regs->fucop_ctl |= FUCOP_CTL_mskCP0EN;
+}
+
+static inline void enable_fpu(void)
+{
+       unsigned long fucop_ctl;
+
+       fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) | FUCOP_CTL_mskCP0EN;
+       __nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
+       __nds32__isb();
+}
+
+static inline void disable_fpu(void)
+{
+       unsigned long fucop_ctl;
+
+       fucop_ctl = __nds32__mfsr(NDS32_SR_FUCOP_CTL) & ~FUCOP_CTL_mskCP0EN;
+       __nds32__mtsr(fucop_ctl, NDS32_SR_FUCOP_CTL);
+       __nds32__isb();
+}
+
+static inline void lose_fpu(void)
+{
+       preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+       if (last_task_used_math == current) {
+               last_task_used_math = NULL;
+#else
+       if (test_tsk_fpu(task_pt_regs(current))) {
+#endif
+               save_fpu(current);
+       }
+       disable_ptreg_fpu(task_pt_regs(current));
+       preempt_enable();
+}
+
+static inline void own_fpu(void)
+{
+       preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+       if (last_task_used_math != current) {
+               if (last_task_used_math != NULL)
+                       save_fpu(last_task_used_math);
+               load_fpu(&current->thread.fpu);
+               last_task_used_math = current;
+       }
+#else
+       if (!test_tsk_fpu(task_pt_regs(current))) {
+               load_fpu(&current->thread.fpu);
+       }
+#endif
+       enable_ptreg_fpu(task_pt_regs(current));
+       preempt_enable();
+}
+
+#if !IS_ENABLED(CONFIG_LAZY_FPU)
+static inline void unlazy_fpu(struct task_struct *tsk)
+{
+       preempt_disable();
+       if (test_tsk_fpu(task_pt_regs(tsk)))
+               save_fpu(tsk);
+       preempt_enable();
+}
+#endif /* !CONFIG_LAZY_FPU */
+static inline void clear_fpu(struct pt_regs *regs)
+{
+       preempt_disable();
+       if (test_tsk_fpu(regs))
+               disable_ptreg_fpu(regs);
+       preempt_enable();
+}
+#endif /* CONFIG_FPU */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_NDS32_FPU_H */
index c2660f566baca99a7f88e401015e6b0816a5c52b..72024f8bc129035de9367277b35203f09fe40d80 100644 (file)
@@ -35,6 +35,8 @@ struct thread_struct {
        unsigned long address;
        unsigned long trap_no;
        unsigned long error_code;
+
+       struct fpu_struct fpu;
 };
 
 #define INIT_THREAD  { }
@@ -72,6 +74,11 @@ struct task_struct;
 
 /* Free all resources held by a thread. */
 #define release_thread(thread) do { } while(0)
+#if IS_ENABLED(CONFIG_FPU)
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+extern struct task_struct *last_task_used_math;
+#endif
+#endif
 
 /* Prepare to copy thread state - unlazy all lazy status */
 #define prepare_to_copy(tsk)   do { } while (0)
index 00567b237b0c297cfbd2a86d11f5c4a20b4c2a91..1257a78e3ae19ffd35fb2b80e6eeb0e65571dfe7 100644 (file)
@@ -9,6 +9,10 @@
  * before the signal handler was invoked.  Note: only add new entries
  * to the end of the structure.
  */
+struct fpu_struct {
+       unsigned long long fd_regs[32];
+       unsigned long fpcsr;
+};
 
 struct zol_struct {
        unsigned long nds32_lc; /* $LC */
@@ -54,6 +58,7 @@ struct sigcontext {
        unsigned long fault_address;
        unsigned long used_math_flag;
        /* FPU Registers */
+       struct fpu_struct fpu;
        struct zol_struct zol;
 };
 
index 8d62f2ecb1ab88d84da58f891690b1e7126e8c36..a1a1d61509e554b65a218e01c4f86d9959c0ce90 100644 (file)
@@ -13,12 +13,16 @@ obj-y                       := ex-entry.o ex-exit.o ex-scall.o irq.o \
 
 obj-$(CONFIG_MODULES)          += nds32_ksyms.o module.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
+obj-$(CONFIG_FPU)              += fpu.o
 obj-$(CONFIG_OF)               += devtree.o
 obj-$(CONFIG_CACHE_L2)         += atl2c.o
 obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o
 obj-$(CONFIG_PM)               += pm.o sleep.o
 extra-y := head.o vmlinux.lds
 
+CFLAGS_fpu.o += -mext-fpu-sp -mext-fpu-dp
+
+
 obj-y                          += vdso/
 
 obj-$(CONFIG_FUNCTION_TRACER)   += ftrace.o
index 21a144071566989af1daa55400a0c9fab98b34ee..107d98a1d1b851758c7d1b764008bece438e0a99 100644 (file)
@@ -7,6 +7,7 @@
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/page.h>
+#include <asm/fpu.h>
 
 #ifdef CONFIG_HWZOL
        .macro push_zol
        mfusr   $r16, $LC
        .endm
 #endif
+       .macro  skip_save_fucop_ctl
+#if defined(CONFIG_FPU)
+skip_fucop_ctl:
+       smw.adm $p0, [$sp], $p0, #0x1
+       j fucop_ctl_done
+#endif
+       .endm
 
        .macro  save_user_regs
-
+#if defined(CONFIG_FPU)
+       sethi   $p0, hi20(has_fpu)
+       lbsi    $p0, [$p0+lo12(has_fpu)]
+       beqz    $p0, skip_fucop_ctl
+       mfsr    $p0, $FUCOP_CTL
+       smw.adm $p0, [$sp], $p0, #0x1
+       bclr    $p0, $p0, #FUCOP_CTL_offCP0EN
+       mtsr    $p0, $FUCOP_CTL
+fucop_ctl_done:
+       /* move $SP to the bottom of pt_regs */
+       addi    $sp, $sp, -FUCOP_CTL_OFFSET
+#else
        smw.adm $sp, [$sp], $sp, #0x1
        /* move $SP to the bottom of pt_regs */
        addi    $sp, $sp, -OSP_OFFSET
+#endif
 
        /* push $r0 ~ $r25 */
        smw.bim $r0, [$sp], $r25
@@ -79,6 +99,7 @@ exception_handlers:
        .long   eh_syscall              !Syscall
        .long   asm_do_IRQ              !IRQ
 
+       skip_save_fucop_ctl
 common_exception_handler:
        save_user_regs
        mfsr    $p0, $ITYPE
@@ -103,7 +124,6 @@ common_exception_handler:
        mtsr    $r21, $PSW
        dsb
        jr      $p1
-
        /* syscall */
 1:
        addi    $p1, $p0, #-NDS32_VECTOR_offEXCEPTION
index f00af92f7e22fde904fc6e54999a9ebcdde7e950..97ba15cd4180b09bc59aeb7fcdbe2eede8c766dd 100644 (file)
@@ -8,6 +8,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/current.h>
+#include <asm/fpu.h>
 
 
 
        .macro  restore_user_regs_first
        setgie.d
        isb
-
+#if defined(CONFIG_FPU)
+       addi    $sp, $sp, OSP_OFFSET
+       lmw.adm $r12, [$sp], $r25, #0x0
+       sethi   $p0, hi20(has_fpu)
+       lbsi    $p0, [$p0+lo12(has_fpu)]
+       beqz    $p0, 2f
+       mtsr    $r25, $FUCOP_CTL
+2:
+#else
        addi    $sp, $sp, FUCOP_CTL_OFFSET
-
        lmw.adm $r12, [$sp], $r24, #0x0
+#endif
        mtsr    $r12, $SP_USR
        mtsr    $r13, $IPC
 #ifdef CONFIG_HWZOL
index 36aa87ecdabddc06d9ec4219a9ff47125a242921..270050f1b7b1d201ad06da2429868423874340c9 100644 (file)
@@ -19,11 +19,13 @@ ENTRY(__switch_to)
 
        la      $p0, __entry_task
        sw      $r1, [$p0]
-       move    $p1, $r0
-       addi    $p1, $p1, #THREAD_CPU_CONTEXT
+       addi    $p1, $r0, #THREAD_CPU_CONTEXT
        smw.bi  $r6, [$p1], $r14, #0xb          ! push r6~r14, fp, lp, sp
        move    $r25, $r1
-       addi    $r1, $r1, #THREAD_CPU_CONTEXT
+#if defined(CONFIG_FPU)
+       call    _switch_fpu
+#endif
+       addi    $r1, $r25, #THREAD_CPU_CONTEXT
        lmw.bi  $r6, [$r1], $r14, #0xb          ! pop r6~r14, fp, lp, sp
        ret
 
diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c
new file mode 100644 (file)
index 0000000..e55a1e1
--- /dev/null
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/sched/signal.h>
+#include <asm/processor.h>
+#include <asm/user.h>
+#include <asm/io.h>
+#include <asm/bitfield.h>
+#include <asm/fpu.h>
+
+const struct fpu_struct init_fpuregs = {
+       .fd_regs = {[0 ... 31] = sNAN64},
+       .fpcsr = FPCSR_INIT
+};
+
+void save_fpu(struct task_struct *tsk)
+{
+       unsigned int fpcfg, fpcsr;
+
+       enable_fpu();
+       fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
+       switch (fpcfg) {
+       case SP32_DP32_reg:
+               asm volatile ("fsdi $fd31, [%0+0xf8]\n\t"
+                             "fsdi $fd30, [%0+0xf0]\n\t"
+                             "fsdi $fd29, [%0+0xe8]\n\t"
+                             "fsdi $fd28, [%0+0xe0]\n\t"
+                             "fsdi $fd27, [%0+0xd8]\n\t"
+                             "fsdi $fd26, [%0+0xd0]\n\t"
+                             "fsdi $fd25, [%0+0xc8]\n\t"
+                             "fsdi $fd24, [%0+0xc0]\n\t"
+                             "fsdi $fd23, [%0+0xb8]\n\t"
+                             "fsdi $fd22, [%0+0xb0]\n\t"
+                             "fsdi $fd21, [%0+0xa8]\n\t"
+                             "fsdi $fd20, [%0+0xa0]\n\t"
+                             "fsdi $fd19, [%0+0x98]\n\t"
+                             "fsdi $fd18, [%0+0x90]\n\t"
+                             "fsdi $fd17, [%0+0x88]\n\t"
+                             "fsdi $fd16, [%0+0x80]\n\t"
+                             : /* no output */
+                             : "r" (&tsk->thread.fpu)
+                             : "memory");
+               /* fall through */
+       case SP32_DP16_reg:
+               asm volatile ("fsdi $fd15, [%0+0x78]\n\t"
+                             "fsdi $fd14, [%0+0x70]\n\t"
+                             "fsdi $fd13, [%0+0x68]\n\t"
+                             "fsdi $fd12, [%0+0x60]\n\t"
+                             "fsdi $fd11, [%0+0x58]\n\t"
+                             "fsdi $fd10, [%0+0x50]\n\t"
+                             "fsdi $fd9,  [%0+0x48]\n\t"
+                             "fsdi $fd8,  [%0+0x40]\n\t"
+                             : /* no output */
+                             : "r" (&tsk->thread.fpu)
+                             : "memory");
+               /* fall through */
+       case SP16_DP8_reg:
+               asm volatile ("fsdi $fd7,  [%0+0x38]\n\t"
+                             "fsdi $fd6,  [%0+0x30]\n\t"
+                             "fsdi $fd5,  [%0+0x28]\n\t"
+                             "fsdi $fd4,  [%0+0x20]\n\t"
+                             : /* no output */
+                             : "r" (&tsk->thread.fpu)
+                             : "memory");
+               /* fall through */
+       case SP8_DP4_reg:
+               asm volatile ("fsdi $fd3,  [%1+0x18]\n\t"
+                             "fsdi $fd2,  [%1+0x10]\n\t"
+                             "fsdi $fd1,  [%1+0x8]\n\t"
+                             "fsdi $fd0,  [%1+0x0]\n\t"
+                             "fmfcsr   %0\n\t"
+                             "swi  %0, [%1+0x100]\n\t"
+                             : "=&r" (fpcsr)
+                             : "r"(&tsk->thread.fpu)
+                             : "memory");
+       }
+       disable_fpu();
+}
+
+void load_fpu(const struct fpu_struct *fpregs)
+{
+       unsigned int fpcfg, fpcsr;
+
+       enable_fpu();
+       fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
+       switch (fpcfg) {
+       case SP32_DP32_reg:
+               asm volatile ("fldi $fd31, [%0+0xf8]\n\t"
+                             "fldi $fd30, [%0+0xf0]\n\t"
+                             "fldi $fd29, [%0+0xe8]\n\t"
+                             "fldi $fd28, [%0+0xe0]\n\t"
+                             "fldi $fd27, [%0+0xd8]\n\t"
+                             "fldi $fd26, [%0+0xd0]\n\t"
+                             "fldi $fd25, [%0+0xc8]\n\t"
+                             "fldi $fd24, [%0+0xc0]\n\t"
+                             "fldi $fd23, [%0+0xb8]\n\t"
+                             "fldi $fd22, [%0+0xb0]\n\t"
+                             "fldi $fd21, [%0+0xa8]\n\t"
+                             "fldi $fd20, [%0+0xa0]\n\t"
+                             "fldi $fd19, [%0+0x98]\n\t"
+                             "fldi $fd18, [%0+0x90]\n\t"
+                             "fldi $fd17, [%0+0x88]\n\t"
+                             "fldi $fd16, [%0+0x80]\n\t"
+                             : /* no output */
+                             : "r" (fpregs));
+               /* fall through */
+       case SP32_DP16_reg:
+               asm volatile ("fldi $fd15, [%0+0x78]\n\t"
+                             "fldi $fd14, [%0+0x70]\n\t"
+                             "fldi $fd13, [%0+0x68]\n\t"
+                             "fldi $fd12, [%0+0x60]\n\t"
+                             "fldi $fd11, [%0+0x58]\n\t"
+                             "fldi $fd10, [%0+0x50]\n\t"
+                             "fldi $fd9,  [%0+0x48]\n\t"
+                             "fldi $fd8,  [%0+0x40]\n\t"
+                             : /* no output */
+                             : "r" (fpregs));
+               /* fall through */
+       case SP16_DP8_reg:
+               asm volatile ("fldi $fd7,  [%0+0x38]\n\t"
+                             "fldi $fd6,  [%0+0x30]\n\t"
+                             "fldi $fd5,  [%0+0x28]\n\t"
+                             "fldi $fd4,  [%0+0x20]\n\t"
+                             : /* no output */
+                             : "r" (fpregs));
+               /* fall through */
+       case SP8_DP4_reg:
+               asm volatile ("fldi $fd3,  [%1+0x18]\n\t"
+                             "fldi $fd2,  [%1+0x10]\n\t"
+                             "fldi $fd1,  [%1+0x8]\n\t"
+                             "fldi $fd0,  [%1+0x0]\n\t"
+                             "lwi  %0, [%1+0x100]\n\t"
+                             "fmtcsr   %0\n\t":"=&r" (fpcsr)
+                             : "r"(fpregs));
+       }
+       disable_fpu();
+}
+void store_fpu_for_suspend(void)
+{
+#ifdef CONFIG_LAZY_FPU
+       if (last_task_used_math != NULL)
+               save_fpu(last_task_used_math);
+       last_task_used_math = NULL;
+#else
+       if (!used_math())
+               return;
+       unlazy_fpu(current);
+#endif
+       clear_fpu(task_pt_regs(current));
+}
+inline void do_fpu_context_switch(struct pt_regs *regs)
+{
+       /* Enable the FPU for the faulting user task. */
+
+       if (!user_mode(regs)) {
+               pr_err("BUG: FPU is used in kernel mode.\n");
+               BUG();
+               return;
+       }
+
+       enable_ptreg_fpu(regs);
+#ifdef CONFIG_LAZY_FPU /* Lazy FPU scheme is used */
+       if (last_task_used_math == current)
+               return;
+       if (last_task_used_math != NULL)
+               /* Another task's FPU state is live; save it away. */
+               save_fpu(last_task_used_math);
+       last_task_used_math = current;
+#endif
+       if (used_math()) {
+               load_fpu(&current->thread.fpu);
+       } else {
+               /* First time FPU user.  */
+               load_fpu(&init_fpuregs);
+               set_used_math();
+       }
+
+}
+
+inline void fill_sigfpe_signo(unsigned int fpcsr, int *signo)
+{
+       if (fpcsr & FPCSR_mskOVFT)
+               *signo = FPE_FLTOVF;
+       else if (fpcsr & FPCSR_mskUDFT)
+               *signo = FPE_FLTUND;
+       else if (fpcsr & FPCSR_mskIVOT)
+               *signo = FPE_FLTINV;
+       else if (fpcsr & FPCSR_mskDBZT)
+               *signo = FPE_FLTDIV;
+       else if (fpcsr & FPCSR_mskIEXT)
+               *signo = FPE_FLTRES;
+}
+
+inline void handle_fpu_exception(struct pt_regs *regs)
+{
+       unsigned int fpcsr;
+       int si_code = 0, si_signo = SIGFPE;
+
+       lose_fpu();
+       fpcsr = current->thread.fpu.fpcsr;
+
+       if (fpcsr & FPCSR_mskRIT) {
+               if (!user_mode(regs))
+                       do_exit(SIGILL);
+               si_signo = SIGILL;
+               show_regs(regs);
+               si_code = ILL_COPROC;
+       } else
+               fill_sigfpe_signo(fpcsr, &si_code);
+       force_sig_fault(si_signo, si_code,
+                       (void __user *)instruction_pointer(regs), current);
+}
+
+bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs)
+{
+       bool done = true;
+       /* Coprocessor disabled exception */
+       if (subtype == FPU_DISABLE_EXCEPTION) {
+               preempt_disable();
+               do_fpu_context_switch(regs);
+               preempt_enable();
+       }
+       /* Coprocessor exception such as underflow and overflow */
+       else if (subtype == FPU_EXCEPTION)
+               handle_fpu_exception(regs);
+       else
+               done = false;
+       return done;
+}
index 65fda986e55f3ae3adb7b42fb954bd91de3ef7c2..ab7ab46234b1475c106d8adb9a79e9eff0de6f12 100644 (file)
@@ -9,15 +9,16 @@
 #include <linux/uaccess.h>
 #include <asm/elf.h>
 #include <asm/proc-fns.h>
+#include <asm/fpu.h>
 #include <linux/ptrace.h>
 #include <linux/reboot.h>
 
-extern void setup_mm_for_reboot(char mode);
-#ifdef CONFIG_PROC_FS
-struct proc_dir_entry *proc_dir_cpu;
-EXPORT_SYMBOL(proc_dir_cpu);
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+struct task_struct *last_task_used_math;
 #endif
 
+extern void setup_mm_for_reboot(char mode);
+
 extern inline void arch_reset(char mode)
 {
        if (mode == 's') {
@@ -125,15 +126,31 @@ void show_regs(struct pt_regs *regs)
 
 EXPORT_SYMBOL(show_regs);
 
+void exit_thread(struct task_struct *tsk)
+{
+#if defined(CONFIG_FPU) && defined(CONFIG_LAZY_FPU)
+       if (last_task_used_math == tsk)
+               last_task_used_math = NULL;
+#endif
+}
+
 void flush_thread(void)
 {
+#if defined(CONFIG_FPU)
+       clear_fpu(task_pt_regs(current));
+       clear_used_math();
+# ifdef CONFIG_LAZY_FPU
+       if (last_task_used_math == current)
+               last_task_used_math = NULL;
+# endif
+#endif
 }
 
 DEFINE_PER_CPU(struct task_struct *, __entry_task);
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 int copy_thread(unsigned long clone_flags, unsigned long stack_start,
-           unsigned long stk_sz, struct task_struct *p)
+               unsigned long stk_sz, struct task_struct *p)
 {
        struct pt_regs *childregs = task_pt_regs(p);
 
@@ -159,6 +176,22 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
        p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
        p->thread.cpu_context.sp = (unsigned long)childregs;
 
+#if IS_ENABLED(CONFIG_FPU)
+       if (used_math()) {
+# if !IS_ENABLED(CONFIG_LAZY_FPU)
+               unlazy_fpu(current);
+# else
+               preempt_disable();
+               if (last_task_used_math == current)
+                       save_fpu(current);
+               preempt_enable();
+# endif
+               p->thread.fpu = current->thread.fpu;
+               clear_fpu(task_pt_regs(p));
+               set_stopped_child_used_math(p);
+       }
+#endif
+
 #ifdef CONFIG_HWZOL
        childregs->lb = 0;
        childregs->le = 0;
@@ -168,12 +201,33 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
        return 0;
 }
 
+#if IS_ENABLED(CONFIG_FPU)
+struct task_struct *_switch_fpu(struct task_struct *prev, struct task_struct *next)
+{
+#if !IS_ENABLED(CONFIG_LAZY_FPU)
+       unlazy_fpu(prev);
+#endif
+       if (!(next->flags & PF_KTHREAD))
+               clear_fpu(task_pt_regs(next));
+       return prev;
+}
+#endif
+
 /*
  * fill in the fpe structure for a core dump...
  */
 int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu)
 {
        int fpvalid = 0;
+#if IS_ENABLED(CONFIG_FPU)
+       struct task_struct *tsk = current;
+
+       fpvalid = tsk_used_math(tsk);
+       if (fpvalid) {
+               lose_fpu();
+               memcpy(fpu, &tsk->thread.fpu, sizeof(*fpu));
+       }
+#endif
        return fpvalid;
 }
 
index eacc79024879bad1d6948b4b872de78f6316befe..d7f5657bc638323d1317d58955fb56bbccbab7c2 100644 (file)
@@ -15,6 +15,7 @@
 #include <asm/proc-fns.h>
 #include <asm/cache_info.h>
 #include <asm/elf.h>
+#include <asm/fpu.h>
 #include <nds32_intrinsic.h>
 
 #define HWCAP_MFUSR_PC         0x000001
@@ -40,6 +41,7 @@
 #define HWCAP_DX_REGS          0x100000
 
 unsigned long cpu_id, cpu_rev, cpu_cfgid;
+bool has_fpu = false;
 char cpu_series;
 char *endianness = NULL;
 
@@ -136,6 +138,11 @@ static void __init dump_cpu_info(int cpu)
                    (aliasing_num - 1) << PAGE_SHIFT;
        }
 #endif
+#ifdef CONFIG_FPU
+       /* Disable fpu and enable when it is used. */
+       if (has_fpu)
+               disable_fpu();
+#endif
 }
 
 static void __init setup_cpuinfo(void)
@@ -180,9 +187,10 @@ static void __init setup_cpuinfo(void)
        if (cpu_cfgid & 0x0004)
                elf_hwcap |= HWCAP_EXT2;
 
-       if (cpu_cfgid & 0x0008)
+       if (cpu_cfgid & 0x0008) {
                elf_hwcap |= HWCAP_FPU;
-
+               has_fpu = true;
+       }
        if (cpu_cfgid & 0x0010)
                elf_hwcap |= HWCAP_STRING;
 
index 5d01f6e33cb8e6469810fdc358b4960cabb1e42e..5b5be082cfa40896b21fc4c7c2115a70066fef3d 100644 (file)
@@ -12,6 +12,7 @@
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/unistd.h>
+#include <asm/fpu.h>
 
 #include <asm/ptrace.h>
 #include <asm/vdso.h>
@@ -20,6 +21,60 @@ struct rt_sigframe {
        struct siginfo info;
        struct ucontext uc;
 };
+#if IS_ENABLED(CONFIG_FPU)
+static inline int restore_sigcontext_fpu(struct pt_regs *regs,
+                                        struct sigcontext __user *sc)
+{
+       struct task_struct *tsk = current;
+       unsigned long used_math_flag;
+       int ret = 0;
+
+       clear_used_math();
+       __get_user_error(used_math_flag, &sc->used_math_flag, ret);
+
+       if (!used_math_flag)
+               return 0;
+       set_used_math();
+
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+       preempt_disable();
+       if (current == last_task_used_math) {
+               last_task_used_math = NULL;
+               disable_ptreg_fpu(regs);
+       }
+       preempt_enable();
+#else
+       clear_fpu(regs);
+#endif
+
+       return __copy_from_user(&tsk->thread.fpu, &sc->fpu,
+                               sizeof(struct fpu_struct));
+}
+
+static inline int setup_sigcontext_fpu(struct pt_regs *regs,
+                                      struct sigcontext __user *sc)
+{
+       struct task_struct *tsk = current;
+       int ret = 0;
+
+       __put_user_error(used_math(), &sc->used_math_flag, ret);
+
+       if (!used_math())
+               return ret;
+
+       preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+       if (last_task_used_math == tsk)
+               save_fpu(last_task_used_math);
+#else
+       unlazy_fpu(tsk);
+#endif
+       ret = __copy_to_user(&sc->fpu, &tsk->thread.fpu,
+                            sizeof(struct fpu_struct));
+       preempt_enable();
+       return ret;
+}
+#endif
 
 static int restore_sigframe(struct pt_regs *regs,
                            struct rt_sigframe __user * sf)
@@ -69,7 +124,9 @@ static int restore_sigframe(struct pt_regs *regs,
        __get_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err);
        __get_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err);
 #endif
-
+#if IS_ENABLED(CONFIG_FPU)
+       err |= restore_sigcontext_fpu(regs, &sf->uc.uc_mcontext);
+#endif
        /*
         * Avoid sys_rt_sigreturn() restarting.
         */
@@ -153,6 +210,9 @@ setup_sigframe(struct rt_sigframe __user * sf, struct pt_regs *regs,
        __put_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err);
        __put_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err);
 #endif
+#if IS_ENABLED(CONFIG_FPU)
+       err |= setup_sigcontext_fpu(regs, &sf->uc.uc_mcontext);
+#endif
 
        __put_user_error(current->thread.trap_no, &sf->uc.uc_mcontext.trap_no,
                         err);
index 60c64bfbc90133ea372e914658878dc611230833..ca4e61f3656fddf1a931a41d4f156bb798b92bee 100644 (file)
@@ -36,7 +36,9 @@ suspend2ram:
        mfsr    $r17, $ir14
        mfsr    $r18, $ir15
        pushm   $r0, $r19
-
+#if defined(CONFIG_FPU)
+       jal     store_fpu_for_suspend
+#endif
        tlbop   FlushAll
        isb
 
index 1496aab48998817c00cb175dbd8b09b3453d73dd..5aa7c17da27ac0ffd93fecaf06b03a5f253ad828 100644 (file)
@@ -12,6 +12,7 @@
 
 #include <asm/proc-fns.h>
 #include <asm/unistd.h>
+#include <asm/fpu.h>
 
 #include <linux/ptrace.h>
 #include <nds32_intrinsic.h>
@@ -357,6 +358,21 @@ void do_dispatch_general(unsigned long entry, unsigned long addr,
        } else if (type == ETYPE_RESERVED_INSTRUCTION) {
                /* Reserved instruction */
                do_revinsn(regs);
+       } else if (type == ETYPE_COPROCESSOR) {
+               /* Coprocessor */
+#if IS_ENABLED(CONFIG_FPU)
+               unsigned int fucop_exist = __nds32__mfsr(NDS32_SR_FUCOP_EXIST);
+               unsigned int cpid = ((itype & ITYPE_mskCPID) >> ITYPE_offCPID);
+
+               if ((cpid == FPU_CPID) &&
+                   (fucop_exist & FUCOP_EXIST_mskCP0ISFPU)) {
+                       unsigned int subtype = (itype & ITYPE_mskSTYPE);
+
+                       if (do_fpu_exception(subtype, regs))
+                               return;
+               }
+#endif
+               unhandled_exceptions(entry, addr, type, regs);
        } else if (type == ETYPE_TRAP && swid == SWID_RAISE_INTERRUPT_LEVEL) {
                /* trap, used on v3 EDM target debugging workaround */
                /*