MIPS: Loongson-3: Fast TLB refill handler
authorHuacai Chen <chenhc@lemote.com>
Thu, 3 Mar 2016 01:45:12 +0000 (09:45 +0800)
committerRalf Baechle <ralf@linux-mips.org>
Fri, 13 May 2016 12:02:15 +0000 (14:02 +0200)
Loongson-3A R2 has pwbase/pwfield/pwsize/pwctl registers in CP0 (this
is very similar to HTW) and lwdir/lwpte/lddir/ldpte instructions which
can be used for fast TLB refill.

[ralf@linux-mips.org: Resolve conflict.]

Signed-off-by: Huacai Chen <chenhc@lemote.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Steven J . Hill <sjhill@realitydiluted.com>
Cc: Fuxin Zhang <zhangfx@lemote.com>
Cc: Zhangjin Wu <wuzhangjin@gmail.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/12754/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
arch/mips/include/asm/cpu-features.h
arch/mips/include/asm/cpu.h
arch/mips/include/asm/mipsregs.h
arch/mips/include/asm/uasm.h
arch/mips/include/uapi/asm/inst.h
arch/mips/kernel/cpu-probe.c
arch/mips/mm/tlbex.c
arch/mips/mm/uasm-mips.c
arch/mips/mm/uasm.c

index 6ff0504d090df25bfbeb8247199c2b1e89d9beab..da92d513a3956aabf51897fdd7f1288d6d479da7 100644 (file)
@@ -35,6 +35,9 @@
 #ifndef cpu_has_htw
 #define cpu_has_htw            (cpu_data[0].options & MIPS_CPU_HTW)
 #endif
+#ifndef cpu_has_ldpte
+#define cpu_has_ldpte          (cpu_data[0].options & MIPS_CPU_LDPTE)
+#endif
 #ifndef cpu_has_rixiex
 #define cpu_has_rixiex         (cpu_data[0].options & MIPS_CPU_RIXIEX)
 #endif
index 810536b6900cf5337ac55509988d4f3e9f8e7f27..d9150a210df6a98fa28bfe6578b82fe295281bd2 100644 (file)
@@ -402,6 +402,7 @@ enum cpu_type_enum {
 #define MIPS_CPU_NAN_LEGACY    MBIT_ULL(38)    /* Legacy NaN implemented */
 #define MIPS_CPU_NAN_2008      MBIT_ULL(39)    /* 2008 NaN implemented */
 #define MIPS_CPU_VP            MBIT_ULL(40)    /* MIPSr6 Virtual Processors (multi-threading) */
+#define MIPS_CPU_LDPTE         MBIT_ULL(41)    /* CPU has ldpte/lddir instructions */
 
 /*
  * CPU ASE encodings
index 0d0bd161fc1040179e6b6b99fe80e99c0a8756dd..1e9d337fb077aea2ebefdcc176582b1ccd8c69e9 100644 (file)
@@ -1474,6 +1474,12 @@ do {                                                                     \
 #define read_c0_pwctl()                __read_32bit_c0_register($6, 6)
 #define write_c0_pwctl(val)    __write_32bit_c0_register($6, 6, val)
 
+#define read_c0_pgd()          __read_64bit_c0_register($9, 7)
+#define write_c0_pgd(val)      __write_64bit_c0_register($9, 7, val)
+
+#define read_c0_kpgd()         __read_64bit_c0_register($31, 7)
+#define write_c0_kpgd(val)     __write_64bit_c0_register($31, 7, val)
+
 /* Cavium OCTEON (cnMIPS) */
 #define read_c0_cvmcount()     __read_ulong_c0_register($9, 6)
 #define write_c0_cvmcount(val) __write_ulong_c0_register($9, 6, val)
index fc1cdd25fcda3fa7b632b23039a3b810ef47d322..b6ecfeee4dbe8aca256803ee2f980575825133e8 100644 (file)
@@ -171,7 +171,8 @@ Ip_u2u1(_wsbh);
 Ip_u3u1u2(_xor);
 Ip_u2u1u3(_xori);
 Ip_u2u1(_yield);
-
+Ip_u1u2(_ldpte);
+Ip_u2u1u3(_lddir);
 
 /* Handle labels. */
 struct uasm_label {
index ddea53e3a9bb7bbd56463a0727ff2b3bc378f614..3bb8cd957e81ab0c3b25dc54e0085fb3f2c4e71a 100644 (file)
@@ -203,6 +203,16 @@ enum mad_func {
        nmadd_fp_op     = 0x0c, nmsub_fp_op     = 0x0e
 };
 
+/*
+ * func field for page table walker (Loongson-3).
+ */
+enum ptw_func {
+       lwdir_op = 0x00,
+       lwpte_op = 0x01,
+       lddir_op = 0x02,
+       ldpte_op = 0x03,
+};
+
 /*
  * func field for special3 lx opcodes (Cavium Octeon).
  */
index 515dd1f8901e2b683c31b85b750d0ed380378181..064ab389e9643fc218e58f018f3fb3e7b92fc0b2 100644 (file)
@@ -1539,7 +1539,7 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu)
                }
 
                decode_configs(c);
-               c->options |= MIPS_CPU_TLBINV;
+               c->options |= MIPS_CPU_TLBINV | MIPS_CPU_LDPTE;
                c->writecombine = _CACHE_UNCACHED_ACCELERATED;
                break;
        default:
index 84c6e3fda84af1f87f025c63709ecd48327fb75e..7153e1fb0cd4529eec297a7462df0f6a8f5cfa14 100644 (file)
@@ -284,7 +284,12 @@ static inline void dump_handler(const char *symbol, const u32 *handler, int coun
 #define C0_ENTRYLO1    3, 0
 #define C0_CONTEXT     4, 0
 #define C0_PAGEMASK    5, 0
+#define C0_PWBASE      5, 5
+#define C0_PWFIELD     5, 6
+#define C0_PWSIZE      5, 7
+#define C0_PWCTL       6, 6
 #define C0_BADVADDR    8, 0
+#define C0_PGD         9, 7
 #define C0_ENTRYHI     10, 0
 #define C0_EPC         14, 0
 #define C0_XCONTEXT    20, 0
@@ -808,7 +813,10 @@ build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
 
        if (pgd_reg != -1) {
                /* pgd is in pgd_reg */
-               UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg);
+               if (cpu_has_ldpte)
+                       UASM_i_MFC0(p, ptr, C0_PWBASE);
+               else
+                       UASM_i_MFC0(p, ptr, c0_kscratch(), pgd_reg);
        } else {
 #if defined(CONFIG_MIPS_PGD_C0_CONTEXT)
                /*
@@ -1421,6 +1429,108 @@ static void build_r4000_tlb_refill_handler(void)
        dump_handler("r4000_tlb_refill", (u32 *)ebase, 64);
 }
 
+static void setup_pw(void)
+{
+       unsigned long pgd_i, pgd_w;
+#ifndef __PAGETABLE_PMD_FOLDED
+       unsigned long pmd_i, pmd_w;
+#endif
+       unsigned long pt_i, pt_w;
+       unsigned long pte_i, pte_w;
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+       unsigned long psn;
+
+       psn = ilog2(_PAGE_HUGE);     /* bit used to indicate huge page */
+#endif
+       pgd_i = PGDIR_SHIFT;  /* 1st level PGD */
+#ifndef __PAGETABLE_PMD_FOLDED
+       pgd_w = PGDIR_SHIFT - PMD_SHIFT + PGD_ORDER;
+
+       pmd_i = PMD_SHIFT;    /* 2nd level PMD */
+       pmd_w = PMD_SHIFT - PAGE_SHIFT;
+#else
+       pgd_w = PGDIR_SHIFT - PAGE_SHIFT + PGD_ORDER;
+#endif
+
+       pt_i  = PAGE_SHIFT;    /* 3rd level PTE */
+       pt_w  = PAGE_SHIFT - 3;
+
+       pte_i = ilog2(_PAGE_GLOBAL);
+       pte_w = 0;
+
+#ifndef __PAGETABLE_PMD_FOLDED
+       write_c0_pwfield(pgd_i << 24 | pmd_i << 12 | pt_i << 6 | pte_i);
+       write_c0_pwsize(1 << 30 | pgd_w << 24 | pmd_w << 12 | pt_w << 6 | pte_w);
+#else
+       write_c0_pwfield(pgd_i << 24 | pt_i << 6 | pte_i);
+       write_c0_pwsize(1 << 30 | pgd_w << 24 | pt_w << 6 | pte_w);
+#endif
+
+#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
+       write_c0_pwctl(1 << 6 | psn);
+#endif
+       write_c0_kpgd(swapper_pg_dir);
+       kscratch_used_mask |= (1 << 7); /* KScratch6 is used for KPGD */
+}
+
+static void build_loongson3_tlb_refill_handler(void)
+{
+       u32 *p = tlb_handler;
+       struct uasm_label *l = labels;
+       struct uasm_reloc *r = relocs;
+
+       memset(labels, 0, sizeof(labels));
+       memset(relocs, 0, sizeof(relocs));
+       memset(tlb_handler, 0, sizeof(tlb_handler));
+
+       if (check_for_high_segbits) {
+               uasm_i_dmfc0(&p, K0, C0_BADVADDR);
+               uasm_i_dsrl_safe(&p, K1, K0, PGDIR_SHIFT + PGD_ORDER + PAGE_SHIFT - 3);
+               uasm_il_beqz(&p, &r, K1, label_vmalloc);
+               uasm_i_nop(&p);
+
+               uasm_il_bgez(&p, &r, K0, label_large_segbits_fault);
+               uasm_i_nop(&p);
+               uasm_l_vmalloc(&l, p);
+       }
+
+       uasm_i_dmfc0(&p, K1, C0_PGD);
+
+       uasm_i_lddir(&p, K0, K1, 3);  /* global page dir */
+#ifndef __PAGETABLE_PMD_FOLDED
+       uasm_i_lddir(&p, K1, K0, 1);  /* middle page dir */
+#endif
+       uasm_i_ldpte(&p, K1, 0);      /* even */
+       uasm_i_ldpte(&p, K1, 1);      /* odd */
+       uasm_i_tlbwr(&p);
+
+       /* restore page mask */
+       if (PM_DEFAULT_MASK >> 16) {
+               uasm_i_lui(&p, K0, PM_DEFAULT_MASK >> 16);
+               uasm_i_ori(&p, K0, K0, PM_DEFAULT_MASK & 0xffff);
+               uasm_i_mtc0(&p, K0, C0_PAGEMASK);
+       } else if (PM_DEFAULT_MASK) {
+               uasm_i_ori(&p, K0, 0, PM_DEFAULT_MASK);
+               uasm_i_mtc0(&p, K0, C0_PAGEMASK);
+       } else {
+               uasm_i_mtc0(&p, 0, C0_PAGEMASK);
+       }
+
+       uasm_i_eret(&p);
+
+       if (check_for_high_segbits) {
+               uasm_l_large_segbits_fault(&l, p);
+               UASM_i_LA(&p, K1, (unsigned long)tlb_do_page_fault_0);
+               uasm_i_jr(&p, K1);
+               uasm_i_nop(&p);
+       }
+
+       uasm_resolve_relocs(relocs, labels);
+       memcpy((void *)(ebase + 0x80), tlb_handler, 0x80);
+       local_flush_icache_range(ebase + 0x80, ebase + 0x100);
+       dump_handler("loongson3_tlb_refill", (u32 *)(ebase + 0x80), 32);
+}
+
 extern u32 handle_tlbl[], handle_tlbl_end[];
 extern u32 handle_tlbs[], handle_tlbs_end[];
 extern u32 handle_tlbm[], handle_tlbm_end[];
@@ -1468,7 +1578,10 @@ static void build_setup_pgd(void)
        } else {
                /* PGD in c0_KScratch */
                uasm_i_jr(&p, 31);
-               UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
+               if (cpu_has_ldpte)
+                       UASM_i_MTC0(&p, a0, C0_PWBASE);
+               else
+                       UASM_i_MTC0(&p, a0, c0_kscratch(), pgd_reg);
        }
 #else
 #ifdef CONFIG_SMP
@@ -2437,13 +2550,18 @@ void build_tlb_refill_handler(void)
                break;
 
        default:
+               if (cpu_has_ldpte)
+                       setup_pw();
+
                if (!run_once) {
                        scratch_reg = allocate_kscratch();
                        build_setup_pgd();
                        build_r4000_tlb_load_handler();
                        build_r4000_tlb_store_handler();
                        build_r4000_tlb_modify_handler();
-                       if (!cpu_has_local_ebase)
+                       if (cpu_has_ldpte)
+                               build_loongson3_tlb_refill_handler();
+                       else if (!cpu_has_local_ebase)
                                build_r4000_tlb_refill_handler();
                        flush_tlb_handlers();
                        run_once++;
index b4a8378935625b2e1d0f228f6961ebd8208544f1..9c2220a45189a6e954a597c05bb880f700b93801 100644 (file)
@@ -153,6 +153,8 @@ static struct insn insn_table[] = {
        { insn_xori,  M(xori_op, 0, 0, 0, 0, 0),  RS | RT | UIMM },
        { insn_xor,  M(spec_op, 0, 0, 0, 0, xor_op),  RS | RT | RD },
        { insn_yield, M(spec3_op, 0, 0, 0, 0, yield_op), RS | RD },
+       { insn_ldpte, M(lwc2_op, 0, 0, 0, ldpte_op, mult_op), RS | RD },
+       { insn_lddir, M(lwc2_op, 0, 0, 0, lddir_op, mult_op), RS | RT | RD },
        { insn_invalid, 0, 0 }
 };
 
index 319051c34343ae9e0eb7c5d4adf82732e5598cd6..ad718debc35a74d62c2d0b532420b2711a5c4416 100644 (file)
@@ -60,6 +60,7 @@ enum opcode {
        insn_sltiu, insn_sltu, insn_sra, insn_srl, insn_srlv, insn_subu,
        insn_sw, insn_sync, insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi,
        insn_tlbwr, insn_wait, insn_wsbh, insn_xor, insn_xori, insn_yield,
+       insn_lddir, insn_ldpte,
 };
 
 struct insn {
@@ -335,6 +336,8 @@ I_u1u2s3(_bbit0);
 I_u1u2s3(_bbit1);
 I_u3u1u2(_lwx)
 I_u3u1u2(_ldx)
+I_u1u2(_ldpte)
+I_u2u1u3(_lddir)
 
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
 #include <asm/octeon/octeon.h>