#define H_PUD_INDEX_SIZE 9
#define H_PGD_INDEX_SIZE 9
+/*
+ * Each context is 512TB. But on 4k we restrict our max TASK size to 64TB.
+ * Hence also limit the max EA bits per context to 46 (i.e. 64TB).
+ */
+#define MAX_EA_BITS_PER_CONTEXT 46
+
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
#define H_PUD_INDEX_SIZE 7
#define H_PGD_INDEX_SIZE 8
+/*
+ * Each context is 512TB size. SLB miss for first context/default context
+ * is handled in the hotpath.
+ */
+#define MAX_EA_BITS_PER_CONTEXT 49
+
/*
* 64k aligned address free up few of the lower bits of RPN for us
* We steal that here. For more deatils look at pte_pfn/pfn_pte()
};
typedef struct {
- mm_context_id_t id;
+ union {
+ /*
+ * We use id as the PIDR content for radix. On hash we can use
+ * more than one id. The extended ids are used when we start
+ * having address above 512TB. We allocate one extended id
+ * for each 512TB. The new id is then used with the 49 bit
+ * EA to build a new VA. We always use ESID_BITS_1T_MASK bits
+ * from EA and new context ids to build the new VAs.
+ */
+ mm_context_id_t id;
+ mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
+ };
u16 user_psize; /* page size index */
/* Number of bits in the mm_cpumask */
static inline void radix_init_pseries(void) { };
#endif
+/*
+ * Look up the context id that backs the user effective address @ea.
+ *
+ * The address space is cut into windows of MAX_EA_BITS_PER_CONTEXT bits and
+ * the window number selects a slot in ctx->extended_id[]. A window number
+ * beyond the end of that array triggers a warning and yields 0.
+ */
+static inline int get_ea_context(mm_context_t *ctx, unsigned long ea)
+{
+	int slot = ea >> MAX_EA_BITS_PER_CONTEXT;
+
+	if (unlikely(slot >= ARRAY_SIZE(ctx->extended_id))) {
+		/* should never happen */
+		WARN_ON(1);
+		return 0;
+	}
+
+	return ctx->extended_id[slot];
+}
+
+/*
+ * Compute the VSID of a user effective address: the context id covering
+ * @ea is fetched with get_ea_context() and handed to get_vsid() together
+ * with @ea and the segment size @ssize.
+ */
+static inline unsigned long get_user_vsid(mm_context_t *ctx,
+					  unsigned long ea, int ssize)
+{
+	return get_vsid((unsigned long)get_ea_context(ctx, ea), ea, ssize);
+}
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
extern void hash__reserve_context_id(int id);
extern void __destroy_context(int context_id);
static inline void mmu_context_init(void) { }
+
+/*
+ * Allocate a new hash context id for the MAX_EA_BITS_PER_CONTEXT sized
+ * window that contains @ea and record it in mm->context.extended_id[].
+ *
+ * Returns the new context id on success, or a negative errno when either
+ * the id allocation fails or @ea lies outside the windows extended_id[]
+ * can describe.
+ */
+static inline int alloc_extended_context(struct mm_struct *mm,
+					 unsigned long ea)
+{
+	unsigned long index = ea >> MAX_EA_BITS_PER_CONTEXT;
+	int context_id;
+
+	/*
+	 * get_ea_context() reports an out-of-range window as "no context",
+	 * which makes need_extra_context() ask for an allocation. Refuse it
+	 * here, before an id is allocated, instead of storing past the end
+	 * of extended_id[].
+	 */
+	if (WARN_ON(index >= ARRAY_SIZE(mm->context.extended_id)))
+		return -ENOMEM;
+
+	context_id = hash__alloc_context_id();
+	if (context_id < 0)
+		return context_id;
+
+	/* The slot is expected to be empty; an existing id would be lost. */
+	VM_WARN_ON(mm->context.extended_id[index]);
+	mm->context.extended_id[index] = context_id;
+	return context_id;
+}
+
+/*
+ * Report whether the window containing @ea still has no context id, i.e.
+ * get_ea_context() returns 0 for it.
+ */
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+	return get_ea_context(&mm->context, ea) == 0;
+}
+
#else
extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk);
extern unsigned long __init_new_context(void);
extern void __destroy_context(unsigned long context_id);
extern void mmu_context_init(void);
+static inline int alloc_extended_context(struct mm_struct *mm,
+ unsigned long ea)
+{
+ /*
+ * Stub variant: non book3s_64 should never find this called, so
+ * warn and report failure with -ENOMEM. Neither @mm nor @ea is
+ * looked at.
+ */
+ WARN_ON(1);
+ return -ENOMEM;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+ return false; /* stub variant: never asks for an extra context; @mm and @ea are unused */
+}
#endif
#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
*/
#define TASK_SIZE_USER64 TASK_SIZE_512TB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB
+#define TASK_CONTEXT_SIZE TASK_SIZE_512TB
#else
#define TASK_SIZE_USER64 TASK_SIZE_64TB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB
+/*
+ * We don't need to allocate extended context ids for 4K page size, because
+ * we limit the max effective address on this config to 64TB.
+ */
+#define TASK_CONTEXT_SIZE TASK_SIZE_64TB
#endif
/*
lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
mtlr r10
- beq- 8f /* if bad address, make full stack frame */
+ /*
+ * Large address, check whether we have to allocate new contexts.
+ */
+ beq- 8f
bne- cr5,2f /* if unrecoverable exception, oops */
mr r3,r12
mfspr r11,SPRN_SRR0
mfspr r12,SPRN_SRR1
- LOAD_HANDLER(r10,bad_addr_slb)
+ LOAD_HANDLER(r10, large_addr_slb)
mtspr SPRN_SRR0,r10
ld r10,PACAKMSR(r13)
mtspr SPRN_SRR1,r10
bl unrecoverable_exception
b 1b
-EXC_COMMON_BEGIN(bad_addr_slb)
+EXC_COMMON_BEGIN(large_addr_slb)
EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
RECONCILE_IRQ_STATE(r10, r11)
ld r3, PACA_EXSLB+EX_DAR(r13)
std r10, _TRAP(r1)
2: bl save_nvgprs
addi r3, r1, STACK_FRAME_OVERHEAD
- bl slb_miss_bad_addr
+ bl slb_miss_large_addr
b ret_from_except
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
exception_exit(prev_state);
}
-void slb_miss_bad_addr(struct pt_regs *regs)
-{
- enum ctx_state prev_state = exception_enter();
-
- if (user_mode(regs))
- _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
- else
- bad_page_fault(regs, regs->dar, SIGSEGV);
-
- exception_exit(prev_state);
-}
-
void StackOverflow(struct pt_regs *regs)
{
printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
return 1;
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
vsidkey = SLB_VSID_USER;
break;
case VMALLOC_REGION_ID:
}
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
break;
case VMALLOC_REGION_ID:
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
/* Get VSID */
ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
if (!vsid)
return;
/*
}
EXPORT_SYMBOL_GPL(__destroy_context);
+/*
+ * Hand every context id recorded in ctx->extended_id[] back to
+ * mmu_context_ida. The whole walk runs under mmu_context_lock; slots that
+ * hold 0 are skipped.
+ */
+static void destroy_contexts(mm_context_t *ctx)
+{
+	int slot;
+
+	spin_lock(&mmu_context_lock);
+	for (slot = 0; slot < ARRAY_SIZE(ctx->extended_id); slot++) {
+		int id = ctx->extended_id[slot];
+
+		if (!id)
+			continue;
+		ida_remove(&mmu_context_ida, id);
+	}
+	spin_unlock(&mmu_context_lock);
+}
+
#ifdef CONFIG_PPC_64K_PAGES
static void destroy_pagetable_page(struct mm_struct *mm)
{
else
subpage_prot_free(mm);
destroy_pagetable_page(mm);
- __destroy_context(mm->context.id);
+ destroy_contexts(&mm->context);
mm->context.id = MMU_NO_CONTEXT;
}
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
+ vsid = get_user_vsid(&mm->context, addr, ssize);
WARN_ON(vsid == 0);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
#include <asm/cacheflush.h>
#include <asm/smp.h>
#include <linux/compiler.h>
+#include <linux/context_tracking.h>
#include <linux/mm_types.h>
#include <asm/udbg.h>
asm volatile("isync":::"memory");
}
+
+/*
+ * Write an SLB entry that maps the segment containing @ea to @vsid.
+ *
+ * @vsid:   virtual segment id to install
+ * @ea:     effective address the entry is built for
+ * @bpsize: index into mmu_psize_defs[] whose sllp bits go into the entry
+ * @ssize:  segment size, encoded into both halves of the entry
+ *
+ * The SLB slot is picked round-robin through the PACA's stab_rr and the
+ * PACA's slb_cache is brought up to date after the entry is written.
+ */
+static void insert_slb_entry(unsigned long vsid, unsigned long ea,
+			     int bpsize, int ssize)
+{
+	unsigned long sllp_flags, vsid_data, esid_data;
+	enum slb_index slot;
+	int cache_idx;
+
+	/*
+	 * We are irq disabled, hence should be safe to access PACA.
+	 *
+	 * Simple round-robin replacement of slb: step to the next slot and
+	 * wrap back to SLB_NUM_BOLTED once the last slot has been used.
+	 */
+	slot = get_paca()->stab_rr;
+	slot = (slot < (mmu_slb_size - 1)) ? slot + 1 : SLB_NUM_BOLTED;
+	get_paca()->stab_rr = slot;
+
+	sllp_flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
+	vsid_data = (vsid << slb_vsid_shift(ssize)) | sllp_flags |
+		    ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
+	esid_data = mk_esid_data(ea, ssize, slot);
+
+	asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
+		     : "memory");
+
+	/*
+	 * Now update slb cache entries
+	 */
+	cache_idx = get_paca()->slb_cache_ptr;
+	if (cache_idx >= SLB_CACHE_ENTRIES) {
+		/*
+		 * Our cache is full and the current cache content strictly
+		 * doesn't indicate the active SLB contents. Bump the ptr
+		 * so that switch_slb() will ignore the cache.
+		 */
+		get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+		return;
+	}
+
+	/*
+	 * We have space in slb cache for optimized switch_slb().
+	 * Top 36 bits from esid_data as per ISA
+	 */
+	get_paca()->slb_cache[cache_idx] = esid_data >> 28;
+	get_paca()->slb_cache_ptr++;
+}
+
+/*
+ * Install the SLB entry for @ea using the already allocated @context_id.
+ * The base page size comes from the current mm's slice information and
+ * the segment size is always mmu_highuser_ssize.
+ */
+static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
+{
+	unsigned long vsid;
+	int psize;
+
+	/* We are always above 1TB, hence use high user segment size. */
+	vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
+	psize = get_slice_psize(current->mm, ea);
+
+	insert_slb_entry(vsid, ea, psize, mmu_highuser_ssize);
+}
+
+/*
+ * C-level handler for an SLB miss on an address at or above
+ * 1UL << MAX_EA_BITS_PER_CONTEXT.
+ *
+ * The faulting address is taken from regs->dar. It must be in the user
+ * region, inside the range the page tables cover, above the first context
+ * window and below the mm's slb_addr_limit, and a context id must already
+ * be recorded for it; then the SLB entry is installed. Every other case is
+ * a bad access: SIGSEGV/SEGV_BNDERR for user mode, bad_page_fault()
+ * otherwise.
+ */
+void slb_miss_large_addr(struct pt_regs *regs)
+{
+	enum ctx_state prev_state = exception_enter();
+	struct mm_struct *mm = current->mm;
+	unsigned long ea = regs->dar;
+	int context;
+
+	if (REGION_ID(ea) != USER_REGION_ID)
+		goto slb_bad_addr;
+
+	/*
+	 * Are we beyond what the page table layout supports ?
+	 */
+	if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
+		goto slb_bad_addr;
+
+	/* Lower address should have been handled by asm code */
+	if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
+		goto slb_bad_addr;
+
+	/*
+	 * Without an mm there is no context to look up; report the access
+	 * as bad instead of dereferencing a NULL mm below.
+	 */
+	if (unlikely(!mm))
+		goto slb_bad_addr;
+
+	/*
+	 * consider this as bad access if we take a SLB miss
+	 * on an address above addr limit.
+	 */
+	if (ea >= mm->context.slb_addr_limit)
+		goto slb_bad_addr;
+
+	/* A zero id means no extended context was allocated for this ea. */
+	context = get_ea_context(&mm->context, ea);
+	if (!context)
+		goto slb_bad_addr;
+
+	handle_multi_context_slb_miss(context, ea);
+	exception_exit(prev_state);
+	return;
+
+slb_bad_addr:
+	if (user_mode(regs))
+		_exception(SIGSEGV, regs, SEGV_BNDERR, ea);
+	else
+		bad_page_fault(regs, ea, SIGSEGV);
+	exception_exit(prev_state);
+}
*/
_GLOBAL(slb_allocate)
/*
- * check for bad kernel/user address
- * (ea & ~REGION_MASK) >= PGTABLE_RANGE
+ * Check if the address falls within the range of the first context, or
+ * if we may need to handle multi context. For the first context we
+ * allocate the slb entry via the fast path below. For large address we
+ * branch out to C-code and see if additional contexts have been
+ * allocated.
+ * The test here is:
+ * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
*/
- rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4)
+ rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
bne- 8f
srdi r9,r3,60 /* get region */
slice_print_mask(" mask", &potential_mask);
convert:
+ /*
+ * Try to allocate the context before we do slice convert
+ * so that we handle the context allocation failure gracefully.
+ */
+ if (need_extra_context(mm, newaddr)) {
+ if (alloc_extended_context(mm, newaddr) < 0)
+ return -ENOMEM;
+ }
+
slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
if (compat_maskp && !fixed)
slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
if (psize > MMU_PAGE_BASE)
on_each_cpu(slice_flush_segments, mm, 1);
}
+ return newaddr;
return_addr:
+ if (need_extra_context(mm, newaddr)) {
+ if (alloc_extended_context(mm, newaddr) < 0)
+ return -ENOMEM;
+ }
return newaddr;
-
}
EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
/* Build full vaddr */
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
+ vsid = get_user_vsid(&mm->context, addr, ssize);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
ssize = mmu_kernel_ssize;