From: Thomas Gleixner Date: Thu, 11 Oct 2007 09:12:05 +0000 (+0200) Subject: i386: prepare shared kernel/vm86.c X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=7338d4357bb2c8f20e400432e1fe04a22cb868ad;p=openwrt%2Fstaging%2Fblogic.git i386: prepare shared kernel/vm86.c Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index fff92c718b62..52b0041583cf 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -35,7 +35,7 @@ obj-y += sysenter.o vsyscall.o obj-$(CONFIG_ACPI_SRAT) += srat_32.o obj-$(CONFIG_EFI) += efi.o efi_stub.o obj-$(CONFIG_DOUBLEFAULT) += doublefault.o -obj-$(CONFIG_VM86) += vm86.o +obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c deleted file mode 100644 index f2dcd1d27c0a..000000000000 --- a/arch/i386/kernel/vm86.c +++ /dev/null @@ -1,843 +0,0 @@ -/* - * linux/kernel/vm86.c - * - * Copyright (C) 1994 Linus Torvalds - * - * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86 - * stack - Manfred Spraul - * - * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle - * them correctly. Now the emulation will be in a - * consistent state after stackfaults - Kasper Dupont - * - * - * 22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont - * - * - * ?? ??? 2002 - Fixed premature returns from handle_vm86_fault - * caused by Kasper Dupont's changes - Stas Sergeev - * - * 4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes. - * Kasper Dupont - * - * 9 apr 2002 - Changed syntax of macros in handle_vm86_fault. - * Kasper Dupont - * - * 9 apr 2002 - Changed stack access macros to jump to a label - * instead of returning to userspace. This simplifies - * do_int, and is needed by handle_vm6_fault. Kasper - * Dupont - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -/* - * Known problems: - * - * Interrupt handling is not guaranteed: - * - a real x86 will disable all interrupts for one instruction - * after a "mov ss,xx" to make stack handling atomic even without - * the 'lss' instruction. We can't guarantee this in v86 mode, - * as the next instruction might result in a page fault or similar. - * - a real x86 will have interrupts disabled for one instruction - * past the 'sti' that enables them. We don't bother with all the - * details yet. - * - * Let's hope these problems do not actually matter for anything. - */ - - -#define KVM86 ((struct kernel_vm86_struct *)regs) -#define VMPI KVM86->vm86plus - - -/* - * 8- and 16-bit register defines.. - */ -#define AL(regs) (((unsigned char *)&((regs)->pt.eax))[0]) -#define AH(regs) (((unsigned char *)&((regs)->pt.eax))[1]) -#define IP(regs) (*(unsigned short *)&((regs)->pt.eip)) -#define SP(regs) (*(unsigned short *)&((regs)->pt.esp)) - -/* - * virtual flags (16 and 32-bit versions) - */ -#define VFLAGS (*(unsigned short *)&(current->thread.v86flags)) -#define VEFLAGS (current->thread.v86flags) - -#define set_flags(X,new,mask) \ -((X) = ((X) & ~(mask)) | ((new) & (mask))) - -#define SAFE_MASK (0xDD5) -#define RETURN_MASK (0xDFF) - -/* convert kernel_vm86_regs to vm86_regs */ -static int copy_vm86_regs_to_user(struct vm86_regs __user *user, - const struct kernel_vm86_regs *regs) -{ - int ret = 0; - - /* kernel_vm86_regs is missing xgs, so copy everything up to - (but not including) orig_eax, and then rest including orig_eax. */ - ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax)); - ret += copy_to_user(&user->orig_eax, ®s->pt.orig_eax, - sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.orig_eax)); - - return ret; -} - -/* convert vm86_regs to kernel_vm86_regs */ -static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, - const struct vm86_regs __user *user, - unsigned extra) -{ - int ret = 0; - - /* copy eax-xfs inclusive */ - ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax)); - /* copy orig_eax-__gsh+extra */ - ret += copy_from_user(®s->pt.orig_eax, &user->orig_eax, - sizeof(struct kernel_vm86_regs) - - offsetof(struct kernel_vm86_regs, pt.orig_eax) + - extra); - return ret; -} - -struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs)); -struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) -{ - struct tss_struct *tss; - struct pt_regs *ret; - unsigned long tmp; - - /* - * This gets called from entry.S with interrupts disabled, but - * from process context. Enable interrupts here, before trying - * to access user space. - */ - local_irq_enable(); - - if (!current->thread.vm86_info) { - printk("no vm86_info: BAD\n"); - do_exit(SIGSEGV); - } - set_flags(regs->pt.eflags, VEFLAGS, VIF_MASK | current->thread.v86mask); - tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs,regs); - tmp += put_user(current->thread.screen_bitmap,¤t->thread.vm86_info->screen_bitmap); - if (tmp) { - printk("vm86: could not access userspace vm86_info\n"); - do_exit(SIGSEGV); - } - - tss = &per_cpu(init_tss, get_cpu()); - current->thread.esp0 = current->thread.saved_esp0; - current->thread.sysenter_cs = __KERNEL_CS; - load_esp0(tss, ¤t->thread); - current->thread.saved_esp0 = 0; - put_cpu(); - - ret = KVM86->regs32; - - ret->xfs = current->thread.saved_fs; - loadsegment(gs, current->thread.saved_gs); - - return ret; -} - -static void mark_screen_rdonly(struct mm_struct *mm) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - spinlock_t *ptl; - int i; - - pgd = pgd_offset(mm, 0xA0000); - if (pgd_none_or_clear_bad(pgd)) - goto out; - pud = pud_offset(pgd, 0xA0000); - if (pud_none_or_clear_bad(pud)) - goto out; - pmd = pmd_offset(pud, 0xA0000); - if (pmd_none_or_clear_bad(pmd)) - goto out; - pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); - for (i = 0; i < 32; i++) { - if (pte_present(*pte)) - set_pte(pte, pte_wrprotect(*pte)); - pte++; - } - pte_unmap_unlock(pte, ptl); -out: - flush_tlb(); -} - - - -static int do_vm86_irq_handling(int subfunction, int irqnumber); -static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); - -asmlinkage int sys_vm86old(struct pt_regs regs) -{ - struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx; - struct kernel_vm86_struct info; /* declare this _on top_, - * this avoids wasting of stack space. - * This remains on the stack until we - * return to 32 bit user space. - */ - struct task_struct *tsk; - int tmp, ret = -EPERM; - - tsk = current; - if (tsk->thread.saved_esp0) - goto out; - tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, - offsetof(struct kernel_vm86_struct, vm86plus) - - sizeof(info.regs)); - ret = -EFAULT; - if (tmp) - goto out; - memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); - info.regs32 = ®s; - tsk->thread.vm86_info = v86; - do_sys_vm86(&info, tsk); - ret = 0; /* we never return here */ -out: - return ret; -} - - -asmlinkage int sys_vm86(struct pt_regs regs) -{ - struct kernel_vm86_struct info; /* declare this _on top_, - * this avoids wasting of stack space. - * This remains on the stack until we - * return to 32 bit user space. - */ - struct task_struct *tsk; - int tmp, ret; - struct vm86plus_struct __user *v86; - - tsk = current; - switch (regs.ebx) { - case VM86_REQUEST_IRQ: - case VM86_FREE_IRQ: - case VM86_GET_IRQ_BITS: - case VM86_GET_AND_RESET_IRQ: - ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx); - goto out; - case VM86_PLUS_INSTALL_CHECK: - /* NOTE: on old vm86 stuff this will return the error - from access_ok(), because the subfunction is - interpreted as (invalid) address to vm86_struct. - So the installation check works. - */ - ret = 0; - goto out; - } - - /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ - ret = -EPERM; - if (tsk->thread.saved_esp0) - goto out; - v86 = (struct vm86plus_struct __user *)regs.ecx; - tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, - offsetof(struct kernel_vm86_struct, regs32) - - sizeof(info.regs)); - ret = -EFAULT; - if (tmp) - goto out; - info.regs32 = ®s; - info.vm86plus.is_vm86pus = 1; - tsk->thread.vm86_info = (struct vm86_struct __user *)v86; - do_sys_vm86(&info, tsk); - ret = 0; /* we never return here */ -out: - return ret; -} - - -static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) -{ - struct tss_struct *tss; -/* - * make sure the vm86() system call doesn't try to do anything silly - */ - info->regs.pt.xds = 0; - info->regs.pt.xes = 0; - info->regs.pt.xfs = 0; - -/* we are clearing gs later just before "jmp resume_userspace", - * because it is not saved/restored. - */ - -/* - * The eflags register is also special: we cannot trust that the user - * has set it up safely, so this makes sure interrupt etc flags are - * inherited from protected mode. - */ - VEFLAGS = info->regs.pt.eflags; - info->regs.pt.eflags &= SAFE_MASK; - info->regs.pt.eflags |= info->regs32->eflags & ~SAFE_MASK; - info->regs.pt.eflags |= VM_MASK; - - switch (info->cpu_type) { - case CPU_286: - tsk->thread.v86mask = 0; - break; - case CPU_386: - tsk->thread.v86mask = NT_MASK | IOPL_MASK; - break; - case CPU_486: - tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK; - break; - default: - tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK; - break; - } - -/* - * Save old state, set default return value (%eax) to 0 - */ - info->regs32->eax = 0; - tsk->thread.saved_esp0 = tsk->thread.esp0; - tsk->thread.saved_fs = info->regs32->xfs; - savesegment(gs, tsk->thread.saved_gs); - - tss = &per_cpu(init_tss, get_cpu()); - tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; - if (cpu_has_sep) - tsk->thread.sysenter_cs = 0; - load_esp0(tss, &tsk->thread); - put_cpu(); - - tsk->thread.screen_bitmap = info->screen_bitmap; - if (info->flags & VM86_SCREEN_BITMAP) - mark_screen_rdonly(tsk->mm); - - /*call audit_syscall_exit since we do not exit via the normal paths */ - if (unlikely(current->audit_context)) - audit_syscall_exit(AUDITSC_RESULT(0), 0); - - __asm__ __volatile__( - "movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" - "mov %2, %%gs\n\t" - "jmp resume_userspace" - : /* no outputs */ - :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); - /* we never return here */ -} - -static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval) -{ - struct pt_regs * regs32; - - regs32 = save_v86_state(regs16); - regs32->eax = retval; - __asm__ __volatile__("movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" - "jmp resume_userspace" - : : "r" (regs32), "r" (current_thread_info())); -} - -static inline void set_IF(struct kernel_vm86_regs * regs) -{ - VEFLAGS |= VIF_MASK; - if (VEFLAGS & VIP_MASK) - return_to_32bit(regs, VM86_STI); -} - -static inline void clear_IF(struct kernel_vm86_regs * regs) -{ - VEFLAGS &= ~VIF_MASK; -} - -static inline void clear_TF(struct kernel_vm86_regs * regs) -{ - regs->pt.eflags &= ~TF_MASK; -} - -static inline void clear_AC(struct kernel_vm86_regs * regs) -{ - regs->pt.eflags &= ~AC_MASK; -} - -/* It is correct to call set_IF(regs) from the set_vflags_* - * functions. However someone forgot to call clear_IF(regs) - * in the opposite case. - * After the command sequence CLI PUSHF STI POPF you should - * end up with interrups disabled, but you ended up with - * interrupts enabled. - * ( I was testing my own changes, but the only bug I - * could find was in a function I had not changed. ) - * [KD] - */ - -static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs) -{ - set_flags(VEFLAGS, eflags, current->thread.v86mask); - set_flags(regs->pt.eflags, eflags, SAFE_MASK); - if (eflags & IF_MASK) - set_IF(regs); - else - clear_IF(regs); -} - -static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs) -{ - set_flags(VFLAGS, flags, current->thread.v86mask); - set_flags(regs->pt.eflags, flags, SAFE_MASK); - if (flags & IF_MASK) - set_IF(regs); - else - clear_IF(regs); -} - -static inline unsigned long get_vflags(struct kernel_vm86_regs * regs) -{ - unsigned long flags = regs->pt.eflags & RETURN_MASK; - - if (VEFLAGS & VIF_MASK) - flags |= IF_MASK; - flags |= IOPL_MASK; - return flags | (VEFLAGS & current->thread.v86mask); -} - -static inline int is_revectored(int nr, struct revectored_struct * bitmap) -{ - __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" - :"=r" (nr) - :"m" (*bitmap),"r" (nr)); - return nr; -} - -#define val_byte(val, n) (((__u8 *)&val)[n]) - -#define pushb(base, ptr, val, err_label) \ - do { \ - __u8 __val = val; \ - ptr--; \ - if (put_user(__val, base + ptr) < 0) \ - goto err_label; \ - } while(0) - -#define pushw(base, ptr, val, err_label) \ - do { \ - __u16 __val = val; \ - ptr--; \ - if (put_user(val_byte(__val, 1), base + ptr) < 0) \ - goto err_label; \ - ptr--; \ - if (put_user(val_byte(__val, 0), base + ptr) < 0) \ - goto err_label; \ - } while(0) - -#define pushl(base, ptr, val, err_label) \ - do { \ - __u32 __val = val; \ - ptr--; \ - if (put_user(val_byte(__val, 3), base + ptr) < 0) \ - goto err_label; \ - ptr--; \ - if (put_user(val_byte(__val, 2), base + ptr) < 0) \ - goto err_label; \ - ptr--; \ - if (put_user(val_byte(__val, 1), base + ptr) < 0) \ - goto err_label; \ - ptr--; \ - if (put_user(val_byte(__val, 0), base + ptr) < 0) \ - goto err_label; \ - } while(0) - -#define popb(base, ptr, err_label) \ - ({ \ - __u8 __res; \ - if (get_user(__res, base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - __res; \ - }) - -#define popw(base, ptr, err_label) \ - ({ \ - __u16 __res; \ - if (get_user(val_byte(__res, 0), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - if (get_user(val_byte(__res, 1), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - __res; \ - }) - -#define popl(base, ptr, err_label) \ - ({ \ - __u32 __res; \ - if (get_user(val_byte(__res, 0), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - if (get_user(val_byte(__res, 1), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - if (get_user(val_byte(__res, 2), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - if (get_user(val_byte(__res, 3), base + ptr) < 0) \ - goto err_label; \ - ptr++; \ - __res; \ - }) - -/* There are so many possible reasons for this function to return - * VM86_INTx, so adding another doesn't bother me. We can expect - * userspace programs to be able to handle it. (Getting a problem - * in userspace is always better than an Oops anyway.) [KD] - */ -static void do_int(struct kernel_vm86_regs *regs, int i, - unsigned char __user * ssp, unsigned short sp) -{ - unsigned long __user *intr_ptr; - unsigned long segoffs; - - if (regs->pt.xcs == BIOSSEG) - goto cannot_handle; - if (is_revectored(i, &KVM86->int_revectored)) - goto cannot_handle; - if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored)) - goto cannot_handle; - intr_ptr = (unsigned long __user *) (i << 2); - if (get_user(segoffs, intr_ptr)) - goto cannot_handle; - if ((segoffs >> 16) == BIOSSEG) - goto cannot_handle; - pushw(ssp, sp, get_vflags(regs), cannot_handle); - pushw(ssp, sp, regs->pt.xcs, cannot_handle); - pushw(ssp, sp, IP(regs), cannot_handle); - regs->pt.xcs = segoffs >> 16; - SP(regs) -= 6; - IP(regs) = segoffs & 0xffff; - clear_TF(regs); - clear_IF(regs); - clear_AC(regs); - return; - -cannot_handle: - return_to_32bit(regs, VM86_INTx + (i << 8)); -} - -int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno) -{ - if (VMPI.is_vm86pus) { - if ( (trapno==3) || (trapno==1) ) - return_to_32bit(regs, VM86_TRAP + (trapno << 8)); - do_int(regs, trapno, (unsigned char __user *) (regs->pt.xss << 4), SP(regs)); - return 0; - } - if (trapno !=1) - return 1; /* we let this handle by the calling routine */ - if (current->ptrace & PT_PTRACED) { - unsigned long flags; - spin_lock_irqsave(¤t->sighand->siglock, flags); - sigdelset(¤t->blocked, SIGTRAP); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - } - send_sig(SIGTRAP, current, 1); - current->thread.trap_no = trapno; - current->thread.error_code = error_code; - return 0; -} - -void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) -{ - unsigned char opcode; - unsigned char __user *csp; - unsigned char __user *ssp; - unsigned short ip, sp, orig_flags; - int data32, pref_done; - -#define CHECK_IF_IN_TRAP \ - if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \ - newflags |= TF_MASK -#define VM86_FAULT_RETURN do { \ - if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \ - return_to_32bit(regs, VM86_PICRETURN); \ - if (orig_flags & TF_MASK) \ - handle_vm86_trap(regs, 0, 1); \ - return; } while (0) - - orig_flags = *(unsigned short *)®s->pt.eflags; - - csp = (unsigned char __user *) (regs->pt.xcs << 4); - ssp = (unsigned char __user *) (regs->pt.xss << 4); - sp = SP(regs); - ip = IP(regs); - - data32 = 0; - pref_done = 0; - do { - switch (opcode = popb(csp, ip, simulate_sigsegv)) { - case 0x66: /* 32-bit data */ data32=1; break; - case 0x67: /* 32-bit address */ break; - case 0x2e: /* CS */ break; - case 0x3e: /* DS */ break; - case 0x26: /* ES */ break; - case 0x36: /* SS */ break; - case 0x65: /* GS */ break; - case 0x64: /* FS */ break; - case 0xf2: /* repnz */ break; - case 0xf3: /* rep */ break; - default: pref_done = 1; - } - } while (!pref_done); - - switch (opcode) { - - /* pushf */ - case 0x9c: - if (data32) { - pushl(ssp, sp, get_vflags(regs), simulate_sigsegv); - SP(regs) -= 4; - } else { - pushw(ssp, sp, get_vflags(regs), simulate_sigsegv); - SP(regs) -= 2; - } - IP(regs) = ip; - VM86_FAULT_RETURN; - - /* popf */ - case 0x9d: - { - unsigned long newflags; - if (data32) { - newflags=popl(ssp, sp, simulate_sigsegv); - SP(regs) += 4; - } else { - newflags = popw(ssp, sp, simulate_sigsegv); - SP(regs) += 2; - } - IP(regs) = ip; - CHECK_IF_IN_TRAP; - if (data32) { - set_vflags_long(newflags, regs); - } else { - set_vflags_short(newflags, regs); - } - VM86_FAULT_RETURN; - } - - /* int xx */ - case 0xcd: { - int intno=popb(csp, ip, simulate_sigsegv); - IP(regs) = ip; - if (VMPI.vm86dbg_active) { - if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] ) - return_to_32bit(regs, VM86_INTx + (intno << 8)); - } - do_int(regs, intno, ssp, sp); - return; - } - - /* iret */ - case 0xcf: - { - unsigned long newip; - unsigned long newcs; - unsigned long newflags; - if (data32) { - newip=popl(ssp, sp, simulate_sigsegv); - newcs=popl(ssp, sp, simulate_sigsegv); - newflags=popl(ssp, sp, simulate_sigsegv); - SP(regs) += 12; - } else { - newip = popw(ssp, sp, simulate_sigsegv); - newcs = popw(ssp, sp, simulate_sigsegv); - newflags = popw(ssp, sp, simulate_sigsegv); - SP(regs) += 6; - } - IP(regs) = newip; - regs->pt.xcs = newcs; - CHECK_IF_IN_TRAP; - if (data32) { - set_vflags_long(newflags, regs); - } else { - set_vflags_short(newflags, regs); - } - VM86_FAULT_RETURN; - } - - /* cli */ - case 0xfa: - IP(regs) = ip; - clear_IF(regs); - VM86_FAULT_RETURN; - - /* sti */ - /* - * Damn. This is incorrect: the 'sti' instruction should actually - * enable interrupts after the /next/ instruction. Not good. - * - * Probably needs some horsing around with the TF flag. Aiee.. - */ - case 0xfb: - IP(regs) = ip; - set_IF(regs); - VM86_FAULT_RETURN; - - default: - return_to_32bit(regs, VM86_UNKNOWN); - } - - return; - -simulate_sigsegv: - /* FIXME: After a long discussion with Stas we finally - * agreed, that this is wrong. Here we should - * really send a SIGSEGV to the user program. - * But how do we create the correct context? We - * are inside a general protection fault handler - * and has just returned from a page fault handler. - * The correct context for the signal handler - * should be a mixture of the two, but how do we - * get the information? [KD] - */ - return_to_32bit(regs, VM86_UNKNOWN); -} - -/* ---------------- vm86 special IRQ passing stuff ----------------- */ - -#define VM86_IRQNAME "vm86irq" - -static struct vm86_irqs { - struct task_struct *tsk; - int sig; -} vm86_irqs[16]; - -static DEFINE_SPINLOCK(irqbits_lock); -static int irqbits; - -#define ALLOWED_SIGS ( 1 /* 0 = don't send a signal */ \ - | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \ - | (1 << SIGUNUSED) ) - -static irqreturn_t irq_handler(int intno, void *dev_id) -{ - int irq_bit; - unsigned long flags; - - spin_lock_irqsave(&irqbits_lock, flags); - irq_bit = 1 << intno; - if ((irqbits & irq_bit) || ! vm86_irqs[intno].tsk) - goto out; - irqbits |= irq_bit; - if (vm86_irqs[intno].sig) - send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1); - /* - * IRQ will be re-enabled when user asks for the irq (whether - * polling or as a result of the signal) - */ - disable_irq_nosync(intno); - spin_unlock_irqrestore(&irqbits_lock, flags); - return IRQ_HANDLED; - -out: - spin_unlock_irqrestore(&irqbits_lock, flags); - return IRQ_NONE; -} - -static inline void free_vm86_irq(int irqnumber) -{ - unsigned long flags; - - free_irq(irqnumber, NULL); - vm86_irqs[irqnumber].tsk = NULL; - - spin_lock_irqsave(&irqbits_lock, flags); - irqbits &= ~(1 << irqnumber); - spin_unlock_irqrestore(&irqbits_lock, flags); -} - -void release_vm86_irqs(struct task_struct *task) -{ - int i; - for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++) - if (vm86_irqs[i].tsk == task) - free_vm86_irq(i); -} - -static inline int get_and_reset_irq(int irqnumber) -{ - int bit; - unsigned long flags; - int ret = 0; - - if (invalid_vm86_irq(irqnumber)) return 0; - if (vm86_irqs[irqnumber].tsk != current) return 0; - spin_lock_irqsave(&irqbits_lock, flags); - bit = irqbits & (1 << irqnumber); - irqbits &= ~bit; - if (bit) { - enable_irq(irqnumber); - ret = 1; - } - - spin_unlock_irqrestore(&irqbits_lock, flags); - return ret; -} - - -static int do_vm86_irq_handling(int subfunction, int irqnumber) -{ - int ret; - switch (subfunction) { - case VM86_GET_AND_RESET_IRQ: { - return get_and_reset_irq(irqnumber); - } - case VM86_GET_IRQ_BITS: { - return irqbits; - } - case VM86_REQUEST_IRQ: { - int sig = irqnumber >> 8; - int irq = irqnumber & 255; - if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM; - if (invalid_vm86_irq(irq)) return -EPERM; - if (vm86_irqs[irq].tsk) return -EPERM; - ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL); - if (ret) return ret; - vm86_irqs[irq].sig = sig; - vm86_irqs[irq].tsk = current; - return irq; - } - case VM86_FREE_IRQ: { - if (invalid_vm86_irq(irqnumber)) return -EPERM; - if (!vm86_irqs[irqnumber].tsk) return 0; - if (vm86_irqs[irqnumber].tsk != current) return -EPERM; - free_vm86_irq(irqnumber); - return 0; - } - } - return -EINVAL; -} - diff --git a/arch/i386/kernel/vm86_32.c b/arch/i386/kernel/vm86_32.c new file mode 100644 index 000000000000..f2dcd1d27c0a --- /dev/null +++ b/arch/i386/kernel/vm86_32.c @@ -0,0 +1,843 @@ +/* + * linux/kernel/vm86.c + * + * Copyright (C) 1994 Linus Torvalds + * + * 29 dec 2001 - Fixed oopses caused by unchecked access to the vm86 + * stack - Manfred Spraul + * + * 22 mar 2002 - Manfred detected the stackfaults, but didn't handle + * them correctly. Now the emulation will be in a + * consistent state after stackfaults - Kasper Dupont + * + * + * 22 mar 2002 - Added missing clear_IF in set_vflags_* Kasper Dupont + * + * + * ?? ??? 2002 - Fixed premature returns from handle_vm86_fault + * caused by Kasper Dupont's changes - Stas Sergeev + * + * 4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes. + * Kasper Dupont + * + * 9 apr 2002 - Changed syntax of macros in handle_vm86_fault. + * Kasper Dupont + * + * 9 apr 2002 - Changed stack access macros to jump to a label + * instead of returning to userspace. This simplifies + * do_int, and is needed by handle_vm6_fault. Kasper + * Dupont + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Known problems: + * + * Interrupt handling is not guaranteed: + * - a real x86 will disable all interrupts for one instruction + * after a "mov ss,xx" to make stack handling atomic even without + * the 'lss' instruction. We can't guarantee this in v86 mode, + * as the next instruction might result in a page fault or similar. + * - a real x86 will have interrupts disabled for one instruction + * past the 'sti' that enables them. We don't bother with all the + * details yet. + * + * Let's hope these problems do not actually matter for anything. + */ + + +#define KVM86 ((struct kernel_vm86_struct *)regs) +#define VMPI KVM86->vm86plus + + +/* + * 8- and 16-bit register defines.. + */ +#define AL(regs) (((unsigned char *)&((regs)->pt.eax))[0]) +#define AH(regs) (((unsigned char *)&((regs)->pt.eax))[1]) +#define IP(regs) (*(unsigned short *)&((regs)->pt.eip)) +#define SP(regs) (*(unsigned short *)&((regs)->pt.esp)) + +/* + * virtual flags (16 and 32-bit versions) + */ +#define VFLAGS (*(unsigned short *)&(current->thread.v86flags)) +#define VEFLAGS (current->thread.v86flags) + +#define set_flags(X,new,mask) \ +((X) = ((X) & ~(mask)) | ((new) & (mask))) + +#define SAFE_MASK (0xDD5) +#define RETURN_MASK (0xDFF) + +/* convert kernel_vm86_regs to vm86_regs */ +static int copy_vm86_regs_to_user(struct vm86_regs __user *user, + const struct kernel_vm86_regs *regs) +{ + int ret = 0; + + /* kernel_vm86_regs is missing xgs, so copy everything up to + (but not including) orig_eax, and then rest including orig_eax. */ + ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax)); + ret += copy_to_user(&user->orig_eax, ®s->pt.orig_eax, + sizeof(struct kernel_vm86_regs) - + offsetof(struct kernel_vm86_regs, pt.orig_eax)); + + return ret; +} + +/* convert vm86_regs to kernel_vm86_regs */ +static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, + const struct vm86_regs __user *user, + unsigned extra) +{ + int ret = 0; + + /* copy eax-xfs inclusive */ + ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax)); + /* copy orig_eax-__gsh+extra */ + ret += copy_from_user(®s->pt.orig_eax, &user->orig_eax, + sizeof(struct kernel_vm86_regs) - + offsetof(struct kernel_vm86_regs, pt.orig_eax) + + extra); + return ret; +} + +struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs)); +struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) +{ + struct tss_struct *tss; + struct pt_regs *ret; + unsigned long tmp; + + /* + * This gets called from entry.S with interrupts disabled, but + * from process context. Enable interrupts here, before trying + * to access user space. + */ + local_irq_enable(); + + if (!current->thread.vm86_info) { + printk("no vm86_info: BAD\n"); + do_exit(SIGSEGV); + } + set_flags(regs->pt.eflags, VEFLAGS, VIF_MASK | current->thread.v86mask); + tmp = copy_vm86_regs_to_user(¤t->thread.vm86_info->regs,regs); + tmp += put_user(current->thread.screen_bitmap,¤t->thread.vm86_info->screen_bitmap); + if (tmp) { + printk("vm86: could not access userspace vm86_info\n"); + do_exit(SIGSEGV); + } + + tss = &per_cpu(init_tss, get_cpu()); + current->thread.esp0 = current->thread.saved_esp0; + current->thread.sysenter_cs = __KERNEL_CS; + load_esp0(tss, ¤t->thread); + current->thread.saved_esp0 = 0; + put_cpu(); + + ret = KVM86->regs32; + + ret->xfs = current->thread.saved_fs; + loadsegment(gs, current->thread.saved_gs); + + return ret; +} + +static void mark_screen_rdonly(struct mm_struct *mm) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + spinlock_t *ptl; + int i; + + pgd = pgd_offset(mm, 0xA0000); + if (pgd_none_or_clear_bad(pgd)) + goto out; + pud = pud_offset(pgd, 0xA0000); + if (pud_none_or_clear_bad(pud)) + goto out; + pmd = pmd_offset(pud, 0xA0000); + if (pmd_none_or_clear_bad(pmd)) + goto out; + pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); + for (i = 0; i < 32; i++) { + if (pte_present(*pte)) + set_pte(pte, pte_wrprotect(*pte)); + pte++; + } + pte_unmap_unlock(pte, ptl); +out: + flush_tlb(); +} + + + +static int do_vm86_irq_handling(int subfunction, int irqnumber); +static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); + +asmlinkage int sys_vm86old(struct pt_regs regs) +{ + struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.ebx; + struct kernel_vm86_struct info; /* declare this _on top_, + * this avoids wasting of stack space. + * This remains on the stack until we + * return to 32 bit user space. + */ + struct task_struct *tsk; + int tmp, ret = -EPERM; + + tsk = current; + if (tsk->thread.saved_esp0) + goto out; + tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, + offsetof(struct kernel_vm86_struct, vm86plus) - + sizeof(info.regs)); + ret = -EFAULT; + if (tmp) + goto out; + memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); + info.regs32 = ®s; + tsk->thread.vm86_info = v86; + do_sys_vm86(&info, tsk); + ret = 0; /* we never return here */ +out: + return ret; +} + + +asmlinkage int sys_vm86(struct pt_regs regs) +{ + struct kernel_vm86_struct info; /* declare this _on top_, + * this avoids wasting of stack space. + * This remains on the stack until we + * return to 32 bit user space. + */ + struct task_struct *tsk; + int tmp, ret; + struct vm86plus_struct __user *v86; + + tsk = current; + switch (regs.ebx) { + case VM86_REQUEST_IRQ: + case VM86_FREE_IRQ: + case VM86_GET_IRQ_BITS: + case VM86_GET_AND_RESET_IRQ: + ret = do_vm86_irq_handling(regs.ebx, (int)regs.ecx); + goto out; + case VM86_PLUS_INSTALL_CHECK: + /* NOTE: on old vm86 stuff this will return the error + from access_ok(), because the subfunction is + interpreted as (invalid) address to vm86_struct. + So the installation check works. + */ + ret = 0; + goto out; + } + + /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */ + ret = -EPERM; + if (tsk->thread.saved_esp0) + goto out; + v86 = (struct vm86plus_struct __user *)regs.ecx; + tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, + offsetof(struct kernel_vm86_struct, regs32) - + sizeof(info.regs)); + ret = -EFAULT; + if (tmp) + goto out; + info.regs32 = ®s; + info.vm86plus.is_vm86pus = 1; + tsk->thread.vm86_info = (struct vm86_struct __user *)v86; + do_sys_vm86(&info, tsk); + ret = 0; /* we never return here */ +out: + return ret; +} + + +static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) +{ + struct tss_struct *tss; +/* + * make sure the vm86() system call doesn't try to do anything silly + */ + info->regs.pt.xds = 0; + info->regs.pt.xes = 0; + info->regs.pt.xfs = 0; + +/* we are clearing gs later just before "jmp resume_userspace", + * because it is not saved/restored. + */ + +/* + * The eflags register is also special: we cannot trust that the user + * has set it up safely, so this makes sure interrupt etc flags are + * inherited from protected mode. + */ + VEFLAGS = info->regs.pt.eflags; + info->regs.pt.eflags &= SAFE_MASK; + info->regs.pt.eflags |= info->regs32->eflags & ~SAFE_MASK; + info->regs.pt.eflags |= VM_MASK; + + switch (info->cpu_type) { + case CPU_286: + tsk->thread.v86mask = 0; + break; + case CPU_386: + tsk->thread.v86mask = NT_MASK | IOPL_MASK; + break; + case CPU_486: + tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK; + break; + default: + tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK; + break; + } + +/* + * Save old state, set default return value (%eax) to 0 + */ + info->regs32->eax = 0; + tsk->thread.saved_esp0 = tsk->thread.esp0; + tsk->thread.saved_fs = info->regs32->xfs; + savesegment(gs, tsk->thread.saved_gs); + + tss = &per_cpu(init_tss, get_cpu()); + tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; + if (cpu_has_sep) + tsk->thread.sysenter_cs = 0; + load_esp0(tss, &tsk->thread); + put_cpu(); + + tsk->thread.screen_bitmap = info->screen_bitmap; + if (info->flags & VM86_SCREEN_BITMAP) + mark_screen_rdonly(tsk->mm); + + /*call audit_syscall_exit since we do not exit via the normal paths */ + if (unlikely(current->audit_context)) + audit_syscall_exit(AUDITSC_RESULT(0), 0); + + __asm__ __volatile__( + "movl %0,%%esp\n\t" + "movl %1,%%ebp\n\t" + "mov %2, %%gs\n\t" + "jmp resume_userspace" + : /* no outputs */ + :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); + /* we never return here */ +} + +static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval) +{ + struct pt_regs * regs32; + + regs32 = save_v86_state(regs16); + regs32->eax = retval; + __asm__ __volatile__("movl %0,%%esp\n\t" + "movl %1,%%ebp\n\t" + "jmp resume_userspace" + : : "r" (regs32), "r" (current_thread_info())); +} + +static inline void set_IF(struct kernel_vm86_regs * regs) +{ + VEFLAGS |= VIF_MASK; + if (VEFLAGS & VIP_MASK) + return_to_32bit(regs, VM86_STI); +} + +static inline void clear_IF(struct kernel_vm86_regs * regs) +{ + VEFLAGS &= ~VIF_MASK; +} + +static inline void clear_TF(struct kernel_vm86_regs * regs) +{ + regs->pt.eflags &= ~TF_MASK; +} + +static inline void clear_AC(struct kernel_vm86_regs * regs) +{ + regs->pt.eflags &= ~AC_MASK; +} + +/* It is correct to call set_IF(regs) from the set_vflags_* + * functions. However someone forgot to call clear_IF(regs) + * in the opposite case. + * After the command sequence CLI PUSHF STI POPF you should + * end up with interrups disabled, but you ended up with + * interrupts enabled. + * ( I was testing my own changes, but the only bug I + * could find was in a function I had not changed. ) + * [KD] + */ + +static inline void set_vflags_long(unsigned long eflags, struct kernel_vm86_regs * regs) +{ + set_flags(VEFLAGS, eflags, current->thread.v86mask); + set_flags(regs->pt.eflags, eflags, SAFE_MASK); + if (eflags & IF_MASK) + set_IF(regs); + else + clear_IF(regs); +} + +static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs) +{ + set_flags(VFLAGS, flags, current->thread.v86mask); + set_flags(regs->pt.eflags, flags, SAFE_MASK); + if (flags & IF_MASK) + set_IF(regs); + else + clear_IF(regs); +} + +static inline unsigned long get_vflags(struct kernel_vm86_regs * regs) +{ + unsigned long flags = regs->pt.eflags & RETURN_MASK; + + if (VEFLAGS & VIF_MASK) + flags |= IF_MASK; + flags |= IOPL_MASK; + return flags | (VEFLAGS & current->thread.v86mask); +} + +static inline int is_revectored(int nr, struct revectored_struct * bitmap) +{ + __asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0" + :"=r" (nr) + :"m" (*bitmap),"r" (nr)); + return nr; +} + +#define val_byte(val, n) (((__u8 *)&val)[n]) + +#define pushb(base, ptr, val, err_label) \ + do { \ + __u8 __val = val; \ + ptr--; \ + if (put_user(__val, base + ptr) < 0) \ + goto err_label; \ + } while(0) + +#define pushw(base, ptr, val, err_label) \ + do { \ + __u16 __val = val; \ + ptr--; \ + if (put_user(val_byte(__val, 1), base + ptr) < 0) \ + goto err_label; \ + ptr--; \ + if (put_user(val_byte(__val, 0), base + ptr) < 0) \ + goto err_label; \ + } while(0) + +#define pushl(base, ptr, val, err_label) \ + do { \ + __u32 __val = val; \ + ptr--; \ + if (put_user(val_byte(__val, 3), base + ptr) < 0) \ + goto err_label; \ + ptr--; \ + if (put_user(val_byte(__val, 2), base + ptr) < 0) \ + goto err_label; \ + ptr--; \ + if (put_user(val_byte(__val, 1), base + ptr) < 0) \ + goto err_label; \ + ptr--; \ + if (put_user(val_byte(__val, 0), base + ptr) < 0) \ + goto err_label; \ + } while(0) + +#define popb(base, ptr, err_label) \ + ({ \ + __u8 __res; \ + if (get_user(__res, base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + __res; \ + }) + +#define popw(base, ptr, err_label) \ + ({ \ + __u16 __res; \ + if (get_user(val_byte(__res, 0), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + if (get_user(val_byte(__res, 1), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + __res; \ + }) + +#define popl(base, ptr, err_label) \ + ({ \ + __u32 __res; \ + if (get_user(val_byte(__res, 0), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + if (get_user(val_byte(__res, 1), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + if (get_user(val_byte(__res, 2), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + if (get_user(val_byte(__res, 3), base + ptr) < 0) \ + goto err_label; \ + ptr++; \ + __res; \ + }) + +/* There are so many possible reasons for this function to return + * VM86_INTx, so adding another doesn't bother me. We can expect + * userspace programs to be able to handle it. (Getting a problem + * in userspace is always better than an Oops anyway.) [KD] + */ +static void do_int(struct kernel_vm86_regs *regs, int i, + unsigned char __user * ssp, unsigned short sp) +{ + unsigned long __user *intr_ptr; + unsigned long segoffs; + + if (regs->pt.xcs == BIOSSEG) + goto cannot_handle; + if (is_revectored(i, &KVM86->int_revectored)) + goto cannot_handle; + if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored)) + goto cannot_handle; + intr_ptr = (unsigned long __user *) (i << 2); + if (get_user(segoffs, intr_ptr)) + goto cannot_handle; + if ((segoffs >> 16) == BIOSSEG) + goto cannot_handle; + pushw(ssp, sp, get_vflags(regs), cannot_handle); + pushw(ssp, sp, regs->pt.xcs, cannot_handle); + pushw(ssp, sp, IP(regs), cannot_handle); + regs->pt.xcs = segoffs >> 16; + SP(regs) -= 6; + IP(regs) = segoffs & 0xffff; + clear_TF(regs); + clear_IF(regs); + clear_AC(regs); + return; + +cannot_handle: + return_to_32bit(regs, VM86_INTx + (i << 8)); +} + +int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno) +{ + if (VMPI.is_vm86pus) { + if ( (trapno==3) || (trapno==1) ) + return_to_32bit(regs, VM86_TRAP + (trapno << 8)); + do_int(regs, trapno, (unsigned char __user *) (regs->pt.xss << 4), SP(regs)); + return 0; + } + if (trapno !=1) + return 1; /* we let this handle by the calling routine */ + if (current->ptrace & PT_PTRACED) { + unsigned long flags; + spin_lock_irqsave(¤t->sighand->siglock, flags); + sigdelset(¤t->blocked, SIGTRAP); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } + send_sig(SIGTRAP, current, 1); + current->thread.trap_no = trapno; + current->thread.error_code = error_code; + return 0; +} + +void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code) +{ + unsigned char opcode; + unsigned char __user *csp; + unsigned char __user *ssp; + unsigned short ip, sp, orig_flags; + int data32, pref_done; + +#define CHECK_IF_IN_TRAP \ + if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \ + newflags |= TF_MASK +#define VM86_FAULT_RETURN do { \ + if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \ + return_to_32bit(regs, VM86_PICRETURN); \ + if (orig_flags & TF_MASK) \ + handle_vm86_trap(regs, 0, 1); \ + return; } while (0) + + orig_flags = *(unsigned short *)®s->pt.eflags; + + csp = (unsigned char __user *) (regs->pt.xcs << 4); + ssp = (unsigned char __user *) (regs->pt.xss << 4); + sp = SP(regs); + ip = IP(regs); + + data32 = 0; + pref_done = 0; + do { + switch (opcode = popb(csp, ip, simulate_sigsegv)) { + case 0x66: /* 32-bit data */ data32=1; break; + case 0x67: /* 32-bit address */ break; + case 0x2e: /* CS */ break; + case 0x3e: /* DS */ break; + case 0x26: /* ES */ break; + case 0x36: /* SS */ break; + case 0x65: /* GS */ break; + case 0x64: /* FS */ break; + case 0xf2: /* repnz */ break; + case 0xf3: /* rep */ break; + default: pref_done = 1; + } + } while (!pref_done); + + switch (opcode) { + + /* pushf */ + case 0x9c: + if (data32) { + pushl(ssp, sp, get_vflags(regs), simulate_sigsegv); + SP(regs) -= 4; + } else { + pushw(ssp, sp, get_vflags(regs), simulate_sigsegv); + SP(regs) -= 2; + } + IP(regs) = ip; + VM86_FAULT_RETURN; + + /* popf */ + case 0x9d: + { + unsigned long newflags; + if (data32) { + newflags=popl(ssp, sp, simulate_sigsegv); + SP(regs) += 4; + } else { + newflags = popw(ssp, sp, simulate_sigsegv); + SP(regs) += 2; + } + IP(regs) = ip; + CHECK_IF_IN_TRAP; + if (data32) { + set_vflags_long(newflags, regs); + } else { + set_vflags_short(newflags, regs); + } + VM86_FAULT_RETURN; + } + + /* int xx */ + case 0xcd: { + int intno=popb(csp, ip, simulate_sigsegv); + IP(regs) = ip; + if (VMPI.vm86dbg_active) { + if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] ) + return_to_32bit(regs, VM86_INTx + (intno << 8)); + } + do_int(regs, intno, ssp, sp); + return; + } + + /* iret */ + case 0xcf: + { + unsigned long newip; + unsigned long newcs; + unsigned long newflags; + if (data32) { + newip=popl(ssp, sp, simulate_sigsegv); + newcs=popl(ssp, sp, simulate_sigsegv); + newflags=popl(ssp, sp, simulate_sigsegv); + SP(regs) += 12; + } else { + newip = popw(ssp, sp, simulate_sigsegv); + newcs = popw(ssp, sp, simulate_sigsegv); + newflags = popw(ssp, sp, simulate_sigsegv); + SP(regs) += 6; + } + IP(regs) = newip; + regs->pt.xcs = newcs; + CHECK_IF_IN_TRAP; + if (data32) { + set_vflags_long(newflags, regs); + } else { + set_vflags_short(newflags, regs); + } + VM86_FAULT_RETURN; + } + + /* cli */ + case 0xfa: + IP(regs) = ip; + clear_IF(regs); + VM86_FAULT_RETURN; + + /* sti */ + /* + * Damn. This is incorrect: the 'sti' instruction should actually + * enable interrupts after the /next/ instruction. Not good. + * + * Probably needs some horsing around with the TF flag. Aiee.. + */ + case 0xfb: + IP(regs) = ip; + set_IF(regs); + VM86_FAULT_RETURN; + + default: + return_to_32bit(regs, VM86_UNKNOWN); + } + + return; + +simulate_sigsegv: + /* FIXME: After a long discussion with Stas we finally + * agreed, that this is wrong. Here we should + * really send a SIGSEGV to the user program. + * But how do we create the correct context? We + * are inside a general protection fault handler + * and has just returned from a page fault handler. + * The correct context for the signal handler + * should be a mixture of the two, but how do we + * get the information? [KD] + */ + return_to_32bit(regs, VM86_UNKNOWN); +} + +/* ---------------- vm86 special IRQ passing stuff ----------------- */ + +#define VM86_IRQNAME "vm86irq" + +static struct vm86_irqs { + struct task_struct *tsk; + int sig; +} vm86_irqs[16]; + +static DEFINE_SPINLOCK(irqbits_lock); +static int irqbits; + +#define ALLOWED_SIGS ( 1 /* 0 = don't send a signal */ \ + | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \ + | (1 << SIGUNUSED) ) + +static irqreturn_t irq_handler(int intno, void *dev_id) +{ + int irq_bit; + unsigned long flags; + + spin_lock_irqsave(&irqbits_lock, flags); + irq_bit = 1 << intno; + if ((irqbits & irq_bit) || ! vm86_irqs[intno].tsk) + goto out; + irqbits |= irq_bit; + if (vm86_irqs[intno].sig) + send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1); + /* + * IRQ will be re-enabled when user asks for the irq (whether + * polling or as a result of the signal) + */ + disable_irq_nosync(intno); + spin_unlock_irqrestore(&irqbits_lock, flags); + return IRQ_HANDLED; + +out: + spin_unlock_irqrestore(&irqbits_lock, flags); + return IRQ_NONE; +} + +static inline void free_vm86_irq(int irqnumber) +{ + unsigned long flags; + + free_irq(irqnumber, NULL); + vm86_irqs[irqnumber].tsk = NULL; + + spin_lock_irqsave(&irqbits_lock, flags); + irqbits &= ~(1 << irqnumber); + spin_unlock_irqrestore(&irqbits_lock, flags); +} + +void release_vm86_irqs(struct task_struct *task) +{ + int i; + for (i = FIRST_VM86_IRQ ; i <= LAST_VM86_IRQ; i++) + if (vm86_irqs[i].tsk == task) + free_vm86_irq(i); +} + +static inline int get_and_reset_irq(int irqnumber) +{ + int bit; + unsigned long flags; + int ret = 0; + + if (invalid_vm86_irq(irqnumber)) return 0; + if (vm86_irqs[irqnumber].tsk != current) return 0; + spin_lock_irqsave(&irqbits_lock, flags); + bit = irqbits & (1 << irqnumber); + irqbits &= ~bit; + if (bit) { + enable_irq(irqnumber); + ret = 1; + } + + spin_unlock_irqrestore(&irqbits_lock, flags); + return ret; +} + + +static int do_vm86_irq_handling(int subfunction, int irqnumber) +{ + int ret; + switch (subfunction) { + case VM86_GET_AND_RESET_IRQ: { + return get_and_reset_irq(irqnumber); + } + case VM86_GET_IRQ_BITS: { + return irqbits; + } + case VM86_REQUEST_IRQ: { + int sig = irqnumber >> 8; + int irq = irqnumber & 255; + if (!capable(CAP_SYS_ADMIN)) return -EPERM; + if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM; + if (invalid_vm86_irq(irq)) return -EPERM; + if (vm86_irqs[irq].tsk) return -EPERM; + ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL); + if (ret) return ret; + vm86_irqs[irq].sig = sig; + vm86_irqs[irq].tsk = current; + return irq; + } + case VM86_FREE_IRQ: { + if (invalid_vm86_irq(irqnumber)) return -EPERM; + if (!vm86_irqs[irqnumber].tsk) return 0; + if (vm86_irqs[irqnumber].tsk != current) return -EPERM; + free_vm86_irq(irqnumber); + return 0; + } + } + return -EINVAL; +} +