KVM: In-kernel I/O APIC model
authorEddie Dong <eddie.dong@intel.com>
Wed, 18 Jul 2007 09:03:39 +0000 (12:03 +0300)
committerAvi Kivity <avi@qumranet.com>
Sat, 13 Oct 2007 08:18:25 +0000 (10:18 +0200)
This allows in-kernel host-side device drivers to raise guest interrupts
without going to userspace.

[avi: fix level-triggered interrupt redelivery on eoi]
[avi: add missing #include]
[avi: avoid redelivery of edge-triggered interrupt]
[avi: implement polarity]
[avi: don't deliver edge-triggered interrupts when unmasking]
[avi: fix host oops on invalid guest access]

Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
drivers/kvm/Makefile
drivers/kvm/ioapic.c [new file with mode: 0644]
drivers/kvm/irq.c
drivers/kvm/irq.h
drivers/kvm/kvm.h
drivers/kvm/kvm_main.c

index 3bf7276b032d61f5f15e13299c4da68dc15957fc..e5a8f4d3e97386f0ba73e629b8d2a13341fc5dd0 100644 (file)
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c
new file mode 100644 (file)
index 0000000..c7992e6
--- /dev/null
@@ -0,0 +1,388 @@
+/*
+ *  Copyright (C) 2001  MandrakeSoft S.A.
+ *
+ *    MandrakeSoft S.A.
+ *    43, rue d'Aboukir
+ *    75002 Paris - France
+ *    http://www.linux-mandrake.com/
+ *    http://www.mandrakesoft.com/
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ *  Yunhong Jiang <yunhong.jiang@intel.com>
+ *  Yaozu (Eddie) Dong <eddie.dong@intel.com>
+ *  Based on Xen 3.1 code.
+ */
+
+#include "kvm.h"
+#include <linux/kvm.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/smp.h>
+#include <linux/hrtimer.h>
+#include <linux/io.h>
+#include <asm/processor.h>
+#include <asm/msr.h>
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/apicdef.h>
+#include <asm/io_apic.h>
+#include "irq.h"
+/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+#define ioapic_debug(fmt, arg...)
+static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
+
+static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
+                                         unsigned long addr,
+                                         unsigned long length)
+{
+       unsigned long result = 0;
+
+       switch (ioapic->ioregsel) {
+       case IOAPIC_REG_VERSION:
+               result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16)
+                         | (IOAPIC_VERSION_ID & 0xff));
+               break;
+
+       case IOAPIC_REG_APIC_ID:
+       case IOAPIC_REG_ARB_ID:
+               result = ((ioapic->id & 0xf) << 24);
+               break;
+
+       default:
+               {
+                       u32 redir_index = (ioapic->ioregsel - 0x10) >> 1;
+                       u64 redir_content;
+
+                       ASSERT(redir_index < IOAPIC_NUM_PINS);
+
+                       redir_content = ioapic->redirtbl[redir_index].bits;
+                       result = (ioapic->ioregsel & 0x1) ?
+                           (redir_content >> 32) & 0xffffffff :
+                           redir_content & 0xffffffff;
+                       break;
+               }
+       }
+
+       return result;
+}
+
+static void ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
+{
+       union ioapic_redir_entry *pent;
+
+       pent = &ioapic->redirtbl[idx];
+
+       if (!pent->fields.mask) {
+               ioapic_deliver(ioapic, idx);
+               if (pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
+                       pent->fields.remote_irr = 1;
+       }
+       if (!pent->fields.trig_mode)
+               ioapic->irr &= ~(1 << idx);
+}
+
+static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
+{
+       unsigned index;
+
+       switch (ioapic->ioregsel) {
+       case IOAPIC_REG_VERSION:
+               /* Writes are ignored. */
+               break;
+
+       case IOAPIC_REG_APIC_ID:
+               ioapic->id = (val >> 24) & 0xf;
+               break;
+
+       case IOAPIC_REG_ARB_ID:
+               break;
+
+       default:
+               index = (ioapic->ioregsel - 0x10) >> 1;
+
+               ioapic_debug("change redir index %x val %x", index, val);
+               if (index >= IOAPIC_NUM_PINS)
+                       return;
+               if (ioapic->ioregsel & 1) {
+                       ioapic->redirtbl[index].bits &= 0xffffffff;
+                       ioapic->redirtbl[index].bits |= (u64) val << 32;
+               } else {
+                       ioapic->redirtbl[index].bits &= ~0xffffffffULL;
+                       ioapic->redirtbl[index].bits |= (u32) val;
+                       ioapic->redirtbl[index].fields.remote_irr = 0;
+               }
+               if (ioapic->irr & (1 << index))
+                       ioapic_service(ioapic, index);
+               break;
+       }
+}
+
+static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
+                          struct kvm_lapic *target,
+                          u8 vector, u8 trig_mode, u8 delivery_mode)
+{
+       ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
+                    delivery_mode);
+
+       ASSERT((delivery_mode == dest_Fixed) ||
+              (delivery_mode == dest_LowestPrio));
+
+       kvm_apic_set_irq(target, vector, trig_mode);
+}
+
+static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
+                                      u8 dest_mode)
+{
+       u32 mask = 0;
+       int i;
+       struct kvm *kvm = ioapic->kvm;
+       struct kvm_vcpu *vcpu;
+
+       ioapic_debug("dest %d dest_mode %d", dest, dest_mode);
+
+       if (dest_mode == 0) {   /* Physical mode. */
+               if (dest == 0xFF) {     /* Broadcast. */
+                       for (i = 0; i < KVM_MAX_VCPUS; ++i)
+                               if (kvm->vcpus[i] && kvm->vcpus[i]->apic)
+                                       mask |= 1 << i;
+                       return mask;
+               }
+               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+                       vcpu = kvm->vcpus[i];
+                       if (!vcpu)
+                               continue;
+                       if (kvm_apic_match_physical_addr(vcpu->apic, dest)) {
+                               if (vcpu->apic)
+                                       mask = 1 << i;
+                               break;
+                       }
+               }
+       } else if (dest != 0)   /* Logical mode, MDA non-zero. */
+               for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+                       vcpu = kvm->vcpus[i];
+                       if (!vcpu)
+                               continue;
+                       if (vcpu->apic &&
+                           kvm_apic_match_logical_addr(vcpu->apic, dest))
+                               mask |= 1 << vcpu->vcpu_id;
+               }
+       ioapic_debug("mask %x", mask);
+       return mask;
+}
+
+static void ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
+{
+       u8 dest = ioapic->redirtbl[irq].fields.dest_id;
+       u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode;
+       u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode;
+       u8 vector = ioapic->redirtbl[irq].fields.vector;
+       u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode;
+       u32 deliver_bitmask;
+       struct kvm_lapic *target;
+       struct kvm_vcpu *vcpu;
+       int vcpu_id;
+
+       ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
+                    "vector=%x trig_mode=%x",
+                    dest, dest_mode, delivery_mode, vector, trig_mode);
+
+       deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
+       if (!deliver_bitmask) {
+               ioapic_debug("no target on destination");
+               return;
+       }
+
+       switch (delivery_mode) {
+       case dest_LowestPrio:
+               target =
+                   kvm_apic_round_robin(ioapic->kvm, vector, deliver_bitmask);
+               if (target != NULL)
+                       ioapic_inj_irq(ioapic, target, vector,
+                                      trig_mode, delivery_mode);
+               else
+                       ioapic_debug("null round robin: "
+                                    "mask=%x vector=%x delivery_mode=%x",
+                                    deliver_bitmask, vector, dest_LowestPrio);
+               break;
+       case dest_Fixed:
+               for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) {
+                       if (!(deliver_bitmask & (1 << vcpu_id)))
+                               continue;
+                       deliver_bitmask &= ~(1 << vcpu_id);
+                       vcpu = ioapic->kvm->vcpus[vcpu_id];
+                       if (vcpu) {
+                               target = vcpu->apic;
+                               ioapic_inj_irq(ioapic, target, vector,
+                                              trig_mode, delivery_mode);
+                       }
+               }
+               break;
+
+               /* TODO: NMI */
+       default:
+               printk(KERN_WARNING "Unsupported delivery mode %d\n",
+                      delivery_mode);
+               break;
+       }
+}
+
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
+{
+       u32 old_irr = ioapic->irr;
+       u32 mask = 1 << irq;
+       union ioapic_redir_entry entry;
+
+       if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
+               entry = ioapic->redirtbl[irq];
+               level ^= entry.fields.polarity;
+               if (!level)
+                       ioapic->irr &= ~mask;
+               else {
+                       ioapic->irr |= mask;
+                       if ((!entry.fields.trig_mode && old_irr != ioapic->irr)
+                           || !entry.fields.remote_irr)
+                               ioapic_service(ioapic, irq);
+               }
+       }
+}
+
+static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
+{
+       int i;
+
+       for (i = 0; i < IOAPIC_NUM_PINS; i++)
+               if (ioapic->redirtbl[i].fields.vector == vector)
+                       return i;
+       return -1;
+}
+
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+       struct kvm_ioapic *ioapic = kvm->vioapic;
+       union ioapic_redir_entry *ent;
+       int gsi;
+
+       gsi = get_eoi_gsi(ioapic, vector);
+       if (gsi == -1) {
+               printk(KERN_WARNING "Can't find redir item for %d EOI\n",
+                      vector);
+               return;
+       }
+
+       ent = &ioapic->redirtbl[gsi];
+       ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
+
+       ent->fields.remote_irr = 0;
+       if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
+               ioapic_deliver(ioapic, gsi);
+}
+
+static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+       struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+
+       return ((addr >= ioapic->base_address &&
+                (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
+}
+
+static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+                            void *val)
+{
+       struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+       u32 result;
+
+       ioapic_debug("addr %lx", (unsigned long)addr);
+       ASSERT(!(addr & 0xf));  /* check alignment */
+
+       addr &= 0xff;
+       switch (addr) {
+       case IOAPIC_REG_SELECT:
+               result = ioapic->ioregsel;
+               break;
+
+       case IOAPIC_REG_WINDOW:
+               result = ioapic_read_indirect(ioapic, addr, len);
+               break;
+
+       default:
+               result = 0;
+               break;
+       }
+       switch (len) {
+       case 8:
+               *(u64 *) val = result;
+               break;
+       case 1:
+       case 2:
+       case 4:
+               memcpy(val, (char *)&result, len);
+               break;
+       default:
+               printk(KERN_WARNING "ioapic: wrong length %d\n", len);
+       }
+}
+
+static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+                             const void *val)
+{
+       struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
+       u32 data;
+
+       ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
+                    addr, len, val);
+       ASSERT(!(addr & 0xf));  /* check alignment */
+       if (len == 4 || len == 8)
+               data = *(u32 *) val;
+       else {
+               printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
+               return;
+       }
+
+       addr &= 0xff;
+       switch (addr) {
+       case IOAPIC_REG_SELECT:
+               ioapic->ioregsel = data;
+               break;
+
+       case IOAPIC_REG_WINDOW:
+               ioapic_write_indirect(ioapic, data);
+               break;
+
+       default:
+               break;
+       }
+}
+
+int kvm_ioapic_init(struct kvm *kvm)
+{
+       struct kvm_ioapic *ioapic;
+       int i;
+
+       ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
+       if (!ioapic)
+               return -ENOMEM;
+       kvm->vioapic = ioapic;
+       for (i = 0; i < IOAPIC_NUM_PINS; i++)
+               ioapic->redirtbl[i].fields.mask = 1;
+       ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
+       ioapic->dev.read = ioapic_mmio_read;
+       ioapic->dev.write = ioapic_mmio_write;
+       ioapic->dev.in_range = ioapic_in_range;
+       ioapic->dev.private = ioapic;
+       ioapic->kvm = kvm;
+       kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev);
+       return 0;
+}
index 0b4430a0cae078fea81910db40f207a6cf0f757c..5265f8267b3b6e08e131c36c1aca443e929e60bf 100644 (file)
@@ -74,7 +74,3 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
                smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
 }
 
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
-{
-       /* TODO: for kernel IOAPIC */
-}
index 57e23bdac530b933003239dd7d6a2092af271550..6ed856a41e231215e26175e31b812637ec588ddb 100644 (file)
@@ -60,6 +60,50 @@ int kvm_pic_read_irq(struct kvm_pic *s);
 int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 
+#define IOAPIC_NUM_PINS  24
+#define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */
+#define IOAPIC_EDGE_TRIG  0
+#define IOAPIC_LEVEL_TRIG 1
+
+#define IOAPIC_DEFAULT_BASE_ADDRESS  0xfec00000
+#define IOAPIC_MEM_LENGTH            0x100
+
+/* Direct registers. */
+#define IOAPIC_REG_SELECT  0x00
+#define IOAPIC_REG_WINDOW  0x10
+#define IOAPIC_REG_EOI     0x40        /* IA64 IOSAPIC only */
+
+/* Indirect registers. */
+#define IOAPIC_REG_APIC_ID 0x00        /* x86 IOAPIC only */
+#define IOAPIC_REG_VERSION 0x01
+#define IOAPIC_REG_ARB_ID  0x02        /* x86 IOAPIC only */
+
+struct kvm_ioapic {
+       u64 base_address;
+       u32 ioregsel;
+       u32 id;
+       u32 irr;
+       u32 pad;
+       union ioapic_redir_entry {
+               u64 bits;
+               struct {
+                       u8 vector;
+                       u8 delivery_mode:3;
+                       u8 dest_mode:1;
+                       u8 delivery_status:1;
+                       u8 polarity:1;
+                       u8 remote_irr:1;
+                       u8 trig_mode:1;
+                       u8 mask:1;
+                       u8 reserve:7;
+                       u8 reserved[4];
+                       u8 dest_id;
+               } fields;
+       } redirtbl[IOAPIC_NUM_PINS];
+       struct kvm_io_device dev;
+       struct kvm *kvm;
+};
+
 struct kvm_lapic {
        unsigned long base_address;
        struct kvm_io_device dev;
@@ -96,8 +140,15 @@ void kvm_free_apic(struct kvm_lapic *apic);
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
+struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
+                                      unsigned long bitmap);
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_set_irq(struct kvm_lapic *apic, u8 vec, u8 trig);
+int kvm_ioapic_init(struct kvm *kvm);
+void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
 
 #endif
index a5790cb21ffc10eb5f8174732ee6123dbd81a232..8d07a993af94ec02703726a896f0f5ac5c4555ea 100644 (file)
@@ -410,6 +410,7 @@ struct kvm {
        struct kvm_io_bus mmio_bus;
        struct kvm_io_bus pio_bus;
        struct kvm_pic *vpic;
+       struct kvm_ioapic *vioapic;
 };
 
 static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
@@ -417,6 +418,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
        return kvm->vpic;
 }
 
+static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
+{
+       return kvm->vioapic;
+}
+
 static inline int irqchip_in_kernel(struct kvm *kvm)
 {
        return pic_irqchip(kvm) != 0;
index 401e3cdc46073c14ead35a97d723d7d5302260c5..ffbdadd879713de75d7ec182707d0817de47829a 100644 (file)
@@ -381,6 +381,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
        kvm_io_bus_destroy(&kvm->pio_bus);
        kvm_io_bus_destroy(&kvm->mmio_bus);
        kfree(kvm->vpic);
+       kfree(kvm->vioapic);
        kvm_free_vcpus(kvm);
        kvm_free_physmem(kvm);
        kfree(kvm);
@@ -2771,8 +2772,14 @@ static long kvm_vm_ioctl(struct file *filp,
        case KVM_CREATE_IRQCHIP:
                r = -ENOMEM;
                kvm->vpic = kvm_create_pic(kvm);
-               if (kvm->vpic)
-                       r = 0;
+               if (kvm->vpic) {
+                       r = kvm_ioapic_init(kvm);
+                       if (r) {
+                               kfree(kvm->vpic);
+                               kvm->vpic = NULL;
+                               goto out;
+                       }
+               }
                else
                        goto out;
                break;
@@ -2787,7 +2794,9 @@ static long kvm_vm_ioctl(struct file *filp,
                                kvm_pic_set_irq(pic_irqchip(kvm),
                                        irq_event.irq,
                                        irq_event.level);
-                       /* TODO: IOAPIC */
+                       kvm_ioapic_set_irq(kvm->vioapic,
+                                       irq_event.irq,
+                                       irq_event.level);
                        r = 0;
                }
                break;