x86: SGI UV: Fix irq affinity for hub based interrupts
author Dimitri Sivanich <sivanich@sgi.com>
Wed, 30 Sep 2009 16:02:59 +0000 (11:02 -0500)
committer Ingo Molnar <mingo@elte.hu>
Wed, 14 Oct 2009 07:17:01 +0000 (09:17 +0200)
This patch fixes handling of uv hub irq affinity.  IRQs with ALL or
NODE affinity can be routed to cpus other than their originally
assigned cpu.  Those with CPU affinity cannot be rerouted.

Signed-off-by: Dimitri Sivanich <sivanich@sgi.com>
LKML-Reference: <20090930160259.GA7822@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
arch/x86/include/asm/uv/uv_irq.h
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/uv_irq.c
drivers/misc/sgi-xp/xpc_uv.c

index 9613c8c0b64779862316b72a52b3d4b263d9d5f5..5397e12909528d8ea2e7777182640425dfde612e 100644 (file)
@@ -25,12 +25,21 @@ struct uv_IO_APIC_route_entry {
                dest            : 32;
 };
 
+enum {
+       UV_AFFINITY_ALL,        /* irq may be moved (IRQ_MOVE_PCNTXT) */
+       UV_AFFINITY_NODE,       /* handled like ALL by arch_enable_uv_irq() */
+       UV_AFFINITY_CPU         /* irq is not rebalanced (IRQ_NO_BALANCING) */
+};
+
 extern struct irq_chip uv_irq_chip;
 
-extern int arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long);
+extern int
+arch_enable_uv_irq(char *, unsigned int, int, int, unsigned long, int);
 extern void arch_disable_uv_irq(int, unsigned long);
+extern int uv_set_irq_affinity(unsigned int, const struct cpumask *);
 
-extern int uv_setup_irq(char *, int, int, unsigned long);
-extern void uv_teardown_irq(unsigned int, int, unsigned long);
+extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
+extern int uv_setup_irq(char *, int, int, unsigned long, int);
+extern void uv_teardown_irq(unsigned int);
 
 #endif /* _ASM_X86_UV_UV_IRQ_H */
index 8c718c93d079181a7c7267179c4dddadf035edff..bb52e7f6e95387127e06b81bec6a9d39d472fa00 100644 (file)
@@ -3731,9 +3731,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
  * on the specified blade to allow the sending of MSIs to the specified CPU.
  */
 int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
-                      unsigned long mmr_offset)
+                      unsigned long mmr_offset, int restrict)
 {
        const struct cpumask *eligible_cpu = cpumask_of(cpu);
+       struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
        int mmr_pnode;
        unsigned long mmr_value;
@@ -3749,6 +3750,11 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
        if (err != 0)
                return err;
 
+       if (restrict == UV_AFFINITY_CPU)
+               desc->status |= IRQ_NO_BALANCING;
+       else
+               desc->status |= IRQ_MOVE_PCNTXT;
+
        spin_lock_irqsave(&vector_lock, flags);
        set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
                                      irq_name);
@@ -3777,11 +3783,10 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
  * Disable the specified MMR located on the specified blade so that MSIs are
  * no longer allowed to be sent.
  */
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
+void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset)
 {
        unsigned long mmr_value;
        struct uv_IO_APIC_route_entry *entry;
-       int mmr_pnode;
 
        BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
 
@@ -3789,9 +3794,45 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
        entry->mask = 1;
 
-       mmr_pnode = uv_blade_to_pnode(mmr_blade);
        uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
 }
+
+/*
+ * Retarget a UV hub irq: obtain a destination for @mask from
+ * set_desc_affinity(), build a route entry for it and write that entry to
+ * the MMR recorded for @irq.
+ *
+ * Returns 0 on success, -1 if set_desc_affinity() yields BAD_APICID or if
+ * no MMR information is recorded for @irq.
+ */
+int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_cfg *cfg = desc->chip_data;
+       unsigned int dest;
+       unsigned long mmr_value;
+       struct uv_IO_APIC_route_entry *entry;
+       unsigned long mmr_offset;
+       /* int, to match the int * taken by uv_irq_2_mmr_info() */
+       int mmr_pnode;
+
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID)
+               return -1;
+
+       /* entry overlays mmr_value, so the fields below fill in the MMR image */
+       mmr_value = 0;
+       entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
+
+       entry->vector = cfg->vector;
+       entry->delivery_mode = apic->irq_delivery_mode;
+       entry->dest_mode = apic->irq_dest_mode;
+       entry->polarity = 0;
+       entry->trigger = 0;
+       entry->mask = 0;
+       entry->dest = dest;
+
+       /* Get previously stored MMR and pnode of hub sourcing interrupts */
+       if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode))
+               return -1;
+
+       uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+
+       if (cfg->move_in_progress)
+               send_cleanup_vector(cfg);
+
+       return 0;
+}
 #endif /* CONFIG_X86_64 */
 
 int __init io_apic_get_redir_entries (int ioapic)
index aeef529917e44f896d6e05c77c2b6232e84c8361..9a83775ab0f30d391835ab98c0875399c761941f 100644 (file)
@@ -9,10 +9,22 @@
  */
 
 #include <linux/module.h>
+#include <linux/rbtree.h>
 #include <linux/irq.h>
 
 #include <asm/apic.h>
 #include <asm/uv/uv_irq.h>
+#include <asm/uv/uv_hub.h>
+
+/*
+ * Per-irq record of the MMR offset and the pnode of the hub sourcing
+ * interrupts.  Records are kept in an rb tree ordered by irq number.
+ */
+struct uv_irq_2_mmr_pnode {
+       struct rb_node          list;   /* linkage into uv_irq_root */
+       unsigned long           offset; /* MMR offset */
+       int                     pnode;  /* pnode of the hub */
+       int                     irq;    /* key the tree is ordered by */
+};
+/*
+ * uv_irq_lock guards uv_irq_root and every record linked into it.  It is
+ * defined with DEFINE_SPINLOCK so that it is properly initialised before
+ * its first use.
+ */
+static DEFINE_SPINLOCK(uv_irq_lock);
+static struct rb_root uv_irq_root;
 
 static void uv_noop(unsigned int irq)
 {
@@ -39,25 +51,106 @@ struct irq_chip uv_irq_chip = {
        .unmask         = uv_noop,
        .eoi            = uv_ack_apic,
        .end            = uv_noop,
+       .set_affinity   = uv_set_irq_affinity,
 };
 
+/*
+ * Record the MMR offset and the pnode of the hub sourcing interrupts for
+ * @irq in the rb tree.  If a record for @irq already exists it is updated
+ * in place and the freshly allocated one is released.
+ *
+ * Returns 0 on success, -ENOMEM if the record cannot be allocated.
+ */
+static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade)
+{
+       struct rb_node **slot = &uv_irq_root.rb_node;
+       struct rb_node *above = NULL;
+       struct uv_irq_2_mmr_pnode *node;
+       struct uv_irq_2_mmr_pnode *cur;
+       unsigned long flags;
+
+       /* The allocation is done before the lock is taken. */
+       node = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL,
+                               uv_blade_to_memory_nid(blade));
+       if (node == NULL)
+               return -ENOMEM;
+
+       node->irq = irq;
+       node->offset = offset;
+       node->pnode = uv_blade_to_pnode(blade);
+
+       spin_lock_irqsave(&uv_irq_lock, flags);
+
+       /* Walk down to the position where this irq belongs. */
+       while (*slot != NULL) {
+               above = *slot;
+               cur = rb_entry(above, struct uv_irq_2_mmr_pnode, list);
+
+               if (unlikely(irq == cur->irq)) {
+                       /* A record for this irq exists: refresh it. */
+                       cur->pnode = uv_blade_to_pnode(blade);
+                       cur->offset = offset;
+                       spin_unlock_irqrestore(&uv_irq_lock, flags);
+                       kfree(node);
+                       return 0;
+               }
+
+               slot = (irq < cur->irq) ? &above->rb_left : &above->rb_right;
+       }
+
+       /* No record yet: insert the new node into the rbtree. */
+       rb_link_node(&node->list, above, slot);
+       rb_insert_color(&node->list, &uv_irq_root);
+       spin_unlock_irqrestore(&uv_irq_lock, flags);
+
+       return 0;
+}
+
+/*
+ * Look up the MMR offset and pnode recorded for @irq.  On a hit the values
+ * are stored through @offset and @pnode and 0 is returned; -1 is returned
+ * when the tree holds no record for @irq.
+ */
+int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode)
+{
+       struct uv_irq_2_mmr_pnode *entry;
+       struct rb_node *pos;
+       unsigned long flags;
+       int ret = -1;
+
+       spin_lock_irqsave(&uv_irq_lock, flags);
+       for (pos = uv_irq_root.rb_node; pos != NULL; ) {
+               entry = rb_entry(pos, struct uv_irq_2_mmr_pnode, list);
+
+               if (irq == entry->irq) {
+                       *offset = entry->offset;
+                       *pnode = entry->pnode;
+                       ret = 0;
+                       break;
+               }
+
+               pos = (irq < entry->irq) ? pos->rb_left : pos->rb_right;
+       }
+       spin_unlock_irqrestore(&uv_irq_lock, flags);
+
+       return ret;
+}
+
 /*
  * Set up a mapping of an available irq and vector, and enable the specified
  * MMR that defines the MSI that is to be sent to the specified CPU when an
  * interrupt is raised.
  */
 int uv_setup_irq(char *irq_name, int cpu, int mmr_blade,
-                unsigned long mmr_offset)
+                unsigned long mmr_offset, int restrict)
 {
-       int irq;
-       int ret;
+       int irq, ret;
+
+       irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade));
 
-       irq = create_irq();
        if (irq <= 0)
                return -EBUSY;
 
-       ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset);
-       if (ret != irq)
+       ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset,
+               restrict);
+       if (ret == irq)
+               uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade);
+       else
                destroy_irq(irq);
 
        return ret;
@@ -71,9 +164,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq);
  *
  * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq().
  */
-void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset)
+void uv_teardown_irq(unsigned int irq)
 {
-       arch_disable_uv_irq(mmr_blade, mmr_offset);
+       struct uv_irq_2_mmr_pnode *e;
+       struct rb_node *n;
+       unsigned long irqflags;
+
+       /*
+        * Search the rb tree for the record stored for this irq.  If one is
+        * found, disable its MMR using the stored pnode and offset, unlink
+        * the record from the tree and free it.
+        */
+       spin_lock_irqsave(&uv_irq_lock, irqflags);
+       n = uv_irq_root.rb_node;
+       while (n) {
+               e = rb_entry(n, struct uv_irq_2_mmr_pnode, list);
+               if (e->irq == irq) {
+                       arch_disable_uv_irq(e->pnode, e->offset);
+                       rb_erase(n, &uv_irq_root);
+                       kfree(e);
+                       break;
+               }
+               if (irq < e->irq)
+                       n = n->rb_left;
+               else
+                       n = n->rb_right;
+       }
+       spin_unlock_irqrestore(&uv_irq_lock, irqflags);
+       /* The irq is destroyed whether or not a record was found. */
         destroy_irq(irq);
 }
 EXPORT_SYMBOL_GPL(uv_teardown_irq);
index c76677afda1b50591b855759fa711253d99458ee..b5bbe59f9c5729c964f3749bf9e94883256da6d2 100644 (file)
@@ -106,7 +106,8 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name)
        int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade);
 
 #if defined CONFIG_X86_64
-       mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset);
+       mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset,
+                       UV_AFFINITY_CPU);
        if (mq->irq < 0) {
                dev_err(xpc_part, "uv_setup_irq() returned error=%d\n",
                        -mq->irq);
@@ -136,7 +137,7 @@ static void
 xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq)
 {
 #if defined CONFIG_X86_64
-       uv_teardown_irq(mq->irq, mq->mmr_blade, mq->mmr_offset);
+       uv_teardown_irq(mq->irq);
 
 #elif defined CONFIG_IA64_GENERIC || defined CONFIG_IA64_SGI_UV
        int mmr_pnode;