IB/hfi1: Optimize pio_buf and send_context structs
author Sebastian Sanchez <sebastian.sanchez@intel.com>
Tue, 25 Oct 2016 20:12:34 +0000 (13:12 -0700)
committer Doug Ledford <dledford@redhat.com>
Tue, 15 Nov 2016 21:37:27 +0000 (16:37 -0500)
Both the pio_buf and send_context structs have oversized
fields and cacheline layouts that can be optimized.

Reduce oversized fields for both structs.
Make sure the pio_buf struct fits within a cacheline.
Move read-only fields to their own cacheline in the
send_context struct.

All of this will avoid cacheline trading as the ring
progresses and pio buffers/send contexts are used.

Reviewed-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Sebastian Sanchez <sebastian.sanchez@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
drivers/infiniband/hw/hfi1/pio.c
drivers/infiniband/hw/hfi1/pio.h
drivers/infiniband/hw/hfi1/pio_copy.c

index 516fac38d31ee33b47806cefd1203a6342edbe4d..86a7f365b62481cc92ead966653feb7a47d66cf2 100644 (file)
@@ -765,6 +765,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
        sc->hw_context = hw_context;
        cr_group_addresses(sc, &dma);
        sc->credits = sci->credits;
+       sc->size = sc->credits * PIO_BLOCK_SIZE;
 
 /* PIO Send Memory Address details */
 #define PIO_ADDR_CONTEXT_MASK 0xfful
@@ -1470,9 +1471,7 @@ retry:
 
        /* finish filling in the buffer outside the lock */
        pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
-       pbuf->size = sc->credits * PIO_BLOCK_SIZE;
-       pbuf->end = sc->base_addr + pbuf->size;
-       pbuf->block_count = blocks;
+       pbuf->end = sc->base_addr + sc->size;
        pbuf->qw_written = 0;
        pbuf->carry_bytes = 0;
        pbuf->carry.val64 = 0;
index 498b548055e00804f6dc7ea7c72476b36d4428ca..867e5ffc35952238eb5f3a6b231e0e71e5252dc5 100644 (file)
@@ -83,43 +83,43 @@ struct pio_buf {
        void *arg;              /* argument for cb */
        void __iomem *start;    /* buffer start address */
        void __iomem *end;      /* context end address */
-       unsigned long size;     /* context size, in bytes */
        unsigned long sent_at;  /* buffer is sent when <= free */
-       u32 block_count;        /* size of buffer, in blocks */
-       u32 qw_written;         /* QW written so far */
-       u32 carry_bytes;        /* number of valid bytes in carry */
        union mix carry;        /* pending unwritten bytes */
+       u16 qw_written;         /* QW written so far */
+       u8 carry_bytes; /* number of valid bytes in carry */
 };
 
 /* cache line aligned pio buffer array */
 union pio_shadow_ring {
        struct pio_buf pbuf;
-       u64 unused[16];         /* cache line spacer */
 } ____cacheline_aligned;
 
 /* per-NUMA send context */
 struct send_context {
        /* read-only after init */
        struct hfi1_devdata *dd;                /* device */
-       void __iomem *base_addr;        /* start of PIO memory */
        union pio_shadow_ring *sr;      /* shadow ring */
+       void __iomem *base_addr;        /* start of PIO memory */
+       u32 __percpu *buffers_allocated;/* count of buffers allocated */
+       u32 size;                       /* context size, in bytes */
 
-       struct work_struct halt_work;   /* halted context work queue entry */
-       unsigned long flags;            /* flags */
        int node;                       /* context home node */
-       int type;                       /* context type */
-       u32 sw_index;                   /* software index number */
-       u32 hw_context;                 /* hardware context number */
-       u32 credits;                    /* number of blocks in context */
        u32 sr_size;                    /* size of the shadow ring */
-       u32 group;                      /* credit return group */
+       u16 flags;                      /* flags */
+       u8  type;                       /* context type */
+       u8  sw_index;                   /* software index number */
+       u8  hw_context;                 /* hardware context number */
+       u8  group;                      /* credit return group */
+
        /* allocator fields */
        spinlock_t alloc_lock ____cacheline_aligned_in_smp;
        u32 sr_head;                    /* shadow ring head */
        unsigned long fill;             /* official alloc count */
        unsigned long alloc_free;       /* copy of free (less cache thrash) */
-       u32 __percpu *buffers_allocated;/* count of buffers allocated */
        u32 fill_wrap;                  /* tracks fill within ring */
+       u32 credits;                    /* number of blocks in context */
+       /* adding a new field here would make it part of this cacheline */
+
        /* releaser fields */
        spinlock_t release_lock ____cacheline_aligned_in_smp;
        u32 sr_tail;                    /* shadow ring tail */
@@ -131,6 +131,7 @@ struct send_context {
        u32 credit_intr_count;          /* count of credit intr users */
        u64 credit_ctrl;                /* cache for credit control */
        wait_queue_head_t halt_wait;    /* wait until kernel sees interrupt */
+       struct work_struct halt_work;   /* halted context work queue entry */
 };
 
 /* send context flags */
index aa777364310738a2e1a80ba434b59f7798ffdc2e..03024cec78dd867b10e5b66cb5d22e3e1b808c41 100644 (file)
@@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
                                dest += sizeof(u64);
                        }
 
-                       dest -= pbuf->size;
-                       dend -= pbuf->size;
+                       dest -= pbuf->sc->size;
+                       dend -= pbuf->sc->size;
                }
 
                /* write 8-byte non-SOP, non-wrap chunk data */
@@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
                                dest += sizeof(u64);
                        }
 
-                       dest -= pbuf->size;
-                       dend -= pbuf->size;
+                       dest -= pbuf->sc->size;
+                       dend -= pbuf->sc->size;
                }
 
                /* write 8-byte non-SOP, non-wrap chunk data */
@@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
                        dest += sizeof(u64);
                }
 
-               dest -= pbuf->size;
-               dend -= pbuf->size;
+               dest -= pbuf->sc->size;
+               dend -= pbuf->sc->size;
        }
 
        /* write 8-byte non-SOP, non-wrap chunk data */
@@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
                 */
                /* adjust if we have wrapped */
                if (dest >= pbuf->end)
-                       dest -= pbuf->size;
+                       dest -= pbuf->sc->size;
                /* jump to the SOP range if within the first block */
                else if (pbuf->qw_written < PIO_BLOCK_QWS)
                        dest += SOP_DISTANCE;
@@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf,
                        dest += sizeof(u64);
                }
 
-               dest -= pbuf->size;
-               dend -= pbuf->size;
+               dest -= pbuf->sc->size;
+               dend -= pbuf->sc->size;
        }
 
        /* write 8-byte non-SOP, non-wrap chunk data */
@@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
                         */
                        /* adjust if we've wrapped */
                        if (dest >= pbuf->end)
-                               dest -= pbuf->size;
+                               dest -= pbuf->sc->size;
                        /* jump to SOP range if within the first block */
                        else if (pbuf->qw_written < PIO_BLOCK_QWS)
                                dest += SOP_DISTANCE;
@@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf)
         */
        /* adjust if we have wrapped */
        if (dest >= pbuf->end)
-               dest -= pbuf->size;
+               dest -= pbuf->sc->size;
        /* jump to the SOP range if within the first block */
        else if (pbuf->qw_written < PIO_BLOCK_QWS)
                dest += SOP_DISTANCE;