(exopc << GRU_CB_EXOPC_SHFT);
}
-/*
- * Prefetch a cacheline. Fetch is unconditional. Must page fault if
- * no valid TLB entry is found.
- * ??? should I use actual "load" or hardware prefetch???
- */
-static inline void gru_prefetch(void *p)
-{
- *(volatile char *)p;
-}
-
/*
* Architecture specific intrinsics
*/
if ((vma->vm_flags & (VM_SHARED | VM_WRITE)) != (VM_SHARED | VM_WRITE))
return -EPERM;
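+	/* gseg mappings must be made in whole GRU_GSEG_PAGESIZE units */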
+ if (vma->vm_start & (GRU_GSEG_PAGESIZE - 1) ||
+ vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
+ return -EINVAL;
+
vma->vm_flags |=
(VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
VM_RESERVED);
module_init(gru_init);
module_exit(gru_exit);
-module_param(options, ulong, 0644);
-MODULE_PARM_DESC(options, "Various debug options");
+module_param(gru_options, ulong, 0644);
+MODULE_PARM_DESC(gru_options, "Various debug options");
MODULE_AUTHOR("Silicon Graphics, Inc.");
MODULE_LICENSE("GPL");
cpus_possible = uv_blade_nr_possible_cpus(gru->gs_blade_id);
num = GRU_NUM_KERNEL_CBR * cpus_possible;
- cbr_map = reserve_gru_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
+ cbr_map = gru_reserve_cb_resources(gru, GRU_CB_COUNT_TO_AU(num), NULL);
gru->gs_reserved_cbrs += num;
num = GRU_NUM_KERNEL_DSR_BYTES * cpus_possible;
- dsr_map = reserve_gru_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
+ dsr_map = gru_reserve_ds_resources(gru, GRU_DS_BYTES_TO_AU(num), NULL);
gru->gs_reserved_dsr_bytes += num;
gru->gs_active_contexts++;
}
unlock_cch_handle(cch);
- if (options & GRU_QUICKLOOK)
+ if (gru_options & GRU_QUICKLOOK)
quicktest(gru);
return 0;
}
#include "grutables.h"
#include "gruhandles.h"
-unsigned long options __read_mostly;
+unsigned long gru_options __read_mostly;
static struct device_driver gru_driver = {
.name = "gru"
return bits;
}
-unsigned long reserve_gru_cb_resources(struct gru_state *gru, int cbr_au_count,
+unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
char *cbmap)
{
return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
cbmap);
}
-unsigned long reserve_gru_ds_resources(struct gru_state *gru, int dsr_au_count,
+unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
char *dsmap)
{
return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
{
gru->gs_active_contexts++;
gts->ts_cbr_map =
- reserve_gru_cb_resources(gru, gts->ts_cbr_au_count,
+ gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
gts->ts_cbr_idx);
gts->ts_dsr_map =
- reserve_gru_ds_resources(gru, gts->ts_dsr_au_count, NULL);
+ gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
}
static void free_gru_resources(struct gru_state *gru,
/*
 * Prefetching cachelines helps hardware performance.
+ * (Strictly a performance enhancement. Not functionally required.)
*/
static void prefetch_data(void *p, int num, int stride)
{
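	/*
	 * Body sketch (elided from this hunk): step through the buffer,
	 * issuing the kernel's prefetch() hint once per cacheline.
	 */
	while (num-- > 0) {
		prefetch(p);
		p += stride;
	}
}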
* gru_nopage
*
* Map the user's GRU segment
+ *
+ * Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
*/
int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
vma, vaddr, GSEG_BASE(vaddr));
STAT(nopfn);
+ /* The following check ensures vaddr is a valid address in the VMA */
gts = gru_find_thread_state(vma, TSID(vaddr, vma));
if (!gts)
return VM_FAULT_SIGBUS;
}
if (!gts->ts_gru) {
- while (!gru_assign_gru_context(gts)) {
+ if (!gru_assign_gru_context(gts)) {
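+		/* no GRU context available: drop locks and retry the fault later */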
mutex_unlock(&gts->ts_ctxlock);
preempt_enable();
schedule_timeout(GRU_ASSIGN_DELAY); /* true hack ZZZ */
static int options_show(struct seq_file *s, void *p)
{
- seq_printf(s, "0x%lx\n", options);
+ seq_printf(s, "0x%lx\n", gru_options);
return 0;
}
(buf, userbuf, count < sizeof(buf) ? count : sizeof(buf)))
return -EFAULT;
if (!strict_strtoul(buf, 10, &val))
- options = val;
+ gru_options = val;
return count;
}
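
/*
 * Note: values are parsed base 10 by options_set() but displayed in hex
 * by options_show(). Usage sketch (proc file location assumed):
 *
 *	echo 1 > /proc/sgi_uv/gru/options
 */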
#define __GRUTABLES_H__
/*
+ * GRU Chiplet:
+ * The GRU is a user addressable memory accelerator. It provides
+ * several forms of load, store, memset, bcopy instructions. In addition, it
+ * contains special instructions for AMOs, sending messages to message
+ * queues, etc.
+ *
+ * The GRU is an integral part of the node controller. It connects
+ * directly to the cpu socket. In its current implementation, there are 2
+ * GRU chiplets in the node controller on each blade (~node).
+ *
+ * The entire GRU memory space is fully coherent and cacheable by the cpus.
+ *
+ * Each GRU chiplet has a physical memory map that looks like the following:
+ *
+ * +-----------------+
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * |/////////////////|
+ * +-----------------+
+ * | system control |
+ * +-----------------+ _______ +-------------+
+ * |/////////////////| / | |
+ * |/////////////////| / | |
+ * |/////////////////| / | instructions|
+ * |/////////////////| / | |
+ * |/////////////////| / | |
+ * |/////////////////| / |-------------|
+ * |/////////////////| / | |
+ * +-----------------+ | |
+ * | context 15 | | data |
+ * +-----------------+ | |
+ * | ...... | \ | |
+ * +-----------------+ \____________ +-------------+
+ * | context 1 |
+ * +-----------------+
+ * | context 0 |
+ * +-----------------+
+ *
+ * Each of the "contexts" is a chunk of memory that can be mmapped into user
+ * space. The context consists of 2 parts:
+ *
+ * - an instruction space that can be directly accessed by the user
+ * to issue GRU instructions and to check instruction status.
+ *
+ * - a data area that acts as normal RAM.
+ *
+ * User instructions contain virtual addresses of data to be accessed by the
+ * GRU. The GRU contains a TLB that is used to convert these user virtual
+ * addresses to physical addresses.
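+ *
+ * For illustration only (user code normally goes through a library, and
+ * the device name here is an assumption), mapping a context might look
+ * like:
+ *
+ *	fd = open("/dev/gru", O_RDWR);
+ *	gseg = mmap(NULL, GRU_GSEG_PAGESIZE, PROT_READ | PROT_WRITE,
+ *		    MAP_SHARED, fd, 0);
+ *
+ * MAP_SHARED with write access and GRU_GSEG_PAGESIZE alignment are
+ * required by the driver's mmap handler.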
+ *
+ * The "system control" area of the GRU chiplet is used by the kernel driver
+ * to manage user contexts and to perform functions such as TLB dropin and
+ * purging.
+ *
+ * One context may be reserved for the kernel and used for cross-partition
+ * communication. The GRU will also be used to asynchronously zero out
+ * large blocks of memory (not currently implemented).
+ *
+ *
* Tables:
*
* VDATA-VMA Data - Holds a few parameters. Head of linked list of
#define GRU_STEAL_DELAY ((HZ * 200) / 1000)
#define STAT(id) do { \
- if (options & OPT_STATS) \
+ if (gru_options & OPT_STATS) \
atomic_long_inc(&gru_stats.id); \
} while (0)
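
/* e.g. STAT(nopfn) bumps gru_stats.nopfn when OPT_STATS is enabled */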
#ifdef CONFIG_SGI_GRU_DEBUG
#define gru_dbg(dev, fmt, x...) \
do { \
- if (options & OPT_DPRINT) \
+ if (gru_options & OPT_DPRINT) \
dev_dbg(dev, "%s: " fmt, __func__, x); \
} while (0)
#else
extern int gru_proc_init(void);
extern void gru_proc_exit(void);
-extern unsigned long reserve_gru_cb_resources(struct gru_state *gru,
+extern unsigned long gru_reserve_cb_resources(struct gru_state *gru,
int cbr_au_count, char *cbmap);
-extern unsigned long reserve_gru_ds_resources(struct gru_state *gru,
+extern unsigned long gru_reserve_ds_resources(struct gru_state *gru,
int dsr_au_count, char *dsmap);
extern int gru_fault(struct vm_area_struct *, struct vm_fault *vmf);
extern struct gru_mm_struct *gru_register_mmu_notifier(void);
extern void gru_flush_tlb_range(struct gru_mm_struct *gms, unsigned long start,
unsigned long len);
-extern unsigned long options;
+extern unsigned long gru_options;
#endif /* __GRUTABLES_H__ */
struct gru_mm_struct *gms = container_of(mn, struct gru_mm_struct,
ms_notifier);
- atomic_dec(&gms->ms_range_active);
+ /* ..._and_test() provides needed barrier */
+ (void)atomic_dec_and_test(&gms->ms_range_active);
+
wake_up_all(&gms->ms_wait_queue);
gru_dbg(grudev, "gms %p, start 0x%lx, end 0x%lx\n", gms, start, end);
}
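
/*
 * Matching wait side, as a sketch (the call site is not in this hunk):
 * atomic_dec_and_test() is a value-returning atomic and therefore fully
 * ordered, so a sleeper such as
 *
 *	wait_event(gms->ms_wait_queue,
 *		   atomic_read(&gms->ms_range_active) == 0);
 *
 * observes the decrement before the wake_up_all() above and cannot miss
 * the wakeup.
 */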