drm/i915: Make for_each_engine_masked() more compact and quicker
authorChris Wilson <chris@chris-wilson.co.uk>
Sat, 27 Aug 2016 07:54:01 +0000 (08:54 +0100)
committerChris Wilson <chris@chris-wilson.co.uk>
Sat, 27 Aug 2016 08:42:53 +0000 (09:42 +0100)
Rather than walk the full array of engines checking whether each is in
the mask in turn, we can use the mask to jump to the right engines. This
should quicker for a sparse array of engines or mask, whilst generating
smaller code:

   text    data     bss     dec     hex filename
1251010    4579     800 1256389  132bc5 drivers/gpu/drm/i915/i915.ko
1250530    4579     800 1255909  1329e5 drivers/gpu/drm/i915/i915.ko

The downside is that we have to pass in a temporary, alas no C99
iterators yet.

[P.S. Joonas doesn't like having to pass extra temporaries into the
macro, and even less that I called them tmp. As yet, we haven't found a
macro that avoids passing in a temporary that is smaller. We probably
will get C99 iterators first!]

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20160827075401.16470-2-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem_request.c
drivers/gpu/drm/i915/i915_guc_submission.c
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/intel_uncore.c

index 9fdaf23336df0a792520ca7560e7bac0865b67a2..77a5478043e751a1af9cdc67006e78920656e964 100644 (file)
@@ -2093,13 +2093,16 @@ static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
                for_each_if (((id__) = (engine__)->id, \
                              intel_engine_initialized(engine__)))
 
+#define __mask_next_bit(mask) ({                                       \
+       int __idx = ffs(mask) - 1;                                      \
+       mask &= ~BIT(__idx);                                            \
+       __idx;                                                          \
+})
+
 /* Iterator over subset of engines selected by mask */
-#define for_each_engine_masked(engine__, dev_priv__, mask__) \
-       for ((engine__) = &(dev_priv__)->engine[0]; \
-            (engine__) < &(dev_priv__)->engine[I915_NUM_ENGINES]; \
-            (engine__)++) \
-               for_each_if (((mask__) & intel_engine_flag(engine__)) && \
-                            intel_engine_initialized(engine__))
+#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
+       for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask;        \
+            tmp__ ? (engine__ = &(dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : 0; )
 
 enum hdmi_force_audio {
        HDMI_AUDIO_OFF_DVI = -2,        /* no aux data for HDMI-DVI converter */
index 5e55270bb2dee0aee1638554d6709e3e1abb4e9d..9cc08a1e43c63f31ce7c62e9de5e06b6189d33c1 100644 (file)
@@ -766,6 +766,7 @@ static bool engine_retire_requests(struct intel_engine_cs *engine)
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 {
        struct intel_engine_cs *engine;
+       unsigned int tmp;
 
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
@@ -774,7 +775,7 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 
        GEM_BUG_ON(!dev_priv->gt.awake);
 
-       for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines)
+       for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp)
                if (engine_retire_requests(engine))
                        dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
 
index e4369411f07dca28a264300efadd33d2a6e97eca..32699a744aca2c11c5c346c346cc6783289c6efb 100644 (file)
@@ -330,6 +330,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
        struct i915_gem_context *ctx = client->owner;
        struct guc_context_desc desc;
        struct sg_table *sg;
+       unsigned int tmp;
        u32 gfx_addr;
 
        memset(&desc, 0, sizeof(desc));
@@ -339,7 +340,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
        desc.priority = client->priority;
        desc.db_id = client->doorbell_id;
 
-       for_each_engine_masked(engine, dev_priv, client->engines) {
+       for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
                struct intel_context *ce = &ctx->engine[engine->id];
                uint32_t guc_engine_id = engine->guc_id;
                struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
index 7610eca4f6878b0f0cac26910c3bfcde944a75a1..82358d4e0cc2aa8e6b336fda7cb312985b7851a4 100644 (file)
@@ -3169,6 +3169,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 
        if (hung) {
                char msg[80];
+               unsigned int tmp;
                int len;
 
                /* If some rings hung but others were still busy, only
@@ -3178,7 +3179,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
                        hung &= ~stuck;
                len = scnprintf(msg, sizeof(msg),
                                "%s on ", stuck == hung ? "No progress" : "Hang");
-               for_each_engine_masked(engine, dev_priv, hung)
+               for_each_engine_masked(engine, dev_priv, hung, tmp)
                        len += scnprintf(msg + len, sizeof(msg) - len,
                                         "%s, ", engine->name);
                msg[len-2] = '\0';
index 43f833901b8e36780595d96b13a24a68837cd904..e9f68cd56e3211f95ab37aded3eafa8b4cf49134 100644 (file)
@@ -1597,8 +1597,10 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv,
        if (engine_mask == ALL_ENGINES) {
                hw_mask = GEN6_GRDOM_FULL;
        } else {
+               unsigned int tmp;
+
                hw_mask = 0;
-               for_each_engine_masked(engine, dev_priv, engine_mask)
+               for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
                        hw_mask |= hw_engine_mask[engine->id];
        }
 
@@ -1714,15 +1716,16 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv,
                              unsigned engine_mask)
 {
        struct intel_engine_cs *engine;
+       unsigned int tmp;
 
-       for_each_engine_masked(engine, dev_priv, engine_mask)
+       for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
                if (gen8_request_engine_reset(engine))
                        goto not_ready;
 
        return gen6_reset_engines(dev_priv, engine_mask);
 
 not_ready:
-       for_each_engine_masked(engine, dev_priv, engine_mask)
+       for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
                gen8_unrequest_engine_reset(engine);
 
        return -EIO;