raid5: make_request does less prepare wait
authorShaohua Li <shli@kernel.org>
Wed, 9 Apr 2014 03:25:47 +0000 (11:25 +0800)
committerNeilBrown <neilb@suse.de>
Wed, 9 Apr 2014 04:42:38 +0000 (14:42 +1000)
In NUMA machine, prepare_to_wait/finish_wait in make_request exposes a
lot of contention for sequential workload (or big request size
workload). For such workload, each bio includes several stripes. So we
can just do prepare_to_wait/finish_wait once for the whole bio instead
of every stripe.  This reduces the lock contention completely for such
workload. Random workloads might have similar lock contention too,
but I didn't see it yet, maybe because my storage is still not fast
enough.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: NeilBrown <neilb@suse.de>
drivers/md/raid5.c

index 16f5c21963db5391ed25fd1e185ab8399f353e74..a904a2c80fc83fc5651fe7f26ed37cd40bb37224 100644 (file)
@@ -4552,6 +4552,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)
        struct stripe_head *sh;
        const int rw = bio_data_dir(bi);
        int remaining;
+       DEFINE_WAIT(w);
+       bool do_prepare;
 
        if (unlikely(bi->bi_rw & REQ_FLUSH)) {
                md_flush_request(mddev, bi);
@@ -4575,15 +4577,18 @@ static void make_request(struct mddev *mddev, struct bio * bi)
        bi->bi_next = NULL;
        bi->bi_phys_segments = 1;       /* over-loaded to count active stripes */
 
+       prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
        for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
-               DEFINE_WAIT(w);
                int previous;
                int seq;
 
+               do_prepare = false;
        retry:
                seq = read_seqcount_begin(&conf->gen_lock);
                previous = 0;
-               prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
+               if (do_prepare)
+                       prepare_to_wait(&conf->wait_for_overlap, &w,
+                               TASK_UNINTERRUPTIBLE);
                if (unlikely(conf->reshape_progress != MaxSector)) {
                        /* spinlock is needed as reshape_progress may be
                         * 64bit on a 32bit platform, and so it might be
@@ -4604,6 +4609,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                                    : logical_sector >= conf->reshape_safe) {
                                        spin_unlock_irq(&conf->device_lock);
                                        schedule();
+                                       do_prepare = true;
                                        goto retry;
                                }
                        }
@@ -4640,6 +4646,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                                if (must_retry) {
                                        release_stripe(sh);
                                        schedule();
+                                       do_prepare = true;
                                        goto retry;
                                }
                        }
@@ -4663,8 +4670,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                                prepare_to_wait(&conf->wait_for_overlap,
                                                &w, TASK_INTERRUPTIBLE);
                                if (logical_sector >= mddev->suspend_lo &&
-                                   logical_sector < mddev->suspend_hi)
+                                   logical_sector < mddev->suspend_hi) {
                                        schedule();
+                                       do_prepare = true;
+                               }
                                goto retry;
                        }
 
@@ -4677,9 +4686,9 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                                md_wakeup_thread(mddev->thread);
                                release_stripe(sh);
                                schedule();
+                               do_prepare = true;
                                goto retry;
                        }
-                       finish_wait(&conf->wait_for_overlap, &w);
                        set_bit(STRIPE_HANDLE, &sh->state);
                        clear_bit(STRIPE_DELAYED, &sh->state);
                        if ((bi->bi_rw & REQ_SYNC) &&
@@ -4689,10 +4698,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                } else {
                        /* cannot get stripe for read-ahead, just give-up */
                        clear_bit(BIO_UPTODATE, &bi->bi_flags);
-                       finish_wait(&conf->wait_for_overlap, &w);
                        break;
                }
        }
+       finish_wait(&conf->wait_for_overlap, &w);
 
        remaining = raid5_dec_bi_active_stripes(bi);
        if (remaining == 0) {