block: annotate refault stalls from IO submission
author Johannes Weiner <hannes@cmpxchg.org>
Thu, 8 Aug 2019 19:03:00 +0000 (15:03 -0400)
committer Jens Axboe <axboe@kernel.dk>
Wed, 14 Aug 2019 14:50:01 +0000 (08:50 -0600)
psi tracks the time tasks wait for refaulting pages to become
uptodate, but it does not track the time spent submitting the IO. The
submission part can be significant if backing storage is contended or
when cgroup throttling (io.latency) is in effect - a lot of time is
spent in submit_bio(). In that case, we underreport memory pressure.

Annotate submit_bio() to account submission time as memory stall when
the bio is reading userspace workingset pages.

Tested-by: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/bio.c
block/blk-core.c
include/linux/blk_types.h

index 24a496f5d2e22652d02a7a52b615037e6be7f9db..54769659a434725c5ff08c229f16fd55bc119de6 100644 (file)
@@ -806,6 +806,9 @@ void __bio_add_page(struct bio *bio, struct page *page,
 
        bio->bi_iter.bi_size += len;
        bio->bi_vcnt++;
+
+       if (!bio_flagged(bio, BIO_WORKINGSET) && unlikely(PageWorkingset(page)))
+               bio_set_flag(bio, BIO_WORKINGSET);
 }
 EXPORT_SYMBOL_GPL(__bio_add_page);
 
index 919629ce4015ec7ad6a66300c04023333f6f5c2e..834aea04718f83dfb5db98c8c0ef18717bb9a67d 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/blk-cgroup.h>
 #include <linux/debugfs.h>
 #include <linux/bpf.h>
+#include <linux/psi.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
@@ -1134,6 +1135,10 @@ EXPORT_SYMBOL_GPL(direct_make_request);
  */
 blk_qc_t submit_bio(struct bio *bio)
 {
+       bool workingset_read = false;
+       unsigned long pflags;
+       blk_qc_t ret;
+
        if (blkcg_punt_bio_submit(bio))
                return BLK_QC_T_NONE;
 
@@ -1152,6 +1157,8 @@ blk_qc_t submit_bio(struct bio *bio)
                if (op_is_write(bio_op(bio))) {
                        count_vm_events(PGPGOUT, count);
                } else {
+                       if (bio_flagged(bio, BIO_WORKINGSET))
+                               workingset_read = true;
                        task_io_account_read(bio->bi_iter.bi_size);
                        count_vm_events(PGPGIN, count);
                }
@@ -1166,7 +1173,21 @@ blk_qc_t submit_bio(struct bio *bio)
                }
        }
 
-       return generic_make_request(bio);
+       /*
+        * If we're reading data that is part of the userspace
+        * workingset, count submission time as memory stall. When the
+        * device is congested, or the submitting cgroup IO-throttled,
+        * submission can be a significant part of overall IO time.
+        */
+       if (workingset_read)
+               psi_memstall_enter(&pflags);
+
+       ret = generic_make_request(bio);
+
+       if (workingset_read)
+               psi_memstall_leave(&pflags);
+
+       return ret;
 }
 EXPORT_SYMBOL(submit_bio);
 
index d6ce7b3ec8b13c5282286e2acaba37ad25f230b4..5a1118d4ef7e80049715b5e79fb995cb04aa9606 100644 (file)
@@ -209,6 +209,7 @@ enum {
        BIO_BOUNCED,            /* bio is a bounce bio */
        BIO_USER_MAPPED,        /* contains user pages */
        BIO_NULL_MAPPED,        /* contains invalid user pages */
+       BIO_WORKINGSET,         /* contains userspace workingset pages */
        BIO_QUIET,              /* Make BIO Quiet */
        BIO_CHAIN,              /* chained bio, ->bi_remaining in effect */
        BIO_REFFED,             /* bio has elevated ->bi_cnt */