jbd2: trace when lock_buffer in do_get_write_access takes a long time
authorTheodore Ts'o <tytso@mit.edu>
Sun, 21 Apr 2013 20:47:54 +0000 (16:47 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Sun, 21 Apr 2013 20:47:54 +0000 (16:47 -0400)
While investigating interactivity problems it was clear that processes
sometimes stall for long periods of times if an attempt is made to
lock a buffer which is undergoing writeback.  It would stall in
a trace looking something like

[<ffffffff811a39de>] __lock_buffer+0x2e/0x30
[<ffffffff8123a60f>] do_get_write_access+0x43f/0x4b0
[<ffffffff8123a7cb>] jbd2_journal_get_write_access+0x2b/0x50
[<ffffffff81220f79>] __ext4_journal_get_write_access+0x39/0x80
[<ffffffff811f3198>] ext4_reserve_inode_write+0x78/0xa0
[<ffffffff811f3209>] ext4_mark_inode_dirty+0x49/0x220
[<ffffffff811f57d1>] ext4_dirty_inode+0x41/0x60
[<ffffffff8119ac3e>] __mark_inode_dirty+0x4e/0x2d0
[<ffffffff8118b9b9>] update_time+0x79/0xc0
[<ffffffff8118ba98>] file_update_time+0x98/0x100
[<ffffffff81110ffc>] __generic_file_aio_write+0x17c/0x3b0
[<ffffffff811112aa>] generic_file_aio_write+0x7a/0xf0
[<ffffffff811ea853>] ext4_file_write+0x83/0xd0
[<ffffffff81172b23>] do_sync_write+0xa3/0xe0
[<ffffffff811731ae>] vfs_write+0xae/0x180
[<ffffffff8117361d>] sys_write+0x4d/0x90
[<ffffffff8159d62d>] system_call_fastpath+0x1a/0x1f
[<ffffffffffffffff>] 0xffffffffffffffff

Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/jbd2/transaction.c
include/trace/events/jbd2.h

index a1920da22802d4f5a758e61fce58df657e1d01ed..10f524c59ea88d48bf4f85f42e6fb2eca0d7a55b 100644 (file)
@@ -639,6 +639,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
        int error;
        char *frozen_buffer = NULL;
        int need_copy = 0;
+       unsigned long start_lock, time_lock;
 
        if (is_handle_aborted(handle))
                return -EROFS;
@@ -654,9 +655,16 @@ repeat:
 
        /* @@@ Need to check for errors here at some point. */
 
+       start_lock = jiffies;
        lock_buffer(bh);
        jbd_lock_bh_state(bh);
 
+       /* If it takes too long to lock the buffer, trace it */
+       time_lock = jbd2_time_diff(start_lock, jiffies);
+       if (time_lock > HZ/10)
+               trace_jbd2_lock_buffer_stall(bh->b_bdev->bd_dev,
+                       jiffies_to_msecs(time_lock));
+
        /* We now hold the buffer lock so it is safe to query the buffer
         * state.  Is the buffer dirty?
         *
index 070df49e4a1df18aa6ddc0cb1e8b87b0b62c4130..c1d1f3eb242d2738456af4e1bf1443b057030d6c 100644 (file)
@@ -358,6 +358,27 @@ TRACE_EVENT(jbd2_write_superblock,
                  MINOR(__entry->dev), __entry->write_op)
 );
 
+TRACE_EVENT(jbd2_lock_buffer_stall,
+
+       TP_PROTO(dev_t dev, unsigned long stall_ms),
+
+       TP_ARGS(dev, stall_ms),
+
+       TP_STRUCT__entry(
+               __field(        dev_t, dev      )
+               __field(unsigned long, stall_ms )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = dev;
+               __entry->stall_ms       = stall_ms;
+       ),
+
+       TP_printk("dev %d,%d stall_ms %lu",
+               MAJOR(__entry->dev), MINOR(__entry->dev),
+               __entry->stall_ms)
+);
+
 #endif /* _TRACE_JBD2_H */
 
 /* This part must be outside protection */