Btrfs: Avoid superfluous tree-log writeout
authorYan, Zheng <zheng.yan@oracle.com>
Thu, 12 Nov 2009 09:33:26 +0000 (09:33 +0000)
committerChris Mason <chris.mason@oracle.com>
Wed, 16 Dec 2009 02:24:25 +0000 (21:24 -0500)
We allow two log transactions at a time, but use same flag
to mark dirty tree-log btree blocks. So we may flush dirty
blocks belonging to newer log transaction when committing a
log transaction. This patch fixes the issue by using two
flags to mark dirty tree-log btree blocks.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-log.c

index 02b6afbd745020fb51bf43d88c93025459687e12..101940fab9b312991d900a62c9cbd67faf1c9213 100644 (file)
@@ -980,12 +980,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
 
        while (1) {
                ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
-                                   0, &start, &end, EXTENT_DIRTY);
+                               0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
                if (ret)
                        break;
 
-               clear_extent_dirty(&log_root_tree->dirty_log_pages,
-                                  start, end, GFP_NOFS);
+               clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
+                                 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
        }
        eb = fs_info->log_root_tree->node;
 
index 94627c4cc193343aacc82b3aca1a6e9eaaefd8dd..4a86508ce473645e88877efb49e69a4c183e5cd1 100644 (file)
@@ -4919,8 +4919,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
        btrfs_set_buffer_uptodate(buf);
 
        if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
-               set_extent_dirty(&root->dirty_log_pages, buf->start,
-                        buf->start + buf->len - 1, GFP_NOFS);
+               /*
+                * we allow two log transactions at a time, use different
+                * EXENT bit to differentiate dirty pages.
+                */
+               if (root->log_transid % 2 == 0)
+                       set_extent_dirty(&root->dirty_log_pages, buf->start,
+                                       buf->start + buf->len - 1, GFP_NOFS);
+               else
+                       set_extent_new(&root->dirty_log_pages, buf->start,
+                                       buf->start + buf->len - 1, GFP_NOFS);
        } else {
                set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
                         buf->start + buf->len - 1, GFP_NOFS);
index c207e8c32c9bfc5212e111edb440adf4b59f2059..b7b22c344b662d61bc4859874363a1af127c8d10 100644 (file)
@@ -354,7 +354,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
  * those extents are sent to disk but does not wait on them
  */
 int btrfs_write_marked_extents(struct btrfs_root *root,
-                              struct extent_io_tree *dirty_pages)
+                              struct extent_io_tree *dirty_pages, int mark)
 {
        int ret;
        int err = 0;
@@ -367,7 +367,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
 
        while (1) {
                ret = find_first_extent_bit(dirty_pages, start, &start, &end,
-                                           EXTENT_DIRTY);
+                                           mark);
                if (ret)
                        break;
                while (start <= end) {
@@ -413,7 +413,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
  * on all the pages and clear them from the dirty pages state tree
  */
 int btrfs_wait_marked_extents(struct btrfs_root *root,
-                             struct extent_io_tree *dirty_pages)
+                             struct extent_io_tree *dirty_pages, int mark)
 {
        int ret;
        int err = 0;
@@ -425,12 +425,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
        unsigned long index;
 
        while (1) {
-               ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
-                                           EXTENT_DIRTY);
+               ret = find_first_extent_bit(dirty_pages, start, &start, &end,
+                                           mark);
                if (ret)
                        break;
 
-               clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
+               clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
                while (start <= end) {
                        index = start >> PAGE_CACHE_SHIFT;
                        start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
@@ -460,13 +460,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
  * those extents are on disk for transaction or log commit
  */
 int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
-                                       struct extent_io_tree *dirty_pages)
+                               struct extent_io_tree *dirty_pages, int mark)
 {
        int ret;
        int ret2;
 
-       ret = btrfs_write_marked_extents(root, dirty_pages);
-       ret2 = btrfs_wait_marked_extents(root, dirty_pages);
+       ret = btrfs_write_marked_extents(root, dirty_pages, mark);
+       ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
        return ret || ret2;
 }
 
@@ -479,7 +479,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                return filemap_write_and_wait(btree_inode->i_mapping);
        }
        return btrfs_write_and_wait_marked_extents(root,
-                                          &trans->transaction->dirty_pages);
+                                          &trans->transaction->dirty_pages,
+                                          EXTENT_DIRTY);
 }
 
 /*
index d4e3e7a6938cddebb56e05ae4131b7055f08be05..93c7ccb33118f38d420c00c3b790fe0040b7f0ca 100644 (file)
@@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root);
 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
 int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
-                                       struct extent_io_tree *dirty_pages);
+                               struct extent_io_tree *dirty_pages, int mark);
 int btrfs_write_marked_extents(struct btrfs_root *root,
-                                       struct extent_io_tree *dirty_pages);
+                               struct extent_io_tree *dirty_pages, int mark);
 int btrfs_wait_marked_extents(struct btrfs_root *root,
-                                       struct extent_io_tree *dirty_pages);
+                               struct extent_io_tree *dirty_pages, int mark);
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
 #endif
index 741666a7676a80a6553f5b22a37a41dd31971d3e..31da0002e78bdb9c5811aa014308878df2e38a35 100644 (file)
@@ -1977,10 +1977,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 {
        int index1;
        int index2;
+       int mark;
        int ret;
        struct btrfs_root *log = root->log_root;
        struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
-       u64 log_transid = 0;
+       unsigned long log_transid = 0;
 
        mutex_lock(&root->log_mutex);
        index1 = root->log_transid % 2;
@@ -2014,24 +2015,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
                goto out;
        }
 
+       log_transid = root->log_transid;
+       if (log_transid % 2 == 0)
+               mark = EXTENT_DIRTY;
+       else
+               mark = EXTENT_NEW;
+
        /* we start IO on  all the marked extents here, but we don't actually
         * wait for them until later.
         */
-       ret = btrfs_write_marked_extents(log, &log->dirty_log_pages);
+       ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
        BUG_ON(ret);
 
        btrfs_set_root_node(&log->root_item, log->node);
 
        root->log_batch = 0;
-       log_transid = root->log_transid;
        root->log_transid++;
        log->log_transid = root->log_transid;
        root->log_start_pid = 0;
        smp_mb();
        /*
-        * log tree has been flushed to disk, new modifications of
-        * the log will be written to new positions. so it's safe to
-        * allow log writers to go in.
+        * IO has been started, blocks of the log tree have WRITTEN flag set
+        * in their headers. new modifications of the log will be written to
+        * new positions. so it's safe to allow log writers to go in.
         */
        mutex_unlock(&root->log_mutex);
 
@@ -2052,7 +2058,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
        index2 = log_root_tree->log_transid % 2;
        if (atomic_read(&log_root_tree->log_commit[index2])) {
-               btrfs_wait_marked_extents(log, &log->dirty_log_pages);
+               btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
                wait_log_commit(trans, log_root_tree,
                                log_root_tree->log_transid);
                mutex_unlock(&log_root_tree->log_mutex);
@@ -2072,16 +2078,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
         * check the full commit flag again
         */
        if (root->fs_info->last_trans_log_full_commit == trans->transid) {
-               btrfs_wait_marked_extents(log, &log->dirty_log_pages);
+               btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
                mutex_unlock(&log_root_tree->log_mutex);
                ret = -EAGAIN;
                goto out_wake_log_root;
        }
 
        ret = btrfs_write_and_wait_marked_extents(log_root_tree,
-                               &log_root_tree->dirty_log_pages);
+                               &log_root_tree->dirty_log_pages,
+                               EXTENT_DIRTY | EXTENT_NEW);
        BUG_ON(ret);
-       btrfs_wait_marked_extents(log, &log->dirty_log_pages);
+       btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
 
        btrfs_set_super_log_root(&root->fs_info->super_for_commit,
                                log_root_tree->node->start);
@@ -2147,12 +2154,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
 
        while (1) {
                ret = find_first_extent_bit(&log->dirty_log_pages,
-                                   0, &start, &end, EXTENT_DIRTY);
+                               0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
                if (ret)
                        break;
 
-               clear_extent_dirty(&log->dirty_log_pages,
-                                  start, end, GFP_NOFS);
+               clear_extent_bits(&log->dirty_log_pages, start, end,
+                                 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
        }
 
        if (log->log_transid > 0) {