Btrfs: delayed-refs: use rb_first_cached for href_root
author Liu Bo <bo.liu@linux.alibaba.com>
Wed, 22 Aug 2018 19:51:49 +0000 (03:51 +0800)
committer David Sterba <dsterba@suse.com>
Mon, 15 Oct 2018 15:23:33 +0000 (17:23 +0200)
rb_first_cached() trades an extra pointer "leftmost" for doing the same
job as rb_first() but in O(1).

Functions manipulating href_root need to get the first entry; this
converts href_root to use rb_first_cached().
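
To illustrate, here is a minimal sketch of an insert against struct
rb_root_cached from <linux/rbtree.h> (struct item is a hypothetical
stand-in for btrfs_delayed_ref_head; the real conversion is in
htree_insert below):

struct item {
        u64 key;
        struct rb_node node;
};

static struct item *item_insert(struct rb_root_cached *root,
                                struct item *ins)
{
        struct rb_node **p = &root->rb_root.rb_node;
        struct rb_node *parent = NULL;
        bool leftmost = true;

        while (*p) {
                struct item *entry = rb_entry(*p, struct item, node);

                parent = *p;
                if (ins->key < entry->key) {
                        p = &(*p)->rb_left;
                } else if (ins->key > entry->key) {
                        /* once we go right, the new node cannot be leftmost */
                        p = &(*p)->rb_right;
                        leftmost = false;
                } else {
                        return entry;   /* key already present */
                }
        }

        rb_link_node(&ins->node, parent, p);
        /* the leftmost hint keeps the cached first node up to date */
        rb_insert_color_cached(&ins->node, root, leftmost);
        return NULL;
}

After such an insert, rb_first_cached(root) is a single pointer read
instead of a leftward walk down the tree.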

This patch is the first in a sequence of similar updates to other
rbtrees, and what follows is an analysis of the expected behaviour and
improvements.

There's a common pattern:

while ((node = rb_first(root))) {
        entry = rb_entry(node, ...);
        next = rb_next(node);
        rb_erase(node, root);
        cleanup(entry);
}

rb_first needs to traverse the tree up to logN depth, rb_erase can
completely reshuffle the tree. With the caching we'll skip the traversal
in rb_first.  That's a cached memory access vs looped pointer
dereference trade-off that IMHO has a clear winner.
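
With the cached root the drain loop keeps its shape, but getting the
first node becomes O(1) (a sketch; item and cleanup() are hypothetical
stand-ins as above):

while ((node = rb_first_cached(root)) != NULL) {
        entry = rb_entry(node, struct item, node);
        rb_erase_cached(node, root);
        cleanup(entry);
}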

Measurements show there's not much difference in a sample tree with
10000 nodes: 4.5s / rb_first and 4.8s / rb_first_cached. Real effects of
caching and pointer chasing are unpredictable though.

Further optimizations can be done to avoid the expensive rb_erase step.
In some cases it's ok to process the nodes in any order, so the tree can
be traversed in post-order, without rebalancing the child nodes and just
calling free. Care must be taken regarding the next node; see the sketch
below.
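
One way to do that (a sketch under the "any order is fine" assumption,
not part of this patch) is rbtree_postorder_for_each_entry_safe() from
<linux/rbtree.h>, which caches the next entry before the loop body runs,
so freeing the current one is safe. Note it walks the plain rb_root
embedded in the cached root:

struct item *entry, *next;

rbtree_postorder_for_each_entry_safe(entry, next, &root->rb_root, node)
        cleanup(entry);
/* every node is gone; reset both the root and the cached leftmost */
*root = RB_ROOT_CACHED;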

Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Signed-off-by: Liu Bo <bo.liu@linux.alibaba.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ update changelog from mail discussions ]
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/transaction.c

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 62ff545ba1f7146536b33e972fdb1e935e84df14..f07952e16a3bf07a07670e8d32200ac7f64cb39c 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -101,14 +101,15 @@ static int comp_refs(struct btrfs_delayed_ref_node *ref1,
 }
 
 /* insert a new ref to head ref rbtree */
-static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
+static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
                                                   struct rb_node *node)
 {
-       struct rb_node **p = &root->rb_node;
+       struct rb_node **p = &root->rb_root.rb_node;
        struct rb_node *parent_node = NULL;
        struct btrfs_delayed_ref_head *entry;
        struct btrfs_delayed_ref_head *ins;
        u64 bytenr;
+       bool leftmost = true;
 
        ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
        bytenr = ins->bytenr;
@@ -117,16 +118,18 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
                entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
                                 href_node);
 
-               if (bytenr < entry->bytenr)
+               if (bytenr < entry->bytenr) {
                        p = &(*p)->rb_left;
-               else if (bytenr > entry->bytenr)
+               } else if (bytenr > entry->bytenr) {
                        p = &(*p)->rb_right;
-               else
+                       leftmost = false;
+               } else {
                        return entry;
+               }
        }
 
        rb_link_node(node, parent_node, p);
-       rb_insert_color(node, root);
+       rb_insert_color_cached(node, root, leftmost);
        return NULL;
 }
 
@@ -164,10 +167,11 @@ static struct btrfs_delayed_ref_node* tree_insert(struct rb_root *root,
  * If return_bigger is given, the next bigger entry is returned if no exact
  * match is found.
  */
-static struct btrfs_delayed_ref_head *
-find_ref_head(struct rb_root *root, u64 bytenr,
-             int return_bigger)
+static struct btrfs_delayed_ref_head* find_ref_head(
+               struct btrfs_delayed_ref_root *dr, u64 bytenr,
+               int return_bigger)
 {
+       struct rb_root *root = &dr->href_root.rb_root;
        struct rb_node *n;
        struct btrfs_delayed_ref_head *entry;
 
@@ -187,7 +191,7 @@ find_ref_head(struct rb_root *root, u64 bytenr,
                if (bytenr > entry->bytenr) {
                        n = rb_next(&entry->href_node);
                        if (!n)
-                               n = rb_first(root);
+                               n = rb_first_cached(&dr->href_root);
                        entry = rb_entry(n, struct btrfs_delayed_ref_head,
                                         href_node);
                        return entry;
@@ -357,12 +361,12 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans)
 
 again:
        start = delayed_refs->run_delayed_start;
-       head = find_ref_head(&delayed_refs->href_root, start, 1);
+       head = find_ref_head(delayed_refs, start, 1);
        if (!head && !loop) {
                delayed_refs->run_delayed_start = 0;
                start = 0;
                loop = true;
-               head = find_ref_head(&delayed_refs->href_root, start, 1);
+               head = find_ref_head(delayed_refs, start, 1);
                if (!head)
                        return NULL;
        } else if (!head && loop) {
@@ -903,7 +907,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
 {
-       return find_ref_head(&delayed_refs->href_root, bytenr, 0);
+       return find_ref_head(delayed_refs, bytenr, 0);
 }
 
 void __cold btrfs_delayed_ref_exit(void)
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index d9f2a4ebd5db86d8637d567ce626903bb37c0747..88438b6cee451cac710e72282c7b70e7ee6e7228 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -148,7 +148,7 @@ struct btrfs_delayed_data_ref {
 
 struct btrfs_delayed_ref_root {
        /* head ref rbtree */
-       struct rb_root href_root;
+       struct rb_root_cached href_root;
 
        /* dirty extent records */
        struct rb_root dirty_extent_root;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3611df2ce5c12650496cb6e14532311eba3d8a82..a21a9678681384e7d3d32f60b0a76dd7f306c24d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4203,7 +4203,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                return ret;
        }
 
-       while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
+       while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
                struct btrfs_delayed_ref_head *head;
                struct rb_node *n;
                bool pin_bytes = false;
@@ -4239,7 +4239,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                if (head->processing == 0)
                        delayed_refs->num_heads_ready--;
                atomic_dec(&delayed_refs->num_entries);
-               rb_erase(&head->href_node, &delayed_refs->href_root);
+               rb_erase_cached(&head->href_node, &delayed_refs->href_root);
                RB_CLEAR_NODE(&head->href_node);
                spin_unlock(&head->lock);
                spin_unlock(&delayed_refs->lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 68654231821581909efbeafa673f0ace79051a6f..30b3d8561768e4ea29036fe33ca62d8677533320 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2454,7 +2454,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
                return 1;
        }
        delayed_refs->num_heads--;
-       rb_erase(&head->href_node, &delayed_refs->href_root);
+       rb_erase_cached(&head->href_node, &delayed_refs->href_root);
        RB_CLEAR_NODE(&head->href_node);
        spin_unlock(&head->lock);
        spin_unlock(&delayed_refs->lock);
@@ -2940,7 +2940,7 @@ again:
                        btrfs_create_pending_block_groups(trans);
 
                spin_lock(&delayed_refs->lock);
-               node = rb_first(&delayed_refs->href_root);
+               node = rb_first_cached(&delayed_refs->href_root);
                if (!node) {
                        spin_unlock(&delayed_refs->lock);
                        goto out;
@@ -6929,7 +6929,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
         * at this point we have a head with no other entries.  Go
         * ahead and process it.
         */
-       rb_erase(&head->href_node, &delayed_refs->href_root);
+       rb_erase_cached(&head->href_node, &delayed_refs->href_root);
        RB_CLEAR_NODE(&head->href_node);
        atomic_dec(&delayed_refs->num_entries);
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e7856e15adbf4e98f8aa837ea5c9fc0ea433aaf4..3b1cc978d409d19c6caeda873d40123ad872435f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -44,7 +44,8 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
        WARN_ON(refcount_read(&transaction->use_count) == 0);
        if (refcount_dec_and_test(&transaction->use_count)) {
                BUG_ON(!list_empty(&transaction->list));
-               WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
+               WARN_ON(!RB_EMPTY_ROOT(
+                               &transaction->delayed_refs.href_root.rb_root));
                if (transaction->delayed_refs.pending_csums)
                        btrfs_err(transaction->fs_info,
                                  "pending csums is %llu",
@@ -245,7 +246,7 @@ loop:
 
        memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
 
-       cur_trans->delayed_refs.href_root = RB_ROOT;
+       cur_trans->delayed_refs.href_root = RB_ROOT_CACHED;
        cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
        atomic_set(&cur_trans->delayed_refs.num_entries, 0);