Btrfs: fix transaction throttling for delayed refs
author: Josef Bacik <jbacik@fusionio.com>
Wed, 12 Jun 2013 17:56:06 +0000 (13:56 -0400)
committer: Josef Bacik <jbacik@fusionio.com>
Mon, 1 Jul 2013 12:52:28 +0000 (08:52 -0400)
Dave has an fs_mark script that can make btrfs abort on machines with a
sufficient amount of RAM.  This is because with more RAM we can keep more dirty
metadata in cache, which in a roundabout way makes for many more pending delayed
refs.  What happens is that we end up not throttling the transaction enough, so
when we go to commit the transaction after completely filling the file system
we abort() because we have used all of the space in the global reserve and still
have delayed refs to run.  To fix this we need to make the delayed ref flushing
and the transaction throttling dependent upon the number of delayed refs that we
have instead of how much reserved space is left in the global reserve.  With
this patch we not only stop aborting transactions but also get a smoother run
speed with fs_mark, and it makes us about 10% faster.  Thanks,

Reported-by: David Sterba <dsterba@suse.cz>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/transaction.c

index 0049fe0f3f74781693dd9bfe48ec7be95e67a909..76e4983b39ead5e5cfde8e30ae90000b9a54cd1b 100644 (file)
@@ -3056,6 +3056,8 @@ static inline u64 btrfs_calc_trunc_metadata_size(struct btrfs_root *root,
                num_items;
 }
 
+int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
+                                      struct btrfs_root *root);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, unsigned long count);
index ca1893edda523a6dc765a1818bb22c6570bb7b9d..6d5c5f73ad6432b8755bb419f66ad3c06780c98e 100644 (file)
@@ -2526,6 +2526,51 @@ static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
        return 0;
 }
 
+static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
+{
+       u64 num_bytes;
+
+       num_bytes = heads * (sizeof(struct btrfs_extent_item) +
+                            sizeof(struct btrfs_extent_inline_ref));
+       if (!btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
+               num_bytes += heads * sizeof(struct btrfs_tree_block_info);
+
+       /*
+        * We don't ever fill up leaves all the way, so this plain quotient is
+        * an underestimate.  NOTE(review): no multiply by 2 happens here.
+        */
+       return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
+}
+
+int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
+                                      struct btrfs_root *root)
+{
+       struct btrfs_block_rsv *global_rsv;
+       u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
+       u64 num_bytes;
+       int ret = 0; /* becomes 1 if reserved <= num_bytes */
+
+       num_bytes = btrfs_calc_trans_metadata_size(root, 1);
+       num_heads = heads_to_leaves(root, num_heads); /* now a leaf count */
+       if (num_heads > 1)
+               num_bytes += (num_heads - 1) * root->leafsize;
+       num_bytes <<= 1; /* double the estimate */
+       global_rsv = &root->fs_info->global_block_rsv;
+
+       /*
+        * If we can't allocate any more chunks let's make sure we have _lots_
+        * of wiggle room since running delayed refs can create more delayed refs.
+        */
+       if (global_rsv->space_info->full)
+               num_bytes <<= 1;
+
+       spin_lock(&global_rsv->lock); /* read reserved under the rsv lock */
+       if (global_rsv->reserved <= num_bytes)
+               ret = 1;
+       spin_unlock(&global_rsv->lock);
+       return ret;
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far.  count can be
@@ -2573,7 +2618,8 @@ progress:
                old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
                if (old) {
                        DEFINE_WAIT(__wait);
-                       if (delayed_refs->num_entries < 16348)
+                       if (delayed_refs->flushing ||
+                           !btrfs_should_throttle_delayed_refs(trans, root))
                                return 0;
 
                        prepare_to_wait(&delayed_refs->wait, &__wait,
@@ -2608,7 +2654,7 @@ again:
 
        while (1) {
                if (!(run_all || run_most) &&
-                   delayed_refs->num_heads_ready < 64)
+                   !btrfs_should_throttle_delayed_refs(trans, root))
                        break;
 
                /*
@@ -8665,8 +8711,15 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
                if (end - start >= range->minlen) {
                        if (!block_group_cache_done(cache)) {
                                ret = cache_block_group(cache, 0);
-                               if (!ret)
-                                       wait_block_group_cache_done(cache);
+                               if (ret) {
+                                       btrfs_put_block_group(cache);
+                                       break;
+                               }
+                               ret = wait_block_group_cache_done(cache);
+                               if (ret) {
+                                       btrfs_put_block_group(cache);
+                                       break;
+                               }
                        }
                        ret = btrfs_trim_block_group(cache,
                                                     &group_trimmed,
index c11b7efcc561c8150f69bf1c013b259e9e8df5ef..c916ebdc689a3091f00e4ba5ca10033aa139fd17 100644 (file)
@@ -615,10 +615,11 @@ void btrfs_throttle(struct btrfs_root *root)
 static int should_end_transaction(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
 {
-       int ret;
+       if (root->fs_info->global_block_rsv.space_info->full &&
+           btrfs_should_throttle_delayed_refs(trans, root))
+               return 1;
 
-       ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
-       return ret ? 1 : 0;
+       return !!btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
 }
 
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
@@ -649,7 +650,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 {
        struct btrfs_transaction *cur_trans = trans->transaction;
        struct btrfs_fs_info *info = root->fs_info;
-       int count = 0;
+       unsigned long cur = trans->delayed_ref_updates;
        int lock = (trans->type != TRANS_JOIN_NOLOCK);
        int err = 0;
 
@@ -678,17 +679,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        if (!list_empty(&trans->new_bgs))
                btrfs_create_pending_block_groups(trans, root);
 
-       while (count < 1) {
-               unsigned long cur = trans->delayed_ref_updates;
+       trans->delayed_ref_updates = 0;
+       if (btrfs_should_throttle_delayed_refs(trans, root)) {
+               cur = max_t(unsigned long, cur, 1);
                trans->delayed_ref_updates = 0;
-               if (cur &&
-                   trans->transaction->delayed_refs.num_heads_ready > 64) {
-                       trans->delayed_ref_updates = 0;
-                       btrfs_run_delayed_refs(trans, root, cur);
-               } else {
-                       break;
-               }
-               count++;
+               btrfs_run_delayed_refs(trans, root, cur);
        }
 
        btrfs_trans_release_metadata(trans, root);
@@ -1626,6 +1621,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
         * start sending their work down.
         */
        cur_trans->delayed_refs.flushing = 1;
+       smp_wmb();
 
        if (!list_empty(&trans->new_bgs))
                btrfs_create_pending_block_groups(trans, root);