btrfs: keep trim from interfering with transaction commits
authorJeff Mahoney <jeffm@suse.com>
Thu, 6 Sep 2018 21:18:16 +0000 (17:18 -0400)
committerDavid Sterba <dsterba@suse.com>
Mon, 15 Oct 2018 15:23:32 +0000 (17:23 +0200)
Commit 499f377f49f08 (btrfs: iterate over unused chunk space in FITRIM)
fixed free space trimming, but introduced latency when it was running.
This is due to it pinning the transaction using both a incremented
refcount and holding the commit root sem for the duration of a single
trim operation.

This was to ensure safety but it's unnecessary.  We already hold the the
chunk mutex so we know that the chunk we're using can't be allocated
while we're trimming it.

In order to check against chunks allocated already in this transaction,
we need to check the pending chunks list.  To to that safely without
joining the transaction (or attaching than then having to commit it) we
need to ensure that the dev root's commit root doesn't change underneath
us and the pending chunk lists stays around until we're done with it.

We can ensure the former by holding the commit root sem and the latter
by pinning the transaction.  We do this now, but the critical section
covers the trim operation itself and we don't need to do that.

This patch moves the pinning and unpinning logic into helpers and unpins
the transaction after performing the search and check for pending
chunks.

Limiting the critical section of the transaction pinning improves the
latency substantially on slower storage (e.g. image files over NFS).

Fixes: 499f377f49f08 ("btrfs: iterate over unused chunk space in FITRIM")
CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/extent-tree.c

index 6e1c2a22874fca180fa79cddfd7b13d7246cda59..f0524bdf49b399cff2e5d16436ed3c7a893606bc 100644 (file)
@@ -10755,14 +10755,16 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
  * We don't want a transaction for this since the discard may take a
  * substantial amount of time.  We don't require that a transaction be
  * running, but we do need to take a running transaction into account
- * to ensure that we're not discarding chunks that were released in
- * the current transaction.
+ * to ensure that we're not discarding chunks that were released or
+ * allocated in the current transaction.
  *
  * Holding the chunks lock will prevent other threads from allocating
  * or releasing chunks, but it won't prevent a running transaction
  * from committing and releasing the memory that the pending chunks
  * list head uses.  For that, we need to take a reference to the
- * transaction.
+ * transaction and hold the commit root sem.  We only need to hold
+ * it while performing the free space search since we have already
+ * held back allocations.
  */
 static int btrfs_trim_free_extents(struct btrfs_device *device,
                                   u64 minlen, u64 *trimmed)
@@ -10793,9 +10795,13 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 
                ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
                if (ret)
-                       return ret;
+                       break;
 
-               down_read(&fs_info->commit_root_sem);
+               ret = down_read_killable(&fs_info->commit_root_sem);
+               if (ret) {
+                       mutex_unlock(&fs_info->chunk_mutex);
+                       break;
+               }
 
                spin_lock(&fs_info->trans_lock);
                trans = fs_info->running_transaction;
@@ -10803,13 +10809,17 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
                        refcount_inc(&trans->use_count);
                spin_unlock(&fs_info->trans_lock);
 
+               if (!trans)
+                       up_read(&fs_info->commit_root_sem);
+
                ret = find_free_dev_extent_start(trans, device, minlen, start,
                                                 &start, &len);
-               if (trans)
+               if (trans) {
+                       up_read(&fs_info->commit_root_sem);
                        btrfs_put_transaction(trans);
+               }
 
                if (ret) {
-                       up_read(&fs_info->commit_root_sem);
                        mutex_unlock(&fs_info->chunk_mutex);
                        if (ret == -ENOSPC)
                                ret = 0;
@@ -10817,7 +10827,6 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
                }
 
                ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
-               up_read(&fs_info->commit_root_sem);
                mutex_unlock(&fs_info->chunk_mutex);
 
                if (ret)