Btrfs: fix race setting block group readonly during device replace

author Filipe Manana <fdmanana@suse.com>

Sat, 14 May 2016 08:12:53 +0000 (09:12 +0100)

committer Filipe Manana <fdmanana@suse.com>

Mon, 30 May 2016 11:58:21 +0000 (12:58 +0100)
author Filipe Manana <fdmanana@suse.com>
Sat, 14 May 2016 08:12:53 +0000 (09:12 +0100)
committer Filipe Manana <fdmanana@suse.com>
Mon, 30 May 2016 11:58:21 +0000 (12:58 +0100)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c

index 559170464d7c50f43d6804d59dfd9f73e57a707f..e96634a725c3e441a311e36b40b43532031bf5c5 100644 (file)
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -718,12 +718,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
         return count;
  }
  
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
                               const u64 range_start, const u64 range_len)
  {
         struct btrfs_root *root;
         struct list_head splice;
         int done;
+       int total_done = 0;
  
         INIT_LIST_HEAD(&splice);
  
@@ -742,6 +743,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
                 done = btrfs_wait_ordered_extents(root, nr,
                                                   range_start, range_len);
                 btrfs_put_fs_root(root);
+               total_done += done;
  
                 spin_lock(&fs_info->ordered_root_lock);
                 if (nr != -1) {
@@ -752,6 +754,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
         list_splice_tail(&splice, &fs_info->ordered_roots);
         spin_unlock(&fs_info->ordered_root_lock);
         mutex_unlock(&fs_info->ordered_operations_mutex);
+
+       return total_done;
  }
  
  /*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h

index 2049c9be85ee304f41b11753db609fd802de7fd6..451507776ff59f87fdc286db823f4d08ff71899a 100644 (file)
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -199,7 +199,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
                            u32 *sum, int len);
  int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
                                const u64 range_start, const u64 range_len);
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
                               const u64 range_start, const u64 range_len);
  void btrfs_get_logged_extents(struct inode *inode,
                               struct list_head *logged_list,
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c

index 46d847f66e4b870dcd54dc18d023f8ca321f40fb..1611572d47bd0ff32fdd0e6bed4afa069264f5b6 100644 (file)
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3582,6 +3582,46 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                  */
                 scrub_pause_on(fs_info);
                 ret = btrfs_inc_block_group_ro(root, cache);
+               if (!ret && is_dev_replace) {
+                       /*
+                        * If we are doing a device replace wait for any tasks
+                        * that started delalloc right before we set the block
+                        * group to RO mode, as they might have just allocated
+                        * an extent from it or decided they could do a nocow
+                        * write. And if any such tasks did that, wait for their
+                        * ordered extents to complete and then commit the
+                        * current transaction, so that we can later see the new
+                        * extent items in the extent tree - the ordered extents
+                        * create delayed data references (for cow writes) when
+                        * they complete, which will be run and insert the
+                        * corresponding extent items into the extent tree when
+                        * we commit the transaction they used when running
+                        * inode.c:btrfs_finish_ordered_io(). We later use
+                        * the commit root of the extent tree to find extents
+                        * to copy from the srcdev into the tgtdev, and we don't
+                        * want to miss any new extents.
+                        */
+                       btrfs_wait_block_group_reservations(cache);
+                       btrfs_wait_nocow_writers(cache);
+                       ret = btrfs_wait_ordered_roots(fs_info, -1,
+                                                      cache->key.objectid,
+                                                      cache->key.offset);
+                       if (ret > 0) {
+                               struct btrfs_trans_handle *trans;
+
+                               trans = btrfs_join_transaction(root);
+                               if (IS_ERR(trans))
+                                       ret = PTR_ERR(trans);
+                               else
+                                       ret = btrfs_commit_transaction(trans,
+                                                                      root);
+                               if (ret) {
+                                       scrub_pause_off(fs_info);
+                                       btrfs_put_block_group(cache);
+                                       break;
+                               }
+                       }
+               }
                 scrub_pause_off(fs_info);
  
                 if (ret == 0) {
author	Filipe Manana <fdmanana@suse.com>
	Sat, 14 May 2016 08:12:53 +0000 (09:12 +0100)
committer	Filipe Manana <fdmanana@suse.com>
	Mon, 30 May 2016 11:58:21 +0000 (12:58 +0100)
fs/btrfs/ordered-data.c		patch \| blob \| history
fs/btrfs/ordered-data.h		patch \| blob \| history
fs/btrfs/scrub.c		patch \| blob \| history