Btrfs: avoid losing data raid profile when deleting a device
authorLiu Bo <bo.li.liu@oracle.com>
Wed, 15 Nov 2017 23:28:11 +0000 (16:28 -0700)
committerDavid Sterba <dsterba@suse.com>
Mon, 22 Jan 2018 15:08:20 +0000 (16:08 +0100)
We've avoided data losing raid profile when doing balance, but it
turns out that deleting a device could also result in the same
problem.

Say we have 3 disks, and they're created with '-d raid1' profile.

- We have chunk P (the only data chunk on the empty btrfs).

- Suppose that chunk P's two raid1 copies reside in disk A and disk B.

- Now, 'btrfs device remove disk B'
         btrfs_rm_device()
   -> btrfs_shrink_device()
      -> btrfs_relocate_chunk() #relocate any chunk on disk B
          to other places.

- Chunk P will be removed and a new chunk will be created to hold
  those data, but as chunk P is the only one holding raid1 profile,
  after it goes away, the new chunk will be created as single profile
  which is our default profile.

This fixes the problem by creating an empty data chunk before
relocating the data chunk.

Metadata/System chunk are supposed to have non-zero bytes all the time
so their raid profile is preserved.

Reported-by: James Alandt <James.Alandt@wdc.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/volumes.c

index 2333d48ca1d7264c0a34a16cf690efb10a0c9512..c831a089471f1ad97c8634911fab65846e5a3772 100644 (file)
@@ -3106,6 +3106,48 @@ error:
        return ret;
 }
 
+/*
+ * return 1 : allocate a data chunk successfully,
+ * return <0: errors during allocating a data chunk,
+ * return 0 : no need to allocate a data chunk.
+ */
+static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
+                                     u64 chunk_offset)
+{
+       struct btrfs_block_group_cache *cache;
+       u64 bytes_used;
+       u64 chunk_type;
+
+       cache = btrfs_lookup_block_group(fs_info, chunk_offset);
+       ASSERT(cache);
+       chunk_type = cache->flags;
+       btrfs_put_block_group(cache);
+
+       if (chunk_type & BTRFS_BLOCK_GROUP_DATA) {
+               spin_lock(&fs_info->data_sinfo->lock);
+               bytes_used = fs_info->data_sinfo->bytes_used;
+               spin_unlock(&fs_info->data_sinfo->lock);
+
+               if (!bytes_used) {
+                       struct btrfs_trans_handle *trans;
+                       int ret;
+
+                       trans = btrfs_join_transaction(fs_info->tree_root);
+                       if (IS_ERR(trans))
+                               return PTR_ERR(trans);
+
+                       ret = btrfs_force_chunk_alloc(trans, fs_info,
+                                                     BTRFS_BLOCK_GROUP_DATA);
+                       btrfs_end_transaction(trans);
+                       if (ret < 0)
+                               return ret;
+
+                       return 1;
+               }
+       }
+       return 0;
+}
+
 static int insert_balance_item(struct btrfs_fs_info *fs_info,
                               struct btrfs_balance_control *bctl)
 {
@@ -3564,7 +3606,6 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        u32 count_meta = 0;
        u32 count_sys = 0;
        int chunk_reserved = 0;
-       u64 bytes_used = 0;
 
        /* step one make some room on all the devices */
        devices = &fs_info->fs_devices->devices;
@@ -3723,28 +3764,21 @@ again:
                        goto loop;
                }
 
-               ASSERT(fs_info->data_sinfo);
-               spin_lock(&fs_info->data_sinfo->lock);
-               bytes_used = fs_info->data_sinfo->bytes_used;
-               spin_unlock(&fs_info->data_sinfo->lock);
-
-               if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
-                   !chunk_reserved && !bytes_used) {
-                       trans = btrfs_start_transaction(chunk_root, 0);
-                       if (IS_ERR(trans)) {
-                               mutex_unlock(&fs_info->delete_unused_bgs_mutex);
-                               ret = PTR_ERR(trans);
-                               goto error;
-                       }
-
-                       ret = btrfs_force_chunk_alloc(trans, fs_info,
-                                                     BTRFS_BLOCK_GROUP_DATA);
-                       btrfs_end_transaction(trans);
+               if (!chunk_reserved) {
+                       /*
+                        * We may be relocating the only data chunk we have,
+                        * which could potentially end up with losing data's
+                        * raid profile, so lets allocate an empty one in
+                        * advance.
+                        */
+                       ret = btrfs_may_alloc_data_chunk(fs_info,
+                                                        found_key.offset);
                        if (ret < 0) {
                                mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                                goto error;
+                       } else if (ret == 1) {
+                               chunk_reserved = 1;
                        }
-                       chunk_reserved = 1;
                }
 
                ret = btrfs_relocate_chunk(fs_info, found_key.offset);
@@ -4507,6 +4541,18 @@ again:
                chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
                btrfs_release_path(path);
 
+               /*
+                * We may be relocating the only data chunk we have,
+                * which could potentially end up with losing data's
+                * raid profile, so lets allocate an empty one in
+                * advance.
+                */
+               ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
+               if (ret < 0) {
+                       mutex_unlock(&fs_info->delete_unused_bgs_mutex);
+                       goto done;
+               }
+
                ret = btrfs_relocate_chunk(fs_info, chunk_offset);
                mutex_unlock(&fs_info->delete_unused_bgs_mutex);
                if (ret && ret != -ENOSPC)