btrfs: add support for 3-copy replication (raid1c3)
author David Sterba <dsterba@suse.com>
Fri, 2 Mar 2018 21:56:53 +0000 (22:56 +0100)
committer David Sterba <dsterba@suse.com>
Mon, 18 Nov 2019 16:51:49 +0000 (17:51 +0100)
Add a new block group profile to store 3 copies, similar to how the
current RAID1 does. The profile attributes and constraints are defined
in the raid table and used by the same code that already handles the
2-copy RAID1.

The minimum number of devices is 3, the maximum number of devices/chunks
that can be lost/damaged is 2. Like RAID6 but with 33% space
utilization.

Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/super.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
include/uapi/linux/btrfs.h
include/uapi/linux/btrfs_tree.h

index 1c8f01eaf27c65d1f4015b47e374231038118bfb..aa1b437fb9515ea28de6c7448ac6890e6a9e5436 100644 (file)
@@ -57,9 +57,9 @@ struct btrfs_ref;
  * filesystem data as well that can be used to read data in order to repair
  * read errors on other disks.
  *
- * Current value is derived from RAID1 with 2 copies.
+ * Current value is derived from RAID1C3 with 3 copies.
  */
-#define BTRFS_MAX_MIRRORS (2 + 1)
+#define BTRFS_MAX_MIRRORS (3 + 1)
 
 #define BTRFS_MAX_LEVEL 8
 
index 3f49407cc2aa282afcab95b5e29b30413524a85f..a5aff138e2e0bda63a1f1630ff55b9a0ec04537e 100644 (file)
@@ -1935,6 +1935,8 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
                num_stripes = nr_devices;
        else if (type & BTRFS_BLOCK_GROUP_RAID1)
                num_stripes = 2;
+       else if (type & BTRFS_BLOCK_GROUP_RAID1C3)
+               num_stripes = 3;
        else if (type & BTRFS_BLOCK_GROUP_RAID10)
                num_stripes = 4;
 
index e6c458d99722e6f3b52c52af305b2fb14283bf05..d17835fbd52f0bc9d4e78c60cf993d51ad9d68aa 100644 (file)
@@ -58,6 +58,18 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
                .bg_flag        = BTRFS_BLOCK_GROUP_RAID1,
                .mindev_error   = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
        },
+       [BTRFS_RAID_RAID1C3] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 0,
+               .devs_min       = 3,
+               .tolerated_failures = 2,
+               .devs_increment = 3,
+               .ncopies        = 3,
+               .raid_name      = "raid1c3",
+               .bg_flag        = BTRFS_BLOCK_GROUP_RAID1C3,
+               .mindev_error   = BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
+       },
        [BTRFS_RAID_DUP] = {
                .sub_stripes    = 1,
                .dev_stripes    = 2,
@@ -4839,8 +4851,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
             btrfs_cmp_device_info, NULL);
 
-       /* round down to number of usable stripes */
-       ndevs = round_down(ndevs, devs_increment);
+       /*
+        * Round down to number of usable stripes, devs_increment can be any
+        * number so we can't use round_down()
+        */
+       ndevs -= ndevs % devs_increment;
 
        if (ndevs < devs_min) {
                ret = -ENOSPC;
index ac4ba8c57283c4eaa936fa4b730406cbcda8f297..a4e26b84e1b9d961bf159b34edbc7789e6136604 100644 (file)
@@ -545,6 +545,8 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
                return BTRFS_RAID_RAID10;
        else if (flags & BTRFS_BLOCK_GROUP_RAID1)
                return BTRFS_RAID_RAID1;
+       else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
+               return BTRFS_RAID_RAID1C3;
        else if (flags & BTRFS_BLOCK_GROUP_DUP)
                return BTRFS_RAID_DUP;
        else if (flags & BTRFS_BLOCK_GROUP_RAID0)
index 3ee0678c0a8355ca8aa8495733e231a013042695..ba22f91a3f5bcc25c6d96ff2960b0366d8256749 100644 (file)
@@ -831,7 +831,8 @@ enum btrfs_err_code {
        BTRFS_ERROR_DEV_TGT_REPLACE,
        BTRFS_ERROR_DEV_MISSING_NOT_FOUND,
        BTRFS_ERROR_DEV_ONLY_WRITABLE,
-       BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS
+       BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS,
+       BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
 };
 
 #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
index 5160be1d7332ca2c614f8ec7688d0eb952408f12..52b2964b0311c084a5a01222c169607575b7da32 100644 (file)
@@ -841,6 +841,7 @@ struct btrfs_dev_replace_item {
 #define BTRFS_BLOCK_GROUP_RAID10       (1ULL << 6)
 #define BTRFS_BLOCK_GROUP_RAID5         (1ULL << 7)
 #define BTRFS_BLOCK_GROUP_RAID6         (1ULL << 8)
+#define BTRFS_BLOCK_GROUP_RAID1C3       (1ULL << 9)
 #define BTRFS_BLOCK_GROUP_RESERVED     (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
                                         BTRFS_SPACE_INFO_GLOBAL_RSV)
 
@@ -852,6 +853,7 @@ enum btrfs_raid_types {
        BTRFS_RAID_SINGLE,
        BTRFS_RAID_RAID5,
        BTRFS_RAID_RAID6,
+       BTRFS_RAID_RAID1C3,
        BTRFS_NR_RAID_TYPES
 };
 
@@ -861,6 +863,7 @@ enum btrfs_raid_types {
 
 #define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 |   \
                                         BTRFS_BLOCK_GROUP_RAID1 |   \
+                                        BTRFS_BLOCK_GROUP_RAID1C3 | \
                                         BTRFS_BLOCK_GROUP_RAID5 |   \
                                         BTRFS_BLOCK_GROUP_RAID6 |   \
                                         BTRFS_BLOCK_GROUP_DUP |     \
@@ -868,7 +871,8 @@ enum btrfs_raid_types {
 #define BTRFS_BLOCK_GROUP_RAID56_MASK  (BTRFS_BLOCK_GROUP_RAID5 |   \
                                         BTRFS_BLOCK_GROUP_RAID6)
 
-#define BTRFS_BLOCK_GROUP_RAID1_MASK   (BTRFS_BLOCK_GROUP_RAID1)
+#define BTRFS_BLOCK_GROUP_RAID1_MASK   (BTRFS_BLOCK_GROUP_RAID1 |   \
+                                        BTRFS_BLOCK_GROUP_RAID1C3)
 
 /*
  * We need a bit for restriper to be able to tell when chunks of type