Btrfs: prevent ioctls from interfering with a swap file
author Omar Sandoval <osandov@fb.com>
Thu, 3 Nov 2016 17:28:12 +0000 (10:28 -0700)
committer David Sterba <dsterba@suse.com>
Mon, 17 Dec 2018 13:51:29 +0000 (14:51 +0100)
A later patch will implement swap file support for Btrfs, but before we
do that, we need to make sure that the various Btrfs ioctls cannot
change a swap file.

When a swap file is active, we must make sure that the extents of the
file are not moved and that they don't become shared. That means that
the following are not safe:

- chattr +c (enable compression)
- reflink
- dedupe
- snapshot
- defrag

Don't allow those to happen on an active swap file.

Additionally, balance, resize, device remove, and device replace are
also unsafe if they affect an active swapfile. Add a red-black tree of
block groups and devices which contain an active swapfile. Relocation
checks each block group against this tree and skips it or errors out for
balance or resize, respectively. Device remove and device replace check
the tree for the device they will operate on.

Note that we don't have to worry about chattr -C (disable nocow), which
we ignore for non-empty files, because an active swapfile must be
non-empty and can't be truncated. We also don't have to worry about
autodefrag because it's only done on COW files. Truncate and fallocate
are already taken care of by the generic code. Device add doesn't do
relocation so it's not an issue, either.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/ioctl.c
fs/btrfs/relocation.c
fs/btrfs/volumes.c

index 9025bab58e81dd8ac459e61b986376d4cc1af154..8b41ec42f40558044421a1a151e5e01b25e9c59c 100644 (file)
@@ -712,6 +712,28 @@ struct btrfs_fs_devices;
 struct btrfs_balance_control;
 struct btrfs_delayed_root;
 
+/*
+ * Block group or device which contains an active swapfile. Used for preventing
+ * unsafe operations while a swapfile is active.
+ *
+ * These are sorted on (ptr, inode) (note that a block group or device can
+ * contain more than one swapfile). We compare the pointer values because we
+ * don't actually care what the object is, we just need a quick check whether
+ * the object exists in the rbtree.
+ */
+struct btrfs_swapfile_pin {
+       struct rb_node node;    /* entry in fs_info->swapfile_pins */
+       void *ptr;              /* pinned object; compared by address only */
+       struct inode *inode;    /* second half of the (ptr, inode) sort key */
+       /*
+        * If true, ptr points to a struct btrfs_block_group_cache. Otherwise,
+        * ptr points to a struct btrfs_device.
+        */
+       bool is_block_group;
+};
+
+bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
+
 #define BTRFS_FS_BARRIER                       1
 #define BTRFS_FS_CLOSING_START                 2
 #define BTRFS_FS_CLOSING_DONE                  3
@@ -1114,6 +1136,10 @@ struct btrfs_fs_info {
        u32 sectorsize;
        u32 stripesize;
 
+       /* Block groups and devices containing active swapfiles. */
+       spinlock_t swapfile_pins_lock;
+       struct rb_root swapfile_pins;
+
 #ifdef CONFIG_BTRFS_FS_REF_VERIFY
        spinlock_t ref_verify_lock;
        struct rb_root block_tree;
@@ -1274,6 +1300,9 @@ struct btrfs_root {
        u64 qgroup_meta_rsv_pertrans;
        u64 qgroup_meta_rsv_prealloc;
 
+       /* Number of active swapfiles */
+       atomic_t nr_swapfiles;
+
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
        u64 alloc_bytenr;
 #endif
index 2aa48aecc52b75a95eb1e19e7a1974d8ad800dde..46092e67f61a48a6d2b4340915a2d64aad3fd44f 100644 (file)
@@ -407,6 +407,13 @@ int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
        if (IS_ERR(src_device))
                return PTR_ERR(src_device);
 
+       if (btrfs_pinned_by_swapfile(fs_info, src_device)) {
+               btrfs_warn_in_rcu(fs_info,
+         "cannot replace device %s (devid %llu) due to active swapfile",
+                       btrfs_dev_name(src_device), src_device->devid);
+               return -ETXTBSY;
+       }
+
        ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name,
                                            src_device, &tgt_device);
        if (ret)
index 6d776717d8b39b566e6ec14f479648ad6f788905..c1d127decc8dc7b9b6fbe0ca0b1423ecf29b2c35 100644 (file)
@@ -1178,6 +1178,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
        refcount_set(&root->refs, 1);
        atomic_set(&root->will_be_snapshotted, 0);
        atomic_set(&root->snapshot_force_cow, 0);
+       atomic_set(&root->nr_swapfiles, 0);
        root->log_transid = 0;
        root->log_transid_committed = -1;
        root->last_log_commit = 0;
@@ -2745,6 +2746,9 @@ int open_ctree(struct super_block *sb,
        fs_info->sectorsize = 4096;
        fs_info->stripesize = 4096;
 
+       spin_lock_init(&fs_info->swapfile_pins_lock);
+       fs_info->swapfile_pins = RB_ROOT;
+
        ret = btrfs_alloc_stripe_hash_table(fs_info);
        if (ret) {
                err = ret;
index 802a628e9f7d7fe629a76e8d108b75c04ed4246e..36d8dcc7a475663cae0d93e902cbd2a4b37a2d4e 100644 (file)
@@ -290,6 +290,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        } else if (fsflags & FS_COMPR_FL) {
                const char *comp;
 
+               if (IS_SWAPFILE(inode)) {
+                       ret = -ETXTBSY;
+                       goto out_unlock;
+               }
+
                binode->flags |= BTRFS_INODE_COMPRESS;
                binode->flags &= ~BTRFS_INODE_NOCOMPRESS;
 
@@ -754,6 +759,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
                return -EINVAL;
 
+       if (atomic_read(&root->nr_swapfiles)) {
+               btrfs_warn(fs_info,
+                          "cannot snapshot subvolume with active swapfile");
+               return -ETXTBSY;
+       }
+
        pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
        if (!pending_snapshot)
                return -ENOMEM;
@@ -1505,9 +1516,13 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                }
 
                inode_lock(inode);
-               if (do_compress)
-                       BTRFS_I(inode)->defrag_compress = compress_type;
-               ret = cluster_pages_for_defrag(inode, pages, i, cluster);
+               if (IS_SWAPFILE(inode)) {
+                       ret = -ETXTBSY;
+               } else {
+                       if (do_compress)
+                               BTRFS_I(inode)->defrag_compress = compress_type;
+                       ret = cluster_pages_for_defrag(inode, pages, i, cluster);
+               }
                if (ret < 0) {
                        inode_unlock(inode);
                        goto out_ra;
@@ -3577,6 +3592,11 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                goto out_unlock;
        }
 
+       if (IS_SWAPFILE(src) || IS_SWAPFILE(dst)) {
+               ret = -ETXTBSY;
+               goto out_unlock;
+       }
+
        tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
        chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);
        if (chunk_count == 0)
@@ -4253,6 +4273,11 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
                goto out_unlock;
        }
 
+       if (IS_SWAPFILE(src) || IS_SWAPFILE(inode)) {
+               ret = -ETXTBSY;
+               goto out_unlock;
+       }
+
        /* determine range to clone */
        ret = -EINVAL;
        if (off + len > src->i_size || off + len < off)
index a3f75b8926d4474aa1093ffdd852d422b853e2d2..5471eea1c0fd7ebf97b43f7f692f76ef12756553 100644 (file)
@@ -4223,6 +4223,7 @@ static void describe_relocation(struct btrfs_fs_info *fs_info,
  */
 int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
 {
+       struct btrfs_block_group_cache *bg;
        struct btrfs_root *extent_root = fs_info->extent_root;
        struct reloc_control *rc;
        struct inode *inode;
@@ -4231,14 +4232,23 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
        int rw = 0;
        int err = 0;
 
+       bg = btrfs_lookup_block_group(fs_info, group_start);
+       if (!bg)
+               return -ENOENT;
+
+       if (btrfs_pinned_by_swapfile(fs_info, bg)) {
+               btrfs_put_block_group(bg);
+               return -ETXTBSY;
+       }
+
        rc = alloc_reloc_control();
-       if (!rc)
+       if (!rc) {
+               btrfs_put_block_group(bg);
                return -ENOMEM;
+       }
 
        rc->extent_root = extent_root;
-
-       rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
-       BUG_ON(!rc->block_group);
+       rc->block_group = bg;
 
        ret = btrfs_inc_block_group_ro(rc->block_group);
        if (ret) {
index 303ae9de85b160050a1b0abb6d2944ea6228456f..0f7925e34232d103c546f29c4ab0db59b97fcf77 100644 (file)
@@ -1900,6 +1900,14 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
                goto out;
        }
 
+       if (btrfs_pinned_by_swapfile(fs_info, device)) {
+               btrfs_warn_in_rcu(fs_info,
+                 "cannot remove device %s (devid %llu) due to active swapfile",
+                                 rcu_str_deref(device->name), device->devid);
+               ret = -ETXTBSY;
+               goto out;
+       }
+
        if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
                ret = BTRFS_ERROR_DEV_TGT_REPLACE;
                goto out;
@@ -3638,10 +3646,15 @@ again:
 
                ret = btrfs_relocate_chunk(fs_info, found_key.offset);
                mutex_unlock(&fs_info->delete_unused_bgs_mutex);
-               if (ret && ret != -ENOSPC)
-                       goto error;
                if (ret == -ENOSPC) {
                        enospc_errors++;
+               } else if (ret == -ETXTBSY) {
+                       btrfs_info(fs_info,
+          "skipping relocation of block group %llu due to active swapfile",
+                                  found_key.offset);
+                       ret = 0;
+               } else if (ret) {
+                       goto error;
                } else {
                        spin_lock(&fs_info->balance_lock);
                        bctl->stat.completed++;
@@ -4433,10 +4446,16 @@ again:
 
                ret = btrfs_relocate_chunk(fs_info, chunk_offset);
                mutex_unlock(&fs_info->delete_unused_bgs_mutex);
-               if (ret && ret != -ENOSPC)
-                       goto done;
-               if (ret == -ENOSPC)
+               if (ret == -ENOSPC) {
                        failed++;
+               } else if (ret) {
+                       if (ret == -ETXTBSY) {
+                               btrfs_warn(fs_info,
+                  "could not shrink block group %llu due to active swapfile",
+                                          chunk_offset);
+                       }
+                       goto done;
+               }
        } while (key.offset-- > 0);
 
        if (failed && !retried) {
@@ -7572,3 +7591,27 @@ out:
        btrfs_free_path(path);
        return ret;
 }
+
+/*
+ * Return true if the block group or device @ptr is pinned by any swapfile.
+ * The lookup matches on the pointer value alone; the inode is not compared.
+ */
+bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr)
+{
+       struct btrfs_swapfile_pin *sp;
+       struct rb_node *node;
+
+       spin_lock(&fs_info->swapfile_pins_lock);
+       node = fs_info->swapfile_pins.rb_node;
+       while (node) {
+               sp = rb_entry(node, struct btrfs_swapfile_pin, node);
+               if (ptr < sp->ptr)
+                       node = node->rb_left;
+               else if (ptr > sp->ptr)
+                       node = node->rb_right;
+               else
+                       break; /* found a pin whose ptr equals @ptr */
+       }
+       spin_unlock(&fs_info->swapfile_pins_lock);
+       return node != NULL; /* NULL means the descent ran out of nodes */
+}