btrfs: check for refs on snapshot delete resume
authorJosef Bacik <josef@toxicpanda.com>
Wed, 6 Feb 2019 20:46:14 +0000 (15:46 -0500)
committerDavid Sterba <dsterba@suse.com>
Wed, 27 Feb 2019 13:08:47 +0000 (14:08 +0100)
There's a bug in snapshot deletion where we won't update the
drop_progress key if we're in the UPDATE_BACKREF stage.  This is a
problem because we could drop refs for blocks we know don't belong to
ours.  If we crash or umount at the right time we could experience
messages such as the following when snapshot deletion resumes

 BTRFS error (device dm-3): unable to find ref byte nr 66797568 parent 0 root 258  owner 1 offset 0
 ------------[ cut here ]------------
 WARNING: CPU: 3 PID: 16052 at fs/btrfs/extent-tree.c:7108 __btrfs_free_extent.isra.78+0x62c/0xb30 [btrfs]
 CPU: 3 PID: 16052 Comm: umount Tainted: G        W  OE     5.0.0-rc4+ #147
 Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./890FX Deluxe5, BIOS P1.40 05/03/2011
 RIP: 0010:__btrfs_free_extent.isra.78+0x62c/0xb30 [btrfs]
 RSP: 0018:ffffc90005cd7b18 EFLAGS: 00010286
 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000
 RDX: ffff88842fade680 RSI: ffff88842fad6b18 RDI: ffff88842fad6b18
 RBP: ffffc90005cd7bc8 R08: 0000000000000000 R09: 0000000000000001
 R10: 0000000000000001 R11: ffffffff822696b8 R12: 0000000003fb4000
 R13: 0000000000000001 R14: 0000000000000102 R15: ffff88819c9d67e0
 FS:  00007f08bb138fc0(0000) GS:ffff88842fac0000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f8f5d861ea0 CR3: 00000003e99fe000 CR4: 00000000000006e0
 Call Trace:
 ? _raw_spin_unlock+0x27/0x40
 ? btrfs_merge_delayed_refs+0x356/0x3e0 [btrfs]
 __btrfs_run_delayed_refs+0x75a/0x13c0 [btrfs]
 ? join_transaction+0x2b/0x460 [btrfs]
 btrfs_run_delayed_refs+0xf3/0x1c0 [btrfs]
 btrfs_commit_transaction+0x52/0xa50 [btrfs]
 ? start_transaction+0xa6/0x510 [btrfs]
 btrfs_sync_fs+0x79/0x1c0 [btrfs]
 sync_filesystem+0x70/0x90
 generic_shutdown_super+0x27/0x120
 kill_anon_super+0x12/0x30
 btrfs_kill_super+0x16/0xa0 [btrfs]
 deactivate_locked_super+0x43/0x70
 deactivate_super+0x40/0x60
 cleanup_mnt+0x3f/0x80
 __cleanup_mnt+0x12/0x20
 task_work_run+0x8b/0xc0
 exit_to_usermode_loop+0xce/0xd0
 do_syscall_64+0x20b/0x210
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

To fix this simply mark dead roots we read from disk as DEAD and then
set the walk_control->restarted flag so we know we have a restarted
deletion.  From here whenever we try to drop refs for blocks we check to
verify our ref is set on them, and if it is not we skip it.  Once we
find a ref that is set we unset walk_control->restarted since the tree
should be in a normal state from then on, and any problems we run into
from there are different issues.  I tested this with an existing broken
fs and my reproducer that creates a broken fs and it fixed both file
systems.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/root-tree.c

index 85140913c0f5569c6e02d4d2d98203b3f8b44959..0f4838e00fbc892beb64abd118934c033a5e94be 100644 (file)
@@ -1210,6 +1210,8 @@ enum {
         * Set for the subvolume tree owning the reloc tree.
         */
        BTRFS_ROOT_DEAD_RELOC_TREE,
+       /* Mark dead root stored on device whose cleanup needs to be resumed */
+       BTRFS_ROOT_DEAD_TREE,
 };
 
 /*
index 994f0cc41799304581207b23570d68ed44354f94..36af54bec1119e79eb9c55d9e95e7a7c27800235 100644 (file)
@@ -8771,6 +8771,7 @@ struct walk_control {
        int keep_locks;
        int reada_slot;
        int reada_count;
+       int restarted;
 };
 
 #define DROP_REFERENCE 1
@@ -8933,6 +8934,33 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+/*
+ * This is used to verify a ref exists for this root to deal with a bug where we
+ * would have a drop_progress key that hadn't been updated properly.
+ */
+static int check_ref_exists(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, u64 bytenr, u64 parent,
+                           int level)
+{
+       struct btrfs_path *path;
+       struct btrfs_extent_inline_ref *iref;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = lookup_extent_backref(trans, path, &iref, bytenr,
+                                   root->fs_info->nodesize, parent,
+                                   root->root_key.objectid, level, 0);
+       btrfs_free_path(path);
+       if (ret == -ENOENT)
+               return 0;
+       if (ret < 0)
+               return ret;
+       return 1;
+}
+
 /*
  * helper to process tree block pointer.
  *
@@ -9087,6 +9115,23 @@ skip:
                        parent = 0;
                }
 
+               /*
+                * If we had a drop_progress we need to verify the refs are set
+                * as expected.  If we find our ref then we know that from here
+                * on out everything should be correct, and we can clear the
+                * ->restarted flag.
+                */
+               if (wc->restarted) {
+                       ret = check_ref_exists(trans, root, bytenr, parent,
+                                              level - 1);
+                       if (ret < 0)
+                               goto out_unlock;
+                       if (ret == 0)
+                               goto no_delete;
+                       ret = 0;
+                       wc->restarted = 0;
+               }
+
                /*
                 * Reloc tree doesn't contribute to qgroup numbers, and we have
                 * already accounted them at merge time (replace_path),
@@ -9108,7 +9153,7 @@ skip:
                if (ret)
                        goto out_unlock;
        }
-
+no_delete:
        *lookup_info = 1;
        ret = 1;
 
@@ -9425,6 +9470,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                }
        }
 
+       wc->restarted = test_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
        wc->level = level;
        wc->shared_level = -1;
        wc->stage = DROP_REFERENCE;
index 0d2b957ca3a31f4aa44ccfd7c0fd43f006c5922a..893d12fbfda0761573b87acb790732b74c46ddc9 100644 (file)
@@ -263,8 +263,10 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
                if (root) {
                        WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
                                          &root->state));
-                       if (btrfs_root_refs(&root->root_item) == 0)
+                       if (btrfs_root_refs(&root->root_item) == 0) {
+                               set_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
                                btrfs_add_dead_root(root);
+                       }
                        continue;
                }
 
@@ -310,8 +312,10 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
                        break;
                }
 
-               if (btrfs_root_refs(&root->root_item) == 0)
+               if (btrfs_root_refs(&root->root_item) == 0) {
+                       set_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
                        btrfs_add_dead_root(root);
+               }
        }
 
        btrfs_free_path(path);