Btrfs: part 2, fix incremental send's decision to delay a dir move/rename
authorFilipe Manana <fdmanana@gmail.com>
Wed, 19 Mar 2014 14:20:54 +0000 (14:20 +0000)
committerChris Mason <clm@fb.com>
Fri, 21 Mar 2014 22:25:48 +0000 (15:25 -0700)
For an incremental send, fix the process of determining whether the directory
inode we're currently processing needs to have its move/rename operation delayed.

We were ignoring the fact that if the inode's new immediate ancestor has a higher
inode number than ours but wasn't renamed/moved, we might still need to delay our
move/rename, because some other ancestor directory higher in the hierarchy might
have an inode number higher than ours *and* was renamed/moved too - in this case
we have to wait for rename/move of that ancestor to happen before our current
directory's rename/move operation.

Simple steps to reproduce this issue:

      $ mkfs.btrfs -f /dev/sdd
      $ mount /dev/sdd /mnt

      $ mkdir -p /mnt/a/x1/x2
      $ mkdir /mnt/a/Z
      $ mkdir -p /mnt/a/x1/x2/x3/x4/x5

      $ btrfs subvolume snapshot -r /mnt /mnt/snap1
      $ btrfs send /mnt/snap1 -f /tmp/base.send

      $ mv /mnt/a/x1/x2/x3 /mnt/a/Z/X33
      $ mv /mnt/a/x1/x2 /mnt/a/Z/X33/x4/x5/X22

      $ btrfs subvolume snapshot -r /mnt /mnt/snap2
      $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/incremental.send

The incremental send caused the kernel code to enter an infinite loop when
building the path string for directory Z after its references are processed.

A more complex scenario:

      $ mkfs.btrfs -f /dev/sdd
      $ mount /dev/sdd /mnt

      $ mkdir -p /mnt/a/b/c/d
      $ mkdir /mnt/a/b/c/d/e
      $ mkdir /mnt/a/b/c/d/f
      $ mv /mnt/a/b/c/d/e /mnt/a/b/c/d/f/E2
      $ mkdir /mmt/a/b/c/g
      $ mv /mnt/a/b/c/d /mnt/a/b/D2

      $ btrfs subvolume snapshot -r /mnt /mnt/snap1
      $ btrfs send /mnt/snap1 -f /tmp/base.send

      $ mkdir /mnt/a/o
      $ mv /mnt/a/b/c/g /mnt/a/b/D2/f/G2
      $ mv /mnt/a/b/D2 /mnt/a/b/dd
      $ mv /mnt/a/b/c /mnt/a/C2
      $ mv /mnt/a/b/dd/f /mnt/a/o/FF
      $ mv /mnt/a/b /mnt/a/o/FF/E2/BB

      $ btrfs subvolume snapshot -r /mnt /mnt/snap2
      $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/incremental.send

A test case for xfstests follows.

Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com>
Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/send.c

index f16472409eec9c6021b7f4b9a7937a2d63c2e887..143fed3f4586d303df29f078690a60b07e97ab2c 100644 (file)
@@ -2916,7 +2916,10 @@ static void free_waiting_dir_move(struct send_ctx *sctx,
        kfree(dm);
 }
 
-static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino)
+static int add_pending_dir_move(struct send_ctx *sctx,
+                               u64 ino,
+                               u64 ino_gen,
+                               u64 parent_ino)
 {
        struct rb_node **p = &sctx->pending_dir_moves.rb_node;
        struct rb_node *parent = NULL;
@@ -2929,8 +2932,8 @@ static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino)
        if (!pm)
                return -ENOMEM;
        pm->parent_ino = parent_ino;
-       pm->ino = sctx->cur_ino;
-       pm->gen = sctx->cur_inode_gen;
+       pm->ino = ino;
+       pm->gen = ino_gen;
        INIT_LIST_HEAD(&pm->list);
        INIT_LIST_HEAD(&pm->update_refs);
        RB_CLEAR_NODE(&pm->node);
@@ -3183,6 +3186,8 @@ static int wait_for_parent_move(struct send_ctx *sctx,
        struct fs_path *path_before = NULL;
        struct fs_path *path_after = NULL;
        int len1, len2;
+       int register_upper_dirs;
+       u64 gen;
 
        if (is_waiting_for_move(sctx, ino))
                return 1;
@@ -3220,7 +3225,7 @@ static int wait_for_parent_move(struct send_ctx *sctx,
        }
 
        ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
-                           NULL, path_after);
+                           &gen, path_after);
        if (ret == -ENOENT) {
                ret = 0;
                goto out;
@@ -3237,6 +3242,60 @@ static int wait_for_parent_move(struct send_ctx *sctx,
        }
        ret = 0;
 
+       /*
+        * Ok, our new most direct ancestor has a higher inode number but
+        * wasn't moved/renamed. So maybe some of the new ancestors higher in
+        * the hierarchy have an higher inode number too *and* were renamed
+        * or moved - in this case we need to wait for the ancestor's rename
+        * or move operation before we can do the move/rename for the current
+        * inode.
+        */
+       register_upper_dirs = 0;
+       ino = parent_ino_after;
+again:
+       while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) {
+               u64 parent_gen;
+
+               fs_path_reset(path_before);
+               fs_path_reset(path_after);
+
+               ret = get_first_ref(sctx->send_root, ino, &parent_ino_after,
+                                   &parent_gen, path_after);
+               if (ret < 0)
+                       goto out;
+               ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before,
+                                   NULL, path_before);
+               if (ret == -ENOENT) {
+                       ret = 0;
+                       break;
+               } else if (ret < 0) {
+                       goto out;
+               }
+
+               len1 = fs_path_len(path_before);
+               len2 = fs_path_len(path_after);
+               if (parent_ino_before != parent_ino_after || len1 != len2 ||
+                   memcmp(path_before->start, path_after->start, len1)) {
+                       ret = 1;
+                       if (register_upper_dirs) {
+                               break;
+                       } else {
+                               register_upper_dirs = 1;
+                               ino = parent_ref->dir;
+                               gen = parent_ref->dir_gen;
+                               goto again;
+                       }
+               } else if (register_upper_dirs) {
+                       ret = add_pending_dir_move(sctx, ino, gen,
+                                                  parent_ino_after);
+                       if (ret < 0 && ret != -EEXIST)
+                               goto out;
+               }
+
+               ino = parent_ino_after;
+               gen = parent_gen;
+       }
+
 out:
        fs_path_free(path_before);
        fs_path_free(path_after);
@@ -3402,7 +3461,9 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                        goto out;
                                if (ret) {
                                        ret = add_pending_dir_move(sctx,
-                                                                  cur->dir);
+                                                          sctx->cur_ino,
+                                                          sctx->cur_inode_gen,
+                                                          cur->dir);
                                        *pending_move = 1;
                                } else {
                                        ret = send_rename(sctx, valid_path,