Btrfs: improve performance on fsync against new inode after rename/unlink

author Filipe Manana <fdmanana@suse.com>

Mon, 6 Jun 2016 15:11:13 +0000 (16:11 +0100)

committer Filipe Manana <fdmanana@suse.com>

Mon, 1 Aug 2016 06:32:14 +0000 (07:32 +0100)
author Filipe Manana <fdmanana@suse.com>
Mon, 6 Jun 2016 15:11:13 +0000 (16:11 +0100)
committer Filipe Manana <fdmanana@suse.com>
Mon, 1 Aug 2016 06:32:14 +0000 (07:32 +0100)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index f9686541997b2cdfda13feb5a3ce932c23ad6f0a..453b9d0da5f1613ac276112fc45729ab01d0413d 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4203,6 +4203,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
         int err = 0;
         struct btrfs_root *root = BTRFS_I(dir)->root;
         struct btrfs_trans_handle *trans;
+       u64 last_unlink_trans;
  
         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
                 return -ENOTEMPTY;
@@ -4225,11 +4226,27 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
         if (err)
                 goto out;
  
+       last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
         /* now the directory is empty */
         err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
                                  dentry->d_name.name, dentry->d_name.len);
-       if (!err)
+       if (!err) {
                 btrfs_i_size_write(inode, 0);
+               /*
+                * Propagate the last_unlink_trans value of the deleted dir to
+                * its parent directory. This is to prevent an unrecoverable
+                * log tree in the case we do something like this:
+                * 1) create dir foo
+                * 2) create snapshot under dir foo
+                * 3) delete the snapshot
+                * 4) rmdir foo
+                * 5) mkdir foo
+                * 6) fsync foo or some file inside foo
+                */
+               if (last_unlink_trans >= trans->transid)
+                       BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
+       }
  out:
         btrfs_end_transaction(trans, root);
         btrfs_btree_balance_dirty(root);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index c05f69a8ec42dad19269620e0ec7e8b38f91ca25..5fa624cd815d54e10436d1c75e92c4808b257065 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4469,7 +4469,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
  static int btrfs_check_ref_name_override(struct extent_buffer *eb,
                                          const int slot,
                                          const struct btrfs_key *key,
-                                        struct inode *inode)
+                                        struct inode *inode,
+                                        u64 *other_ino)
  {
         int ret;
         struct btrfs_path *search_path;
@@ -4528,7 +4529,16 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
                                            search_path, parent,
                                            name, this_name_len, 0);
                 if (di && !IS_ERR(di)) {
-                       ret = 1;
+                       struct btrfs_key di_key;
+
+                       btrfs_dir_item_key_to_cpu(search_path->nodes[0],
+                                                 di, &di_key);
+                       if (di_key.type == BTRFS_INODE_ITEM_KEY) {
+                               ret = 1;
+                               *other_ino = di_key.objectid;
+                       } else {
+                               ret = -EAGAIN;
+                       }
                         goto out;
                 } else if (IS_ERR(di)) {
                         ret = PTR_ERR(di);
@@ -4718,16 +4728,71 @@ again:
                 if ((min_key.type == BTRFS_INODE_REF_KEY ||
                      min_key.type == BTRFS_INODE_EXTREF_KEY) &&
                     BTRFS_I(inode)->generation == trans->transid) {
+                       u64 other_ino = 0;
+
                         ret = btrfs_check_ref_name_override(path->nodes[0],
                                                             path->slots[0],
-                                                           &min_key, inode);
+                                                           &min_key, inode,
+                                                           &other_ino);
                         if (ret < 0) {
                                 err = ret;
                                 goto out_unlock;
                         } else if (ret > 0) {
-                               err = 1;
-                               btrfs_set_log_full_commit(root->fs_info, trans);
-                               goto out_unlock;
+                               struct btrfs_key inode_key;
+                               struct inode *other_inode;
+
+                               if (ins_nr > 0) {
+                                       ins_nr++;
+                               } else {
+                                       ins_nr = 1;
+                                       ins_start_slot = path->slots[0];
+                               }
+                               ret = copy_items(trans, inode, dst_path, path,
+                                                &last_extent, ins_start_slot,
+                                                ins_nr, inode_only,
+                                                logged_isize);
+                               if (ret < 0) {
+                                       err = ret;
+                                       goto out_unlock;
+                               }
+                               ins_nr = 0;
+                               btrfs_release_path(path);
+                               inode_key.objectid = other_ino;
+                               inode_key.type = BTRFS_INODE_ITEM_KEY;
+                               inode_key.offset = 0;
+                               other_inode = btrfs_iget(root->fs_info->sb,
+                                                        &inode_key, root,
+                                                        NULL);
+                               /*
+                                * If the other inode that had a conflicting dir
+                                * entry was deleted in the current transaction,
+                                * we don't need to do more work nor fallback to
+                                * a transaction commit.
+                                */
+                               if (IS_ERR(other_inode) &&
+                                   PTR_ERR(other_inode) == -ENOENT) {
+                                       goto next_key;
+                               } else if (IS_ERR(other_inode)) {
+                                       err = PTR_ERR(other_inode);
+                                       goto out_unlock;
+                               }
+                               /*
+                                * We are safe logging the other inode without
+                                * acquiring its i_mutex as long as we log with
+                                * the LOG_INODE_EXISTS mode. We're safe against
+                                * concurrent renames of the other inode as well
+                                * because during a rename we pin the log and
+                                * update the log with the new name before we
+                                * unpin it.
+                                */
+                               err = btrfs_log_inode(trans, root, other_inode,
+                                                     LOG_INODE_EXISTS,
+                                                     0, LLONG_MAX, ctx);
+                               iput(other_inode);
+                               if (err)
+                                       goto out_unlock;
+                               else
+                                       goto next_key;
                         }
                 }
  
@@ -4795,7 +4860,7 @@ next_slot:
                         ins_nr = 0;
                 }
                 btrfs_release_path(path);
-
+next_key:
                 if (min_key.offset < (u64)-1) {
                         min_key.offset++;
                 } else if (min_key.type < max_key.type) {
@@ -4989,8 +5054,12 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                 if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
                         break;
  
-               if (IS_ROOT(parent))
+               if (IS_ROOT(parent)) {
+                       inode = d_inode(parent);
+                       if (btrfs_must_commit_transaction(trans, inode))
+                               ret = 1;
                         break;
+               }
  
                 parent = dget_parent(parent);
                 dput(old_parent);
author	Filipe Manana <fdmanana@suse.com>
	Mon, 6 Jun 2016 15:11:13 +0000 (16:11 +0100)
committer	Filipe Manana <fdmanana@suse.com>
	Mon, 1 Aug 2016 06:32:14 +0000 (07:32 +0100)
fs/btrfs/inode.c		patch \| blob \| history
fs/btrfs/tree-log.c		patch \| blob \| history