*/
struct list_head delalloc_inodes;
- /*
- * list for tracking inodes that must be sent to disk before a
- * rename or truncate commit
- */
- struct list_head ordered_operations;
-
/* node for the red-black tree that links inodes in subvolume root */
struct rb_node rb_node;
static void free_fs_root(struct btrfs_root *root);
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only);
-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
- struct btrfs_root *root);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
btrfs_cleanup_transaction(root);
}
-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
- struct btrfs_root *root)
-{
- struct btrfs_inode *btrfs_inode;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
-
- mutex_lock(&root->fs_info->ordered_operations_mutex);
- spin_lock(&root->fs_info->ordered_root_lock);
-
- list_splice_init(&t->ordered_operations, &splice);
- while (!list_empty(&splice)) {
- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
- ordered_operations);
-
- list_del_init(&btrfs_inode->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
-
- btrfs_invalidate_inodes(btrfs_inode->root);
-
- spin_lock(&root->fs_info->ordered_root_lock);
- }
-
- spin_unlock(&root->fs_info->ordered_root_lock);
- mutex_unlock(&root->fs_info->ordered_operations_mutex);
-}
-
static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
{
struct btrfs_ordered_extent *ordered;
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
struct btrfs_root *root)
{
- btrfs_destroy_ordered_operations(cur_trans, root);
-
btrfs_destroy_delayed_refs(cur_trans, root);
cur_trans->state = TRANS_STATE_COMMIT_START;
int btrfs_release_file(struct inode *inode, struct file *filp)
{
- /*
- * ordered_data_close is set by settattr when we are about to truncate
- * a file from a non-zero size to a zero size. This tries to
- * flush down new bytes that may have been written if the
- * application were using truncate to replace a file in place.
- */
- if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
- &BTRFS_I(inode)->runtime_flags)) {
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(inode)->root;
-
- /*
- * We need to block on a committing transaction to keep us from
- * throwing a ordered operation on to the list and causing
- * something like sync to deadlock trying to flush out this
- * inode.
- */
- trans = btrfs_start_transaction(root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
- btrfs_end_transaction(trans, root);
- if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
- filemap_flush(inode->i_mapping);
- }
if (filp->private_data)
btrfs_ioctl_trans_end(filp);
+ filemap_flush(inode->i_mapping);
return 0;
}
min_size);
BUG_ON(ret);
- /*
- * setattr is responsible for setting the ordered_data_close flag,
- * but that is only tested during the last file release. That
- * could happen well after the next commit, leaving a great big
- * window where new writes may get lost if someone chooses to write
- * to this file after truncating to zero
- *
- * The inode doesn't have any dirty data here, and so if we commit
- * this is a noop. If someone immediately starts writing to the inode
- * it is very likely we'll catch some of their writes in this
- * transaction, and the commit will find this file on the ordered
- * data list with good things to send down.
- *
- * This is a best effort solution, there is still a window where
- * using truncate to replace the contents of the file will
- * end up with a zero length file after a crash.
- */
- if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
- &BTRFS_I(inode)->runtime_flags))
- btrfs_add_ordered_operation(trans, root, inode);
-
/*
* So if we truncate and then write and fsync we normally would just
* write the extents that changed, which is a problem if we need to
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes);
- INIT_LIST_HEAD(&ei->ordered_operations);
RB_CLEAR_NODE(&ei->rb_node);
return inode;
if (!root)
goto free;
- /*
- * Make sure we're properly removed from the ordered operation
- * lists.
- */
- smp_mb();
- if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
- spin_lock(&root->fs_info->ordered_root_lock);
- list_del_init(&BTRFS_I(inode)->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
- }
-
if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_info(root->fs_info, "inode %llu still on the orphan list",
ret = 0;
/*
- * we're using rename to replace one file with another.
- * and the replacement file is large. Start IO on it now so
- * we don't add too much work to the end of the transaction
+ * we're using rename to replace one file with another. Start IO on it
+ * now so we don't add too much work to the end of the transaction
*/
- if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
- old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+ if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
filemap_flush(old_inode->i_mapping);
/* close the racy window with snapshot create/destroy ioctl */
*/
btrfs_pin_log_trans(root);
}
- /*
- * make sure the inode gets flushed if it is replacing
- * something.
- */
- if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
- btrfs_add_ordered_operation(trans, root, old_inode);
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
trace_btrfs_ordered_extent_remove(inode, entry);
- /*
- * we have no more ordered extents for this inode and
- * no dirty pages. We can safely remove it from the
- * list of ordered extents
- */
- if (RB_EMPTY_ROOT(&tree->tree) &&
- !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
- spin_lock(&root->fs_info->ordered_root_lock);
- list_del_init(&BTRFS_I(inode)->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
- }
-
if (!root->nr_ordered_extents) {
spin_lock(&root->fs_info->ordered_root_lock);
BUG_ON(list_empty(&root->ordered_root));
mutex_unlock(&fs_info->ordered_operations_mutex);
}
-/*
- * this is used during transaction commit to write all the inodes
- * added to the ordered operation list. These files must be fully on
- * disk before the transaction commits.
- *
- * we have two modes here, one is to just start the IO via filemap_flush
- * and the other is to wait for all the io. When we wait, we have an
- * extra check to make sure the ordered operation list really is empty
- * before we return
- */
-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int wait)
-{
- struct btrfs_inode *btrfs_inode;
- struct inode *inode;
- struct btrfs_transaction *cur_trans = trans->transaction;
- struct list_head splice;
- struct list_head works;
- struct btrfs_delalloc_work *work, *next;
- int ret = 0;
-
- INIT_LIST_HEAD(&splice);
- INIT_LIST_HEAD(&works);
-
- mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
- spin_lock(&root->fs_info->ordered_root_lock);
- list_splice_init(&cur_trans->ordered_operations, &splice);
- while (!list_empty(&splice)) {
- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
- ordered_operations);
- inode = &btrfs_inode->vfs_inode;
-
- list_del_init(&btrfs_inode->ordered_operations);
-
- /*
- * the inode may be getting freed (in sys_unlink path).
- */
- inode = igrab(inode);
- if (!inode)
- continue;
-
- if (!wait)
- list_add_tail(&BTRFS_I(inode)->ordered_operations,
- &cur_trans->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
-
- work = btrfs_alloc_delalloc_work(inode, wait, 1);
- if (!work) {
- spin_lock(&root->fs_info->ordered_root_lock);
- if (list_empty(&BTRFS_I(inode)->ordered_operations))
- list_add_tail(&btrfs_inode->ordered_operations,
- &splice);
- list_splice_tail(&splice,
- &cur_trans->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
- ret = -ENOMEM;
- goto out;
- }
- list_add_tail(&work->list, &works);
- btrfs_queue_work(root->fs_info->flush_workers,
- &work->work);
-
- cond_resched();
- spin_lock(&root->fs_info->ordered_root_lock);
- }
- spin_unlock(&root->fs_info->ordered_root_lock);
-out:
- list_for_each_entry_safe(work, next, &works, list) {
- list_del_init(&work->list);
- btrfs_wait_and_free_delalloc_work(work);
- }
- mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
- return ret;
-}
-
/*
* Used to start IO or wait for a given ordered extent to finish.
*
return index;
}
-
-/*
- * add a given inode to the list of inodes that must be fully on
- * disk before a transaction commit finishes.
- *
- * This basically gives us the ext3 style data=ordered mode, and it is mostly
- * used to make sure renamed files are fully on disk.
- *
- * It is a noop if the inode is already fully on disk.
- *
- * If trans is not null, we'll do a friendly check for a transaction that
- * is already flushing things and force the IO down ourselves.
- */
-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode)
-{
- struct btrfs_transaction *cur_trans = trans->transaction;
- u64 last_mod;
-
- last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
-
- /*
- * if this file hasn't been changed since the last transaction
- * commit, we can safely return without doing anything
- */
- if (last_mod <= root->fs_info->last_trans_committed)
- return;
-
- spin_lock(&root->fs_info->ordered_root_lock);
- if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
- list_add_tail(&BTRFS_I(inode)->ordered_operations,
- &cur_trans->ordered_operations);
- }
- spin_unlock(&root->fs_info->ordered_root_lock);
-}
-
int __init ordered_data_init(void)
{
btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u32 *sum, int len);
-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, int wait);
-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode);
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
void btrfs_get_logged_extents(struct inode *inode,
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
- INIT_LIST_HEAD(&cur_trans->ordered_operations);
INIT_LIST_HEAD(&cur_trans->pending_chunks);
INIT_LIST_HEAD(&cur_trans->switch_commits);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
kmem_cache_free(btrfs_trans_handle_cachep, trans);
}
-static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
-{
- int ret;
-
- ret = btrfs_run_delayed_items(trans, root);
- if (ret)
- return ret;
-
- /*
- * rename don't use btrfs_join_transaction, so, once we
- * set the transaction to blocked above, we aren't going
- * to get any new ordered operations. We can safely run
- * it here and no for sure that nothing new will be added
- * to the list
- */
- ret = btrfs_run_ordered_operations(trans, root, 1);
-
- return ret;
-}
-
static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
{
if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
struct btrfs_transaction *prev_trans = NULL;
int ret;
- ret = btrfs_run_ordered_operations(trans, root, 0);
- if (ret) {
- btrfs_abort_transaction(trans, root, ret);
- btrfs_end_transaction(trans, root);
- return ret;
- }
-
/* Stop the commit early if ->aborted is set */
if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
if (ret)
goto cleanup_transaction;
- ret = btrfs_flush_all_pending_stuffs(trans, root);
+ ret = btrfs_run_delayed_items(trans, root);
if (ret)
goto cleanup_transaction;
extwriter_counter_read(cur_trans) == 0);
/* some pending stuffs might be added after the previous flush. */
- ret = btrfs_flush_all_pending_stuffs(trans, root);
+ ret = btrfs_run_delayed_items(trans, root);
if (ret)
goto cleanup_transaction;
wait_queue_head_t writer_wait;
wait_queue_head_t commit_wait;
struct list_head pending_snapshots;
- struct list_head ordered_operations;
struct list_head pending_chunks;
struct list_head switch_commits;
struct btrfs_delayed_ref_root delayed_refs;