return sb->s_fs_info;
}
-struct btrfs_subvolume_writers {
- struct percpu_counter counter;
- wait_queue_head_t wait;
-};
-
/*
* The state of btrfs root
*/
* root_item_lock.
*/
int dedupe_in_progress;
- struct btrfs_subvolume_writers *subv_writers;
- atomic_t will_be_snapshotted;
+ /* For exclusion of snapshot creation and nocow writes */
+ struct btrfs_drew_lock snapshot_lock;
+
atomic_t snapshot_force_cow;
/* For qgroup metadata reserved space */
}
}
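For context: the drew lock ("double reader-writer exclusion") that replaces the subvolume writers machinery below lets any number of holders of one side run concurrently, but never both sides at once. In this patch, nocow writers take the write side and snapshot creation takes the read side. A minimal sketch of the structure, based on my reading of the fs/btrfs/locking.h definition introduced alongside this series (treat the field names as illustrative):

struct btrfs_drew_lock {
	/* Pending/running snapshot creators; non-zero fails the try-write-lock */
	atomic_t readers;
	/* Running nocow writers; must sum to zero before a reader proceeds */
	struct percpu_counter writers;
	wait_queue_head_t pending_writers;
	wait_queue_head_t pending_readers;
};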
-static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
-{
- struct btrfs_subvolume_writers *writers;
- int ret;
-
- writers = kmalloc(sizeof(*writers), GFP_NOFS);
- if (!writers)
- return ERR_PTR(-ENOMEM);
-
- ret = percpu_counter_init(&writers->counter, 0, GFP_NOFS);
- if (ret < 0) {
- kfree(writers);
- return ERR_PTR(ret);
- }
-
- init_waitqueue_head(&writers->wait);
- return writers;
-}
-
-static void
-btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers)
-{
- percpu_counter_destroy(&writers->counter);
- kfree(writers);
-}
-
static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
u64 objectid)
{
atomic_set(&root->log_writers, 0);
atomic_set(&root->log_batch, 0);
refcount_set(&root->refs, 1);
- atomic_set(&root->will_be_snapshotted, 0);
atomic_set(&root->snapshot_force_cow, 0);
atomic_set(&root->nr_swapfiles, 0);
root->log_transid = 0;
static int btrfs_init_fs_root(struct btrfs_root *root)
{
int ret;
- struct btrfs_subvolume_writers *writers;
+ unsigned int nofs_flag;
root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
goto fail;
}
- writers = btrfs_alloc_subvolume_writers();
- if (IS_ERR(writers)) {
- ret = PTR_ERR(writers);
+ /*
+ * We might be called under a transaction (e.g. indirect backref
+ * resolution), which could deadlock if it triggers memory reclaim.
+ */
+ nofs_flag = memalloc_nofs_save();
+ ret = btrfs_drew_lock_init(&root->snapshot_lock);
+ memalloc_nofs_restore(nofs_flag);
+ if (ret)
goto fail;
- }
- root->subv_writers = writers;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
set_bit(BTRFS_ROOT_REF_COWS, &root->state);
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
- if (root->subv_writers)
- btrfs_free_subvolume_writers(root->subv_writers);
+ btrfs_drew_lock_destroy(&root->snapshot_lock);
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
kfree(root->free_ino_ctl);
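Two notes on the init/destroy pairing above. btrfs_drew_lock_init() can fail, which is why it sits in the fallible btrfs_init_fs_root() rather than in the void __setup_root() where the old will_be_snapshotted counter was initialized. And the memalloc_nofs_save()/restore() pair scopes the lock's internal allocation: inside the scope, GFP_KERNEL allocations are implicitly degraded to GFP_NOFS, so reclaim cannot recurse into the filesystem. A sketch of the pattern, assuming the lock init allocates its per-CPU writer counter:

	unsigned int nofs_flag;

	nofs_flag = memalloc_nofs_save();
	/* GFP_KERNEL behaves as GFP_NOFS within the scope, e.g.: */
	ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);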
return bg_ret;
return dev_ret;
}
-
-/*
- * btrfs_{start,end}_write_no_snapshotting() are similar to
- * mnt_{want,drop}_write(), they are used to prevent some tasks from writing
- * data into the page cache through nocow before the subvolume is snapshoted,
- * but flush the data into disk after the snapshot creation, or to prevent
- * operations while snapshotting is ongoing and that cause the snapshot to be
- * inconsistent (writes followed by expanding truncates for example).
- */
-void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
-{
- percpu_counter_dec(&root->subv_writers->counter);
- cond_wake_up(&root->subv_writers->wait);
-}
-
-int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
-{
- if (atomic_read(&root->will_be_snapshotted))
- return 0;
-
- percpu_counter_inc(&root->subv_writers->counter);
- /*
- * Make sure counter is updated before we check for snapshot creation.
- */
- smp_mb();
- if (atomic_read(&root->will_be_snapshotted)) {
- btrfs_end_write_no_snapshotting(root);
- return 0;
- }
- return 1;
-}
-
-void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
-{
- while (true) {
- int ret;
-
- ret = btrfs_start_write_no_snapshotting(root);
- if (ret)
- break;
- wait_var_event(&root->will_be_snapshotted,
- !atomic_read(&root->will_be_snapshotted));
- }
-}
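The removed helpers map one-to-one onto drew lock primitives; every call-site conversion below follows this table:

/*
 * btrfs_start_write_no_snapshotting(root)  ->  btrfs_drew_try_write_lock(&root->snapshot_lock)
 * btrfs_end_write_no_snapshotting(root)    ->  btrfs_drew_write_unlock(&root->snapshot_lock)
 * btrfs_wait_for_snapshot_creation(root)   ->  btrfs_drew_write_lock(&root->snapshot_lock)
 * will_be_snapshotted inc + writer drain   ->  btrfs_drew_read_lock(&root->snapshot_lock)
 * will_be_snapshotted dec + wake_up_var    ->  btrfs_drew_read_unlock(&root->snapshot_lock)
 */

Note the try-lock keeps the old polarity (true on success), so the -EAGAIN fallback in the nocow write path below is unchanged in shape.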
u64 num_bytes;
int ret;
- ret = btrfs_start_write_no_snapshotting(root);
- if (!ret)
+ if (!btrfs_drew_try_write_lock(&root->snapshot_lock))
return -EAGAIN;
lockstart = round_down(pos, fs_info->sectorsize);
NULL, NULL, NULL);
if (ret <= 0) {
ret = 0;
- btrfs_end_write_no_snapshotting(root);
+ btrfs_drew_write_unlock(&root->snapshot_lock);
} else {
*write_bytes = min_t(size_t, *write_bytes,
num_bytes - pos + lockstart);
data_reserved, pos,
write_bytes);
else
- btrfs_end_write_no_snapshotting(root);
+ btrfs_drew_write_unlock(&root->snapshot_lock);
break;
}
release_bytes = 0;
if (only_release_metadata)
- btrfs_end_write_no_snapshotting(root);
+ btrfs_drew_write_unlock(&root->snapshot_lock);
if (only_release_metadata && copied > 0) {
lockstart = round_down(pos,
if (release_bytes) {
if (only_release_metadata) {
- btrfs_end_write_no_snapshotting(root);
+ btrfs_drew_write_unlock(&root->snapshot_lock);
btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes, true);
} else {
* truncation, it must capture all writes that happened before
* this truncation.
*/
- btrfs_wait_for_snapshot_creation(root);
+ btrfs_drew_write_lock(&root->snapshot_lock);
ret = btrfs_cont_expand(inode, oldsize, newsize);
if (ret) {
- btrfs_end_write_no_snapshotting(root);
+ btrfs_drew_write_unlock(&root->snapshot_lock);
return ret;
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
- btrfs_end_write_no_snapshotting(root);
+ btrfs_drew_write_unlock(&root->snapshot_lock);
return PTR_ERR(trans);
}
btrfs_inode_safe_disk_i_size_write(inode, 0);
pagecache_isize_extended(inode, oldsize, newsize);
ret = btrfs_update_inode(trans, root, inode);
- btrfs_end_write_no_snapshotting(root);
+ btrfs_drew_write_unlock(&root->snapshot_lock);
btrfs_end_transaction(trans);
} else {
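In the expanding-truncate path above, the blocking btrfs_drew_write_lock() is used instead of the try-lock: a truncate cannot fall back the way a nocow write can, so it waits out any running snapshot and then holds the lock across btrfs_cont_expand() and the inode update. Simplified shape, with the elided hunk context joined up:

	btrfs_drew_write_lock(&root->snapshot_lock);	/* blocks while a snapshot runs */
	ret = btrfs_cont_expand(inode, oldsize, newsize);
	if (ret) {
		btrfs_drew_write_unlock(&root->snapshot_lock);
		return ret;
	}
	/* ... start transaction, update i_size ... */
	btrfs_drew_write_unlock(&root->snapshot_lock);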
* possible. This is to avoid later writeback (running delalloc) falling
* back to COW mode and unexpectedly failing with ENOSPC.
*/
- atomic_inc(&root->will_be_snapshotted);
- smp_mb__after_atomic();
- /* wait for no snapshot writes */
- wait_event(root->subv_writers->wait,
- percpu_counter_sum(&root->subv_writers->counter) == 0);
+ btrfs_drew_read_lock(&root->snapshot_lock);
ret = btrfs_start_delalloc_snapshot(root);
if (ret)
dec_and_free:
if (snapshot_force_cow)
atomic_dec(&root->snapshot_force_cow);
- if (atomic_dec_and_test(&root->will_be_snapshotted))
- wake_up_var(&root->will_be_snapshotted);
+ btrfs_drew_read_unlock(&root->snapshot_lock);
+
free_pending:
kfree(pending_snapshot->root_item);
btrfs_free_path(pending_snapshot->path);
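On the snapshot side, btrfs_drew_read_lock() subsumes the open-coded sequence it replaces: publishing the pending snapshot (so new nocow writers fail the try-write-lock), the hand-placed memory barrier, and the wait for the per-CPU writer count to drain. The resulting shape of create_snapshot() is roughly:

	/* Advertise the snapshot and wait for running nocow writers to drain */
	btrfs_drew_read_lock(&root->snapshot_lock);
	ret = btrfs_start_delalloc_snapshot(root);	/* then flush queued data */
	/* ... create the snapshot ... */
	btrfs_drew_read_unlock(&root->snapshot_lock);	/* wakes blocked nocow writers */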