This feels more at home in block-group.c than in extent-tree.c.
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ refresh ]
Signed-off-by: David Sterba <dsterba@suse.com>
#include "sysfs.h"
#include "tree-log.h"
#include "delalloc-space.h"
+#include "math.h"
void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
}
+
+static void force_metadata_allocation(struct btrfs_fs_info *info)
+{
+ struct list_head *head = &info->space_info;
+ struct btrfs_space_info *found;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(found, head, list) {
+ if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
+ found->force_alloc = CHUNK_ALLOC_FORCE;
+ }
+ rcu_read_unlock();
+}
+
+static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
+ struct btrfs_space_info *sinfo, int force)
+{
+ u64 bytes_used = btrfs_space_info_used(sinfo, false);
+ u64 thresh;
+
+ if (force == CHUNK_ALLOC_FORCE)
+ return 1;
+
+ /*
+ * in limited mode, we want to have some free space up to
+ * about 1% of the FS size.
+ */
+ if (force == CHUNK_ALLOC_LIMITED) {
+ thresh = btrfs_super_total_bytes(fs_info->super_copy);
+ thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
+
+ if (sinfo->total_bytes - bytes_used < thresh)
+ return 1;
+ }
+
+ if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
+ return 0;
+ return 1;
+}
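
/*
 * A minimal userspace sketch of the thresholds should_alloc_chunk()
 * computes above: roughly 1% of the fs size in limited mode, and the
 * roughly 80% usage cutoff otherwise. The div_factor helpers are local
 * stand-ins for the ones in fs/btrfs/math.h and every size below is an
 * illustrative assumption.
 */
#include <stdint.h>
#include <stdio.h>

#define SZ_64M (64ULL << 20)
#define SZ_2M  (2ULL << 20)

static uint64_t div_factor_fine(uint64_t num, int factor)
{
	return num * factor / 100;	/* factor is a percentage */
}

static uint64_t div_factor(uint64_t num, int factor)
{
	return num * factor / 10;	/* factor is in tenths */
}

int main(void)
{
	uint64_t fs_size = 100ULL << 30;	/* hypothetical 100 GiB fs */
	uint64_t total = 10ULL << 30;		/* chunks in this space_info */
	uint64_t used = 9ULL << 30;
	uint64_t thresh;

	/* CHUNK_ALLOC_LIMITED: keep about 1% of the fs size (at least 64M) free */
	thresh = div_factor_fine(fs_size, 1);
	if (thresh < SZ_64M)
		thresh = SZ_64M;
	printf("limited: alloc=%d\n", total - used < thresh);

	/* default: allocate once usage passes about 80% of the existing chunks */
	printf("default: alloc=%d\n", !(used + SZ_2M < div_factor(total, 8)));
	return 0;
}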
+
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
+{
+ u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type);
+
+ return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+}
+
+/*
+ * If force is CHUNK_ALLOC_FORCE:
+ * - return 1 if it successfully allocates a chunk,
+ * - return errors including -ENOSPC otherwise.
+ * If force is NOT CHUNK_ALLOC_FORCE:
+ * - return 0 if it doesn't need to allocate a new chunk,
+ * - return 1 if it successfully allocates a chunk,
+ * - return errors including -ENOSPC otherwise.
+ */
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+ enum btrfs_chunk_alloc_enum force)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_space_info *space_info;
+ bool wait_for_alloc = false;
+ bool should_alloc = false;
+ int ret = 0;
+
+ /* Don't re-enter if we're already allocating a chunk */
+ if (trans->allocating_chunk)
+ return -ENOSPC;
+
+ space_info = btrfs_find_space_info(fs_info, flags);
+ ASSERT(space_info);
+
+ do {
+ spin_lock(&space_info->lock);
+ if (force < space_info->force_alloc)
+ force = space_info->force_alloc;
+ should_alloc = should_alloc_chunk(fs_info, space_info, force);
+ if (space_info->full) {
+ /* No more free physical space */
+ if (should_alloc)
+ ret = -ENOSPC;
+ else
+ ret = 0;
+ spin_unlock(&space_info->lock);
+ return ret;
+ } else if (!should_alloc) {
+ spin_unlock(&space_info->lock);
+ return 0;
+ } else if (space_info->chunk_alloc) {
+ /*
+ * Someone is already allocating, so we need to block
+ * until this someone is finished and then loop to
+ * recheck if we should continue with our allocation
+ * attempt.
+ */
+ wait_for_alloc = true;
+ spin_unlock(&space_info->lock);
+ mutex_lock(&fs_info->chunk_mutex);
+ mutex_unlock(&fs_info->chunk_mutex);
+ } else {
+ /* Proceed with allocation */
+ space_info->chunk_alloc = 1;
+ wait_for_alloc = false;
+ spin_unlock(&space_info->lock);
+ }
+
+ cond_resched();
+ } while (wait_for_alloc);
+
+ mutex_lock(&fs_info->chunk_mutex);
+ trans->allocating_chunk = true;
+
+ /*
+ * If we have mixed data/metadata chunks we want to make sure we keep
+ * allocating mixed chunks instead of individual chunks.
+ */
+ if (btrfs_mixed_space_info(space_info))
+ flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
+ /*
+ * If we're allocating a data chunk, make sure we keep a reasonable
+ * number of metadata chunks allocated in the FS as well.
+ */
+ if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
+ fs_info->data_chunk_allocations++;
+ if (!(fs_info->data_chunk_allocations %
+ fs_info->metadata_ratio))
+ force_metadata_allocation(fs_info);
+ }
+
+ /*
+ * Check if we have enough space in SYSTEM chunk because we may need
+ * to update devices.
+ */
+ check_system_chunk(trans, flags);
+
+ ret = btrfs_alloc_chunk(trans, flags);
+ trans->allocating_chunk = false;
+
+ spin_lock(&space_info->lock);
+ if (ret < 0) {
+ if (ret == -ENOSPC)
+ space_info->full = 1;
+ else
+ goto out;
+ } else {
+ ret = 1;
+ space_info->max_extent_size = 0;
+ }
+
+ space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+out:
+ space_info->chunk_alloc = 0;
+ spin_unlock(&space_info->lock);
+ mutex_unlock(&fs_info->chunk_mutex);
+ /*
+ * When we allocate a new chunk we reserve space in the chunk block
+ * reserve to make sure we can COW nodes/leafs in the chunk tree or
+ * add new nodes/leafs to it if we end up needing to do it when
+ * inserting the chunk item and updating device items as part of the
+ * second phase of chunk allocation, performed by
+ * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
+ * large number of new block groups to create in our transaction
+ * handle's new_bgs list to avoid exhausting the chunk block reserve
+ * in extreme cases - like having a single transaction create many new
+ * block groups when starting to write out the free space caches of all
+ * the block groups that were made dirty during the lifetime of the
+ * transaction.
+ */
+ if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
+ btrfs_create_pending_block_groups(trans);
+
+ return ret;
+}
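
/*
 * A hedged caller sketch for the return contract documented above: 1 means
 * a chunk was allocated, 0 means none was needed, and negative values are
 * errors. The helper name and the choice to swallow -ENOSPC here are
 * illustrative assumptions, not code from the kernel tree.
 */
static int example_try_alloc_data_chunk(struct btrfs_trans_handle *trans)
{
	u64 flags = btrfs_get_alloc_profile(trans->fs_info,
					    BTRFS_BLOCK_GROUP_DATA);
	int ret = btrfs_chunk_alloc(trans, flags, CHUNK_ALLOC_NO_FORCE);

	/* A full space_info is not fatal for an opportunistic allocation. */
	if (ret == -ENOSPC)
		return 0;
	return ret < 0 ? ret : 0;
}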
+
+static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
+{
+ u64 num_dev;
+
+ num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
+ if (!num_dev)
+ num_dev = fs_info->fs_devices->rw_devices;
+
+ return num_dev;
+}
+
+/*
+ * Reserve space in the system space info for the device item updates and
+ * the chunk item insertion or removal needed when allocating or removing a
+ * chunk of the given @type.
+ */
+void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
+{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
+ struct btrfs_space_info *info;
+ u64 left;
+ u64 thresh;
+ int ret = 0;
+ u64 num_devs;
+
+ /*
+ * The chunk mutex is needed because we may allocate a system chunk
+ * here and the space reservation in the chunk block reserve must be
+ * atomic and race free.
+ */
+ lockdep_assert_held(&fs_info->chunk_mutex);
+
+ info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
+ spin_lock(&info->lock);
+ left = info->total_bytes - btrfs_space_info_used(info, true);
+ spin_unlock(&info->lock);
+
+ num_devs = get_profile_num_devs(fs_info, type);
+
+ /* num_devs device items to update and 1 chunk item to add or remove */
+ thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
+ btrfs_calc_trans_metadata_size(fs_info, 1);
+
+ if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
+ btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
+ left, thresh, type);
+ btrfs_dump_space_info(fs_info, info, 0, 0);
+ }
+
+ if (left < thresh) {
+ u64 flags = btrfs_system_alloc_profile(fs_info);
+
+ /*
+ * Ignore failure to create system chunk. We might end up not
+ * needing it, as we might not need to COW all nodes/leafs from
+ * the paths we visit in the chunk tree (they were already COWed
+ * or created in the current transaction for example).
+ */
+ ret = btrfs_alloc_chunk(trans, flags);
+ }
+
+ if (!ret) {
+ ret = btrfs_block_rsv_add(fs_info->chunk_root,
+ &fs_info->chunk_block_rsv,
+ thresh, BTRFS_RESERVE_NO_FLUSH);
+ if (!ret)
+ trans->chunk_bytes_reserved += thresh;
+ }
+}
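
/*
 * A rough worked example of the threshold reserved above, assuming the
 * helper formulas of this era (btrfs_calc_trunc_metadata_size() is about
 * nodesize * BTRFS_MAX_LEVEL * num_items, btrfs_calc_trans_metadata_size()
 * about twice that); both formulas and all constants below are assumptions
 * for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_BTRFS_MAX_LEVEL 8

static uint64_t ex_system_thresh(uint32_t nodesize, uint64_t num_devs)
{
	uint64_t dev_items = (uint64_t)nodesize * EX_BTRFS_MAX_LEVEL * num_devs;
	uint64_t chunk_item = (uint64_t)nodesize * EX_BTRFS_MAX_LEVEL * 2;

	/* num_devs device items to update plus one chunk item */
	return dev_items + chunk_item;
}

int main(void)
{
	/* 16K nodes, two writable devices (e.g. RAID1): 256K + 256K = 512K */
	printf("thresh=%llu\n",
	       (unsigned long long)ex_system_thresh(16384, 2));
	return 0;
}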
+
BTRFS_DC_SETUP,
};
+/*
+ * Control flags for btrfs_chunk_alloc()'s force field.
+ *
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk if we really need one.
+ *
+ * CHUNK_ALLOC_LIMITED means to only try and allocate one if we have very few
+ * chunks already allocated. This is used as part of the clustering code to
+ * help make sure we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one.
+ */
+enum btrfs_chunk_alloc_enum {
+ CHUNK_ALLOC_NO_FORCE,
+ CHUNK_ALLOC_LIMITED,
+ CHUNK_ALLOC_FORCE,
+};
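
/*
 * A small sketch of why the enum order above matters: btrfs_chunk_alloc()
 * escalates its force argument with a plain numeric comparison against
 * space_info->force_alloc, so CHUNK_ALLOC_NO_FORCE < CHUNK_ALLOC_LIMITED <
 * CHUNK_ALLOC_FORCE must hold. The helper below is illustrative only.
 */
static inline enum btrfs_chunk_alloc_enum
example_escalate_force(enum btrfs_chunk_alloc_enum requested,
		       enum btrfs_chunk_alloc_enum stored)
{
	/* Pick the stronger of the caller's request and the stored hint. */
	return requested < stored ? stored : requested;
}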
+
struct btrfs_caching_control {
struct list_head list;
struct mutex mutex;
u64 ram_bytes, u64 num_bytes, int delalloc);
void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int delalloc);
+int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+ enum btrfs_chunk_alloc_enum force);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
+void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
static inline int btrfs_block_group_cache_done(
struct btrfs_block_group_cache *cache)
COMMIT_TRANS = 9,
};
-/*
- * control flags for do_chunk_alloc's force field
- * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
- * if we really need one.
- *
- * CHUNK_ALLOC_LIMITED means to only try and allocate one
- * if we have very few chunks already allocated. This is
- * used as part of the clustering code to help make sure
- * we have a good pool of storage to cluster in, without
- * filling the FS with empty chunks
- *
- * CHUNK_ALLOC_FORCE means it must try to allocate one
- *
- */
-enum btrfs_chunk_alloc_enum {
- CHUNK_ALLOC_NO_FORCE,
- CHUNK_ALLOC_LIMITED,
- CHUNK_ALLOC_FORCE,
-};
-
-int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
- enum btrfs_chunk_alloc_enum force);
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int nitems, bool use_global_rsv);
u64 start, u64 end);
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 *actual_bytes);
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_start_write_no_snapshotting(struct btrfs_root *root);
void btrfs_end_write_no_snapshotting(struct btrfs_root *root);
void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
-void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
#include "space-info.h"
#include "transaction.h"
#include "qgroup.h"
+#include "block-group.h"
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
{
return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
}
-static void force_metadata_allocation(struct btrfs_fs_info *info)
-{
- struct list_head *head = &info->space_info;
- struct btrfs_space_info *found;
-
- rcu_read_lock();
- list_for_each_entry_rcu(found, head, list) {
- if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
- found->force_alloc = CHUNK_ALLOC_FORCE;
- }
- rcu_read_unlock();
-}
-
-static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *sinfo, int force)
-{
- u64 bytes_used = btrfs_space_info_used(sinfo, false);
- u64 thresh;
-
- if (force == CHUNK_ALLOC_FORCE)
- return 1;
-
- /*
- * in limited mode, we want to have some free space up to
- * about 1% of the FS size.
- */
- if (force == CHUNK_ALLOC_LIMITED) {
- thresh = btrfs_super_total_bytes(fs_info->super_copy);
- thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
-
- if (sinfo->total_bytes - bytes_used < thresh)
- return 1;
- }
-
- if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
- return 0;
- return 1;
-}
-
-static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
-{
- u64 num_dev;
-
- num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
- if (!num_dev)
- num_dev = fs_info->fs_devices->rw_devices;
-
- return num_dev;
-}
-
-/*
- * If @is_allocation is true, reserve space in the system space info necessary
- * for allocating a chunk, otherwise if it's false, reserve space necessary for
- * removing a chunk.
- */
-void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
-{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_space_info *info;
- u64 left;
- u64 thresh;
- int ret = 0;
- u64 num_devs;
-
- /*
- * Needed because we can end up allocating a system chunk and for an
- * atomic and race free space reservation in the chunk block reserve.
- */
- lockdep_assert_held(&fs_info->chunk_mutex);
-
- info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
- spin_lock(&info->lock);
- left = info->total_bytes - btrfs_space_info_used(info, true);
- spin_unlock(&info->lock);
-
- num_devs = get_profile_num_devs(fs_info, type);
-
- /* num_devs device items to update and 1 chunk item to add or remove */
- thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
- btrfs_calc_trans_metadata_size(fs_info, 1);
-
- if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
- btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
- left, thresh, type);
- btrfs_dump_space_info(fs_info, info, 0, 0);
- }
-
- if (left < thresh) {
- u64 flags = btrfs_system_alloc_profile(fs_info);
-
- /*
- * Ignore failure to create system chunk. We might end up not
- * needing it, as we might not need to COW all nodes/leafs from
- * the paths we visit in the chunk tree (they were already COWed
- * or created in the current transaction for example).
- */
- ret = btrfs_alloc_chunk(trans, flags);
- }
-
- if (!ret) {
- ret = btrfs_block_rsv_add(fs_info->chunk_root,
- &fs_info->chunk_block_rsv,
- thresh, BTRFS_RESERVE_NO_FLUSH);
- if (!ret)
- trans->chunk_bytes_reserved += thresh;
- }
-}
-
-/*
- * If force is CHUNK_ALLOC_FORCE:
- * - return 1 if it successfully allocates a chunk,
- * - return errors including -ENOSPC otherwise.
- * If force is NOT CHUNK_ALLOC_FORCE:
- * - return 0 if it doesn't need to allocate a new chunk,
- * - return 1 if it successfully allocates a chunk,
- * - return errors including -ENOSPC otherwise.
- */
-int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
- enum btrfs_chunk_alloc_enum force)
-{
- struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_space_info *space_info;
- bool wait_for_alloc = false;
- bool should_alloc = false;
- int ret = 0;
-
- /* Don't re-enter if we're already allocating a chunk */
- if (trans->allocating_chunk)
- return -ENOSPC;
-
- space_info = btrfs_find_space_info(fs_info, flags);
- ASSERT(space_info);
-
- do {
- spin_lock(&space_info->lock);
- if (force < space_info->force_alloc)
- force = space_info->force_alloc;
- should_alloc = should_alloc_chunk(fs_info, space_info, force);
- if (space_info->full) {
- /* No more free physical space */
- if (should_alloc)
- ret = -ENOSPC;
- else
- ret = 0;
- spin_unlock(&space_info->lock);
- return ret;
- } else if (!should_alloc) {
- spin_unlock(&space_info->lock);
- return 0;
- } else if (space_info->chunk_alloc) {
- /*
- * Someone is already allocating, so we need to block
- * until this someone is finished and then loop to
- * recheck if we should continue with our allocation
- * attempt.
- */
- wait_for_alloc = true;
- spin_unlock(&space_info->lock);
- mutex_lock(&fs_info->chunk_mutex);
- mutex_unlock(&fs_info->chunk_mutex);
- } else {
- /* Proceed with allocation */
- space_info->chunk_alloc = 1;
- wait_for_alloc = false;
- spin_unlock(&space_info->lock);
- }
-
- cond_resched();
- } while (wait_for_alloc);
-
- mutex_lock(&fs_info->chunk_mutex);
- trans->allocating_chunk = true;
-
- /*
- * If we have mixed data/metadata chunks we want to make sure we keep
- * allocating mixed chunks instead of individual chunks.
- */
- if (btrfs_mixed_space_info(space_info))
- flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
-
- /*
- * if we're doing a data chunk, go ahead and make sure that
- * we keep a reasonable number of metadata chunks allocated in the
- * FS as well.
- */
- if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
- fs_info->data_chunk_allocations++;
- if (!(fs_info->data_chunk_allocations %
- fs_info->metadata_ratio))
- force_metadata_allocation(fs_info);
- }
-
- /*
- * Check if we have enough space in SYSTEM chunk because we may need
- * to update devices.
- */
- check_system_chunk(trans, flags);
-
- ret = btrfs_alloc_chunk(trans, flags);
- trans->allocating_chunk = false;
-
- spin_lock(&space_info->lock);
- if (ret < 0) {
- if (ret == -ENOSPC)
- space_info->full = 1;
- else
- goto out;
- } else {
- ret = 1;
- space_info->max_extent_size = 0;
- }
-
- space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
-out:
- space_info->chunk_alloc = 0;
- spin_unlock(&space_info->lock);
- mutex_unlock(&fs_info->chunk_mutex);
- /*
- * When we allocate a new chunk we reserve space in the chunk block
- * reserve to make sure we can COW nodes/leafs in the chunk tree or
- * add new nodes/leafs to it if we end up needing to do it when
- * inserting the chunk item and updating device items as part of the
- * second phase of chunk allocation, performed by
- * btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
- * large number of new block groups to create in our transaction
- * handle's new_bgs list to avoid exhausting the chunk block reserve
- * in extreme cases - like having a single transaction create many new
- * block groups when starting to write out the free space caches of all
- * the block groups that were made dirty during the lifetime of the
- * transaction.
- */
- if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
- btrfs_create_pending_block_groups(trans);
-
- return ret;
-}
-
static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
{
struct btrfs_block_group_cache *cache;
return ret;
}
-int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
-{
- u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
-
- return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
-}
-
/*
* helper to account the unused space of all the readonly block group in the
* space_info. takes mirrors into account.