refcount_t refs;
};
-struct scrub_fixup_nodatasum {
- struct scrub_ctx *sctx;
- struct btrfs_device *dev;
- u64 logical;
- struct btrfs_root *root;
- struct btrfs_work work;
- int mirror_num;
-};
-
struct scrub_warning {
struct btrfs_path *path;
u64 extent_item_size;
static void scrub_pending_bio_inc(struct scrub_ctx *sctx);
static void scrub_pending_bio_dec(struct scrub_ctx *sctx);
-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx);
-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx);
static int scrub_handle_errored_block(struct scrub_block *sblock_to_check);
static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
struct scrub_block *sblocks_for_recheck);
return ret;
}
-/*
- * used for workers that require transaction commits (i.e., for the
- * NOCOW case)
- */
-static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
-{
- struct btrfs_fs_info *fs_info = sctx->fs_info;
-
- refcount_inc(&sctx->refs);
- /*
- * increment scrubs_running to prevent cancel requests from
- * completing as long as a worker is running. we must also
- * increment scrubs_paused to prevent deadlocking on pause
- * requests used for transactions commits (as the worker uses a
- * transaction context). it is safe to regard the worker
- * as paused for all matters practical. effectively, we only
- * avoid cancellation requests from completing.
- */
- mutex_lock(&fs_info->scrub_lock);
- atomic_inc(&fs_info->scrubs_running);
- atomic_inc(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
-
- /*
- * check if @scrubs_running=@scrubs_paused condition
- * inside wait_event() is not an atomic operation.
- * which means we may inc/dec @scrub_running/paused
- * at any time. Let's wake up @scrub_pause_wait as
- * much as we can to let commit transaction blocked less.
- */
- wake_up(&fs_info->scrub_pause_wait);
-
- atomic_inc(&sctx->workers_pending);
-}
-
-/* used for workers that require transaction commits */
-static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
-{
- struct btrfs_fs_info *fs_info = sctx->fs_info;
-
- /*
- * see scrub_pending_trans_workers_inc() why we're pretending
- * to be paused in the scrub counters
- */
- mutex_lock(&fs_info->scrub_lock);
- atomic_dec(&fs_info->scrubs_running);
- atomic_dec(&fs_info->scrubs_paused);
- mutex_unlock(&fs_info->scrub_lock);
- atomic_dec(&sctx->workers_pending);
- wake_up(&fs_info->scrub_pause_wait);
- wake_up(&sctx->list_wait);
- scrub_put_ctx(sctx);
-}
-
static void scrub_free_csums(struct scrub_ctx *sctx)
{
while (!list_empty(&sctx->csum_list)) {
btrfs_free_path(path);
}
-static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
-{
- struct page *page = NULL;
- unsigned long index;
- struct scrub_fixup_nodatasum *fixup = fixup_ctx;
- int ret;
- int corrected = 0;
- struct btrfs_key key;
- struct inode *inode = NULL;
- struct btrfs_fs_info *fs_info;
- u64 end = offset + PAGE_SIZE - 1;
- struct btrfs_root *local_root;
- int srcu_index;
-
- key.objectid = root;
- key.type = BTRFS_ROOT_ITEM_KEY;
- key.offset = (u64)-1;
-
- fs_info = fixup->root->fs_info;
- srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
-
- local_root = btrfs_read_fs_root_no_name(fs_info, &key);
- if (IS_ERR(local_root)) {
- srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
- return PTR_ERR(local_root);
- }
-
- key.type = BTRFS_INODE_ITEM_KEY;
- key.objectid = inum;
- key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
- srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
- index = offset >> PAGE_SHIFT;
-
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
- goto out;
- }
-
- if (PageUptodate(page)) {
- if (PageDirty(page)) {
- /*
- * we need to write the data to the defect sector. the
- * data that was in that sector is not in memory,
- * because the page was modified. we must not write the
- * modified page to that sector.
- *
- * TODO: what could be done here: wait for the delalloc
- * runner to write out that page (might involve
- * COW) and see whether the sector is still
- * referenced afterwards.
- *
- * For the meantime, we'll treat this error
- * incorrectable, although there is a chance that a
- * later scrub will find the bad sector again and that
- * there's no dirty page in memory, then.
- */
- ret = -EIO;
- goto out;
- }
- ret = repair_io_failure(fs_info, inum, offset, PAGE_SIZE,
- fixup->logical, page,
- offset - page_offset(page),
- fixup->mirror_num);
- unlock_page(page);
- corrected = !ret;
- } else {
- /*
- * we need to get good data first. the general readpage path
- * will call repair_io_failure for us, we just have to make
- * sure we read the bad mirror.
- */
- ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
- EXTENT_DAMAGED);
- if (ret) {
- /* set_extent_bits should give proper error */
- WARN_ON(ret > 0);
- if (ret > 0)
- ret = -EFAULT;
- goto out;
- }
-
- ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
- btrfs_get_extent,
- fixup->mirror_num);
- wait_on_page_locked(page);
-
- corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
- end, EXTENT_DAMAGED, 0, NULL);
- if (!corrected)
- clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
- EXTENT_DAMAGED);
- }
-
-out:
- if (page)
- put_page(page);
-
- iput(inode);
-
- if (ret < 0)
- return ret;
-
- if (ret == 0 && corrected) {
- /*
- * we only need to call readpage for one of the inodes belonging
- * to this extent. so make iterate_extent_inodes stop
- */
- return 1;
- }
-
- return -EIO;
-}
-
-static void scrub_fixup_nodatasum(struct btrfs_work *work)
-{
- struct btrfs_fs_info *fs_info;
- int ret;
- struct scrub_fixup_nodatasum *fixup;
- struct scrub_ctx *sctx;
- struct btrfs_trans_handle *trans = NULL;
- struct btrfs_path *path;
- int uncorrectable = 0;
-
- fixup = container_of(work, struct scrub_fixup_nodatasum, work);
- sctx = fixup->sctx;
- fs_info = fixup->root->fs_info;
-
- path = btrfs_alloc_path();
- if (!path) {
- spin_lock(&sctx->stat_lock);
- ++sctx->stat.malloc_errors;
- spin_unlock(&sctx->stat_lock);
- uncorrectable = 1;
- goto out;
- }
-
- trans = btrfs_join_transaction(fixup->root);
- if (IS_ERR(trans)) {
- uncorrectable = 1;
- goto out;
- }
-
- /*
- * the idea is to trigger a regular read through the standard path. we
- * read a page from the (failed) logical address by specifying the
- * corresponding copynum of the failed sector. thus, that readpage is
- * expected to fail.
- * that is the point where on-the-fly error correction will kick in
- * (once it's finished) and rewrite the failed sector if a good copy
- * can be found.
- */
- ret = iterate_inodes_from_logical(fixup->logical, fs_info, path,
- scrub_fixup_readpage, fixup, false);
- if (ret < 0) {
- uncorrectable = 1;
- goto out;
- }
- WARN_ON(ret != 1);
-
- spin_lock(&sctx->stat_lock);
- ++sctx->stat.corrected_errors;
- spin_unlock(&sctx->stat_lock);
-
-out:
- if (trans && !IS_ERR(trans))
- btrfs_end_transaction(trans);
- if (uncorrectable) {
- spin_lock(&sctx->stat_lock);
- ++sctx->stat.uncorrectable_errors;
- spin_unlock(&sctx->stat_lock);
- btrfs_dev_replace_stats_inc(
- &fs_info->dev_replace.num_uncorrectable_read_errors);
- btrfs_err_rl_in_rcu(fs_info,
- "unable to fixup (nodatasum) error at logical %llu on dev %s",
- fixup->logical, rcu_str_deref(fixup->dev->name));
- }
-
- btrfs_free_path(path);
- kfree(fixup);
-
- scrub_pending_trans_workers_dec(sctx);
-}
-
static inline void scrub_get_recover(struct scrub_recover *recover)
{
refcount_inc(&recover->refs);
goto out;
}
- /*
- * NOTE: Even for nodatasum case, it's still possible that it's a
- * compressed data extent, thus scrub_fixup_nodatasum(), which write
- * inode page cache onto disk, could cause serious data corruption.
- *
- * So here we could only read from disk, and hope our recovery could
- * reach disk before the newer write.
- */
- if (0 && !is_metadata && !have_csum) {
- struct scrub_fixup_nodatasum *fixup_nodatasum;
-
- WARN_ON(sctx->is_dev_replace);
-
- /*
- * !is_metadata and !have_csum, this means that the data
- * might not be COWed, that it might be modified
- * concurrently. The general strategy to work on the
- * commit root does not help in the case when COW is not
- * used.
- */
- fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS);
- if (!fixup_nodatasum)
- goto did_not_correct_error;
- fixup_nodatasum->sctx = sctx;
- fixup_nodatasum->dev = dev;
- fixup_nodatasum->logical = logical;
- fixup_nodatasum->root = fs_info->extent_root;
- fixup_nodatasum->mirror_num = failed_mirror_index + 1;
- scrub_pending_trans_workers_inc(sctx);
- btrfs_init_work(&fixup_nodatasum->work, btrfs_scrub_helper,
- scrub_fixup_nodatasum, NULL, NULL);
- btrfs_queue_work(fs_info->scrub_workers,
- &fixup_nodatasum->work);
- goto out;
- }
-
/*
* now build and submit the bios for the other mirrors, check
* checksums.