/* IO errors */
int errors;
+ int mirror_num;
/* for reads, this is the bio we are copying the data into */
struct bio *orig_bio;
+
+ /*
+ * the start of a variable length array of checksums only
+ * used by reads
+ */
+ u32 sums;
};
+/*
+ * Number of bytes to allocate for a compressed_bio that describes
+ * disk_size bytes on disk: the struct itself followed by one checksum
+ * slot of csum_size bytes per sector, with disk_size rounded up to a
+ * whole number of sectors.
+ */
+static inline int compressed_bio_size(struct btrfs_root *root,
+				      unsigned long disk_size)
+{
+	u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
+	unsigned long nr_sectors;
+
+	/* round up so a partial trailing sector still gets a slot */
+	nr_sectors = (disk_size + root->sectorsize - 1) / root->sectorsize;
+
+	return sizeof(struct compressed_bio) + nr_sectors * csum_size;
+}
+
static struct bio *compressed_bio_alloc(struct block_device *bdev,
u64 first_byte, gfp_t gfp_flags)
{
return bio;
}
+/*
+ * Verify the compressed pages attached to cb against the checksums stored
+ * in the array that starts at cb->sums.
+ *
+ * One checksum is computed over PAGE_CACHE_SIZE bytes of each page in
+ * cb->compressed_pages and compared with the next u32 in that array.
+ * disk_start is only used in the failure message.
+ *
+ * Returns 0 when every page matches, or when data checksums are disabled
+ * by the mount option or the inode flag; returns -EIO on the first
+ * mismatch.
+ */
+static int check_compressed_csum(struct inode *inode,
+				 struct compressed_bio *cb,
+				 u64 disk_start)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	u32 *expected = &cb->sums;
+	unsigned long pg;
+
+	/* nothing to verify when data checksumming is turned off */
+	if (btrfs_test_opt(root, NODATASUM) ||
+	    btrfs_test_flag(inode, NODATASUM))
+		return 0;
+
+	for (pg = 0; pg < cb->nr_pages; pg++, expected++) {
+		struct page *page = cb->compressed_pages[pg];
+		u32 csum = ~(u32)0;
+		char *kaddr;
+
+		kaddr = kmap_atomic(page, KM_USER0);
+		csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
+		btrfs_csum_final(csum, (char *)&csum);
+		kunmap_atomic(kaddr, KM_USER0);
+
+		if (csum == *expected)
+			continue;
+
+		printk("btrfs csum failed ino %lu extent %llu csum %u "
+		       "wanted %u mirror %d\n", inode->i_ino,
+		       (unsigned long long)disk_start,
+		       csum, *expected, cb->mirror_num);
+		return -EIO;
+	}
+
+	return 0;
+}
+
/* when we finish reading compressed pages from the disk, we
* decompress them and then run the bio end_io routines on the
* decompressed pages (in the inode address space).
if (!atomic_dec_and_test(&cb->pending_bios))
goto out;
+ inode = cb->inode;
+ ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
+ if (ret)
+ goto csum_failed;
+
/* ok, we're the last bio for this extent, lets start
* the decompression.
*/
- inode = cb->inode;
tree = &BTRFS_I(inode)->io_tree;
ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
cb->start,
cb->orig_bio->bi_io_vec,
cb->orig_bio->bi_vcnt,
cb->compressed_len);
+csum_failed:
if (ret)
cb->errors = 1;
/* do io completion on the original bio */
if (cb->errors) {
bio_io_error(cb->orig_bio);
- } else
+ } else {
+ int bio_index = 0;
+ struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
+
+ /*
+ * we have verified the checksum already, set page
+ * checked so the end_io handlers know about it
+ */
+ while(bio_index < cb->orig_bio->bi_vcnt) {
+ SetPageChecked(bvec->bv_page);
+ bvec++;
+ bio_index++;
+ }
bio_endio(cb->orig_bio, 0);
+ }
/* finally free the cb struct */
kfree(cb->compressed_pages);
int ret;
WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
- cb = kmalloc(sizeof(*cb), GFP_NOFS);
+ cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
atomic_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
cb->start = start;
cb->len = len;
+ cb->mirror_num = 0;
cb->compressed_pages = compressed_pages;
cb->compressed_len = compressed_len;
cb->orig_bio = NULL;
bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
- ret = btrfs_csum_file_bytes(root, inode, start, len);
- BUG_ON(ret);
-
bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
+ ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
+ BUG_ON(ret);
+
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
BUG_ON(ret);
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
BUG_ON(ret);
+ ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
+ BUG_ON(ret);
+
ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
BUG_ON(ret);
u64 em_start;
struct extent_map *em;
int ret;
+ u32 *sums;
tree = &BTRFS_I(inode)->io_tree;
em_tree = &BTRFS_I(inode)->extent_tree;
PAGE_CACHE_SIZE);
spin_unlock(&em_tree->lock);
- cb = kmalloc(sizeof(*cb), GFP_NOFS);
+ compressed_len = em->block_len;
+ cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
atomic_set(&cb->pending_bios, 0);
cb->errors = 0;
cb->inode = inode;
+ cb->mirror_num = mirror_num;
+ sums = &cb->sums;
cb->start = em->orig_start;
- compressed_len = em->block_len;
em_len = em->len;
em_start = em->start;
+
free_extent_map(em);
em = NULL;
add_ra_bio_pages(inode, em_start + em_len, cb);
- if (!btrfs_test_opt(root, NODATASUM) &&
- !btrfs_test_flag(inode, NODATASUM)) {
- btrfs_lookup_bio_sums(root, inode, cb->orig_bio);
- }
-
/* include any pages we added in add_ra-bio_pages */
uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
cb->len = uncompressed_len;
for (page_index = 0; page_index < nr_pages; page_index++) {
page = cb->compressed_pages[page_index];
page->mapping = inode->i_mapping;
+ page->index = em_start >> PAGE_CACHE_SHIFT;
+
if (comp_bio->bi_size)
ret = tree->ops->merge_bio_hook(page, 0,
PAGE_CACHE_SIZE,
*/
atomic_inc(&cb->pending_bios);
- ret = btrfs_map_bio(root, READ, comp_bio, 0, 0);
+ if (!btrfs_test_opt(root, NODATASUM) &&
+ !btrfs_test_flag(inode, NODATASUM)) {
+ btrfs_lookup_bio_sums(root, inode, comp_bio,
+ sums);
+ }
+ sums += (comp_bio->bi_size + root->sectorsize - 1) /
+ root->sectorsize;
+
+ ret = btrfs_map_bio(root, READ, comp_bio,
+ mirror_num, 0);
BUG_ON(ret);
bio_put(comp_bio);
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
BUG_ON(ret);
- ret = btrfs_map_bio(root, READ, comp_bio, 0, 0);
+ if (!btrfs_test_opt(root, NODATASUM) &&
+ !btrfs_test_flag(inode, NODATASUM)) {
+ btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+ }
+
+ ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
BUG_ON(ret);
bio_put(comp_bio);
/* directory objectid inside the root tree */
#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
+/* holds checksums of all the data extents */
+#define BTRFS_CSUM_TREE_OBJECTID 7ULL
+
/* orhpan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL
#define BTRFS_TREE_RELOC_OBJECTID -8ULL
#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
+/*
+ * extent checksums all have this objectid
+ * this allows them to share the logging tree
+ * for fsyncs
+ */
+#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+
/* dummy objectid represents multiple objectids */
#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
struct btrfs_root *chunk_root;
struct btrfs_root *dev_root;
struct btrfs_root *fs_root;
+ struct btrfs_root *csum_root;
/* the log root tree is a directory of all the other log roots */
struct btrfs_root *log_root_tree;
struct btrfs_workers workers;
struct btrfs_workers delalloc_workers;
struct btrfs_workers endio_workers;
+ struct btrfs_workers endio_meta_workers;
struct btrfs_workers endio_write_workers;
struct btrfs_workers submit_workers;
/*
* extent data is for file data
*/
#define BTRFS_EXTENT_DATA_KEY 108
+
/*
- * csum items have the checksums for data in the extents
+ * extent csums are stored in a separate tree and hold csums for
+ * an entire extent on disk.
*/
-#define BTRFS_CSUM_ITEM_KEY 120
-
-
-/* reserve 21-31 for other file/dir stuff */
+#define BTRFS_EXTENT_CSUM_KEY 128
/*
* root items point to tree roots. There are typically in the root
/* file-item.c */
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
- struct bio *bio);
+ struct bio *bio, u32 *dst);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
struct btrfs_path *path, u64 objectid,
u64 bytenr, int mod);
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio);
+ struct bio *bio, u64 file_start, int contig);
int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
u64 start, unsigned long len);
struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- u64 objectid, u64 offset,
- int cow);
+ u64 bytenr, int cow);
int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
u64 isize);
end_io_wq->error = err;
end_io_wq->work.func = end_workqueue_fn;
end_io_wq->work.flags = 0;
- if (bio->bi_rw & (1 << BIO_RW))
+
+ if (bio->bi_rw & (1 << BIO_RW)) {
btrfs_queue_worker(&fs_info->endio_write_workers,
&end_io_wq->work);
- else
- btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work);
+ } else {
+ if (end_io_wq->metadata)
+ btrfs_queue_worker(&fs_info->endio_meta_workers,
+ &end_io_wq->work);
+ else
+ btrfs_queue_worker(&fs_info->endio_workers,
+ &end_io_wq->work);
+ }
}
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
info = (struct btrfs_fs_info *)bdi->unplug_io_data;
list_for_each(cur, &info->fs_devices->devices) {
device = list_entry(cur, struct btrfs_device, dev_list);
+ if (!device->bdev)
+ continue;
+
bdi = blk_get_backing_dev_info(device->bdev);
if (bdi->unplug_io_fn) {
bdi->unplug_io_fn(bdi, page);
* blocksize <= pagesize, it is basically a noop
*/
if (end_io_wq->metadata && !bio_ready_for_csum(bio)) {
- btrfs_queue_worker(&fs_info->endio_workers,
+ btrfs_queue_worker(&fs_info->endio_meta_workers,
&end_io_wq->work);
return;
}
struct buffer_head *bh;
struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
GFP_NOFS);
+ struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
+ GFP_NOFS);
struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
GFP_NOFS);
struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
struct btrfs_super_block *disk_super;
if (!extent_root || !tree_root || !fs_info ||
- !chunk_root || !dev_root) {
+ !chunk_root || !dev_root || !csum_root) {
err = -ENOMEM;
goto fail;
}
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;
fs_info->extent_root = extent_root;
+ fs_info->csum_root = csum_root;
fs_info->chunk_root = chunk_root;
fs_info->dev_root = dev_root;
fs_info->fs_devices = fs_devices;
btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1);
btrfs_init_workers(&fs_info->endio_workers, "endio",
fs_info->thread_pool_size);
+ btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
+ fs_info->thread_pool_size);
btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
fs_info->thread_pool_size);
btrfs_start_workers(&fs_info->delalloc_workers, 1);
btrfs_start_workers(&fs_info->fixup_workers, 1);
btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
+ btrfs_start_workers(&fs_info->endio_meta_workers,
+ fs_info->thread_pool_size);
btrfs_start_workers(&fs_info->endio_write_workers,
fs_info->thread_pool_size);
if (ret)
goto fail_extent_root;
+ ret = find_and_setup_root(tree_root, fs_info,
+ BTRFS_CSUM_TREE_OBJECTID, csum_root);
+ if (ret)
+ goto fail_extent_root;
+
+ csum_root->track_dirty = 1;
+
btrfs_read_block_groups(extent_root);
fs_info->generation = generation + 1;
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
"btrfs-cleaner");
if (!fs_info->cleaner_kthread)
- goto fail_extent_root;
+ goto fail_csum_root;
fs_info->transaction_kthread = kthread_run(transaction_kthread,
tree_root,
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
+fail_csum_root:
+ free_extent_buffer(csum_root->node);
fail_extent_root:
free_extent_buffer(extent_root->node);
fail_tree_root:
btrfs_stop_workers(&fs_info->delalloc_workers);
btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers);
+ btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
fail_iput:
kfree(fs_info);
kfree(chunk_root);
kfree(dev_root);
+ kfree(csum_root);
return ERR_PTR(err);
}
if (root->fs_info->dev_root->node);
free_extent_buffer(root->fs_info->dev_root->node);
+	/*
+	 * Only release the csum root's buffer when one is attached.  The
+	 * condition must not end in ';', otherwise the if has an empty body
+	 * and the free below runs unconditionally.
+	 */
+	if (root->fs_info->csum_root->node)
+		free_extent_buffer(root->fs_info->csum_root->node);
+
btrfs_free_block_groups(root->fs_info);
del_fs_roots(fs_info);
btrfs_stop_workers(&fs_info->delalloc_workers);
btrfs_stop_workers(&fs_info->workers);
btrfs_stop_workers(&fs_info->endio_workers);
+ btrfs_stop_workers(&fs_info->endio_meta_workers);
btrfs_stop_workers(&fs_info->endio_write_workers);
btrfs_stop_workers(&fs_info->submit_workers);
kfree(fs_info->tree_root);
kfree(fs_info->chunk_root);
kfree(fs_info->dev_root);
+ kfree(fs_info->csum_root);
return 0;
}
int whole_page;
int ret;
+ if (err)
+ uptodate = 0;
+
do {
struct page *page = bvec->bv_page;
tree = &BTRFS_I(page->mapping->host)->io_tree;
if (ret == 0) {
uptodate =
test_bit(BIO_UPTODATE, &bio->bi_flags);
+ if (err)
+ uptodate = 0;
continue;
}
}
struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- u64 objectid, u64 offset,
- int cow)
+ u64 bytenr, int cow)
{
int ret;
struct btrfs_key file_key;
btrfs_super_csum_size(&root->fs_info->super_copy);
int csums_in_item;
- file_key.objectid = objectid;
- file_key.offset = offset;
- btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+ file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+ file_key.offset = bytenr;
+ btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
if (ret < 0)
goto fail;
goto fail;
path->slots[0]--;
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
- found_key.objectid != objectid) {
+ if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY)
goto fail;
- }
- csum_offset = (offset - found_key.offset) >>
+
+ csum_offset = (bytenr - found_key.offset) >>
root->fs_info->sb->s_blocksize_bits;
csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
csums_in_item /= csum_size;
}
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
- struct bio *bio)
+ struct bio *bio, u32 *dst)
{
u32 sum;
struct bio_vec *bvec = bio->bi_io_vec;
u64 offset;
u64 item_start_offset = 0;
u64 item_last_offset = 0;
+ u64 disk_bytenr;
u32 diff;
u16 csum_size =
btrfs_super_csum_size(&root->fs_info->super_copy);
WARN_ON(bio->bi_vcnt <= 0);
+ disk_bytenr = (u64)bio->bi_sector << 9;
while(bio_index < bio->bi_vcnt) {
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
- ret = btrfs_find_ordered_sum(inode, offset, &sum);
+ ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
if (ret == 0)
goto found;
- if (!item || offset < item_start_offset ||
- offset >= item_last_offset) {
+ if (!item || disk_bytenr < item_start_offset ||
+ disk_bytenr >= item_last_offset) {
struct btrfs_key found_key;
u32 item_size;
if (item)
btrfs_release_path(root, path);
- item = btrfs_lookup_csum(NULL, root, path,
- inode->i_ino, offset, 0);
+ item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
+ path, disk_bytenr, 0);
if (IS_ERR(item)) {
ret = PTR_ERR(item);
if (ret == -ENOENT || ret == -EFBIG)
* this byte range must be able to fit inside
* a single leaf so it will also fit inside a u32
*/
- diff = offset - item_start_offset;
+ diff = disk_bytenr - item_start_offset;
diff = diff / root->sectorsize;
diff = diff * csum_size;
((unsigned long)item) + diff,
csum_size);
found:
- set_state_private(io_tree, offset, sum);
+ if (dst)
+ *dst++ = sum;
+ else
+ set_state_private(io_tree, offset, sum);
+ disk_bytenr += bvec->bv_len;
bio_index++;
bvec++;
}
return 0;
}
-int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
- u64 start, unsigned long len)
-{
- struct btrfs_ordered_sum *sums;
- struct btrfs_sector_sum *sector_sum;
- struct btrfs_ordered_extent *ordered;
- char *data;
- struct page *page;
- unsigned long total_bytes = 0;
- unsigned long this_sum_bytes = 0;
-
- sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
- if (!sums)
- return -ENOMEM;
-
- sector_sum = sums->sums;
- sums->file_offset = start;
- sums->len = len;
- INIT_LIST_HEAD(&sums->list);
- ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset);
- BUG_ON(!ordered);
-
- while(len > 0) {
- if (start >= ordered->file_offset + ordered->len ||
- start < ordered->file_offset) {
- sums->len = this_sum_bytes;
- this_sum_bytes = 0;
- btrfs_add_ordered_sum(inode, ordered, sums);
- btrfs_put_ordered_extent(ordered);
-
- sums = kzalloc(btrfs_ordered_sum_size(root, len),
- GFP_NOFS);
- BUG_ON(!sums);
- sector_sum = sums->sums;
- sums->len = len;
- sums->file_offset = start;
- ordered = btrfs_lookup_ordered_extent(inode,
- sums->file_offset);
- BUG_ON(!ordered);
- }
-
- page = find_get_page(inode->i_mapping,
- start >> PAGE_CACHE_SHIFT);
-
- data = kmap_atomic(page, KM_USER0);
- sector_sum->sum = ~(u32)0;
- sector_sum->sum = btrfs_csum_data(root, data, sector_sum->sum,
- PAGE_CACHE_SIZE);
- kunmap_atomic(data, KM_USER0);
- btrfs_csum_final(sector_sum->sum,
-				 (char *)&sector_sum->sum);
- sector_sum->offset = page_offset(page);
- page_cache_release(page);
-
- sector_sum++;
- total_bytes += PAGE_CACHE_SIZE;
- this_sum_bytes += PAGE_CACHE_SIZE;
- start += PAGE_CACHE_SIZE;
-
- WARN_ON(len < PAGE_CACHE_SIZE);
- len -= PAGE_CACHE_SIZE;
- }
- btrfs_add_ordered_sum(inode, ordered, sums);
- btrfs_put_ordered_extent(ordered);
- return 0;
-}
-
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
- struct bio *bio)
+ struct bio *bio, u64 file_start, int contig)
{
struct btrfs_ordered_sum *sums;
struct btrfs_sector_sum *sector_sum;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
u64 offset;
+ u64 disk_bytenr;
WARN_ON(bio->bi_vcnt <= 0);
sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
return -ENOMEM;
sector_sum = sums->sums;
- sums->file_offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+ disk_bytenr = (u64)bio->bi_sector << 9;
sums->len = bio->bi_size;
INIT_LIST_HEAD(&sums->list);
- ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset);
+
+ if (contig)
+ offset = file_start;
+ else
+ offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+
+ ordered = btrfs_lookup_ordered_extent(inode, offset);
BUG_ON(!ordered);
+ sums->bytenr = ordered->start;
while(bio_index < bio->bi_vcnt) {
- offset = page_offset(bvec->bv_page) + bvec->bv_offset;
- if (offset >= ordered->file_offset + ordered->len ||
- offset < ordered->file_offset) {
+ if (!contig)
+ offset = page_offset(bvec->bv_page) + bvec->bv_offset;
+
+ if (!contig && (offset >= ordered->file_offset + ordered->len ||
+ offset < ordered->file_offset)) {
unsigned long bytes_left;
sums->len = this_sum_bytes;
this_sum_bytes = 0;
BUG_ON(!sums);
sector_sum = sums->sums;
sums->len = bytes_left;
- sums->file_offset = offset;
- ordered = btrfs_lookup_ordered_extent(inode,
- sums->file_offset);
+ ordered = btrfs_lookup_ordered_extent(inode, offset);
BUG_ON(!ordered);
+ sums->bytenr = ordered->start;
}
data = kmap_atomic(bvec->bv_page, KM_USER0);
kunmap_atomic(data, KM_USER0);
btrfs_csum_final(sector_sum->sum,
(char *)&sector_sum->sum);
- sector_sum->offset = page_offset(bvec->bv_page) +
- bvec->bv_offset;
+ sector_sum->bytenr = disk_bytenr;
sector_sum++;
bio_index++;
total_bytes += bvec->bv_len;
this_sum_bytes += bvec->bv_len;
+ disk_bytenr += bvec->bv_len;
+ offset += bvec->bv_len;
bvec++;
}
this_sum_bytes = 0;
}
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
+ struct btrfs_root *root,
struct btrfs_ordered_sum *sums)
{
- u64 objectid = inode->i_ino;
- u64 offset;
+ u64 bytenr;
int ret;
struct btrfs_key file_key;
struct btrfs_key found_key;
again:
next_offset = (u64)-1;
found_next = 0;
- offset = sector_sum->offset;
- file_key.objectid = objectid;
- file_key.offset = offset;
- btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY);
+ file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
+ file_key.offset = sector_sum->bytenr;
+ bytenr = sector_sum->bytenr;
+ btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
- mutex_lock(&BTRFS_I(inode)->csum_mutex);
- item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1);
+ item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1);
if (!IS_ERR(item)) {
leaf = path->nodes[0];
ret = 0;
slot = 0;
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
- if (found_key.objectid != objectid ||
- found_key.type != BTRFS_CSUM_ITEM_KEY) {
+ if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
+ found_key.type != BTRFS_EXTENT_CSUM_KEY) {
found_next = 1;
goto insert;
}
path->slots[0]--;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- csum_offset = (offset - found_key.offset) >>
+ csum_offset = (bytenr - found_key.offset) >>
root->fs_info->sb->s_blocksize_bits;
- if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY ||
- found_key.objectid != objectid ||
+ if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY ||
+ found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) {
goto insert;
}
btrfs_release_path(root, path);
csum_offset = 0;
if (found_next) {
- u64 tmp = min((u64)i_size_read(inode), next_offset);
- tmp -= offset & ~((u64)root->sectorsize -1);
+ u64 tmp = total_bytes + root->sectorsize;
+ u64 next_sector = sector_sum->bytenr;
+ struct btrfs_sector_sum *next = sector_sum + 1;
+
+ while(tmp < sums->len) {
+ if (next_sector + root->sectorsize != next->bytenr)
+ break;
+ tmp += root->sectorsize;
+ next_sector = next->bytenr;
+ next++;
+ }
+ tmp = min(tmp, next_offset - file_key.offset);
tmp >>= root->fs_info->sb->s_blocksize_bits;
tmp = max((u64)1, tmp);
tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
btrfs_item_size_nr(leaf, path->slots[0]));
eb_token = NULL;
- mutex_unlock(&BTRFS_I(inode)->csum_mutex);
cond_resched();
next_sector:
if (total_bytes < sums->len) {
item = (struct btrfs_csum_item *)((char *)item +
csum_size);
- if (item < item_end && offset + PAGE_CACHE_SIZE ==
- sector_sum->offset) {
- offset = sector_sum->offset;
+ if (item < item_end && bytenr + PAGE_CACHE_SIZE ==
+ sector_sum->bytenr) {
+ bytenr = sector_sum->bytenr;
goto next_sector;
}
}
return ret;
fail_unlock:
- mutex_unlock(&BTRFS_I(inode)->csum_mutex);
goto out;
}
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
- ret = btrfs_csum_one_bio(root, inode, bio);
+ ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
BUG_ON(ret);
return 0;
}
btrfs_test_flag(inode, NODATASUM);
if (!(rw & (1 << BIO_RW))) {
-
- if (bio_flags & EXTENT_BIO_COMPRESSED)
+ if (bio_flags & EXTENT_BIO_COMPRESSED) {
return btrfs_submit_compressed_read(inode, bio,
mirror_num, bio_flags);
- else if (!skip_sum)
- btrfs_lookup_bio_sums(root, inode, bio);
+ } else if (!skip_sum)
+ btrfs_lookup_bio_sums(root, inode, bio, NULL);
goto mapit;
} else if (!skip_sum) {
/* we're doing a write, do the async checksumming */
btrfs_set_trans_block_group(trans, inode);
list_for_each(cur, list) {
sum = list_entry(cur, struct btrfs_ordered_sum, list);
- btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root,
- inode, sum);
+ btrfs_csum_file_blocks(trans,
+ BTRFS_I(inode)->root->fs_info->csum_root, sum);
}
return 0;
}
u64 start;
u64 len;
u64 logical;
+ unsigned long bio_flags;
int last_mirror;
};
int ret;
int rw;
u64 logical;
- unsigned long bio_flags = 0;
ret = get_state_private(failure_tree, start, &private);
if (ret) {
failrec->start = start;
failrec->len = end - start + 1;
failrec->last_mirror = 0;
+ failrec->bio_flags = 0;
spin_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, failrec->len);
}
logical = start - em->start;
logical = em->block_start + logical;
- if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
- bio_flags = EXTENT_BIO_COMPRESSED;
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
+ logical = em->block_start;
+ failrec->bio_flags = EXTENT_BIO_COMPRESSED;
+ }
failrec->logical = logical;
free_extent_map(em);
set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
bio->bi_sector = failrec->logical >> 9;
bio->bi_bdev = failed_bio->bi_bdev;
bio->bi_size = 0;
+
bio_add_page(bio, page, failrec->len, start - page_offset(page));
if (failed_bio->bi_rw & (1 << BIO_RW))
rw = WRITE;
BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
failrec->last_mirror,
- bio_flags);
+ failrec->bio_flags);
return 0;
}
u32 csum = ~(u32)0;
unsigned long flags;
+ if (PageChecked(page)) {
+ ClearPageChecked(page);
+ goto good;
+ }
if (btrfs_test_opt(root, NODATASUM) ||
btrfs_test_flag(inode, NODATASUM))
return 0;
+
if (state && state->start == start) {
private = state->private;
ret = 0;
}
kunmap_atomic(kaddr, KM_IRQ0);
local_irq_restore(flags);
-
+good:
/* if the io failure tree for this inode is non-empty,
* check to see if we've recovered from a failed IO
*/
return err;
}
+#if 0
/*
* when truncating bytes in a file, it is possible to avoid reading
* the leaves that contain only checksum items. This can be the
return ret;
}
+#endif
+
/*
* this can truncate away extent items, csum items and directory items.
* It starts at a high offset and removes keys until it can't find
btrfs_init_path(path);
- ret = drop_csum_leaves(trans, root, path, inode, new_size);
- BUG_ON(ret);
-
search_again:
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0) {
}
item_end--;
}
- if (found_type == BTRFS_CSUM_ITEM_KEY) {
- ret = btrfs_csum_truncate(trans, root, path,
- new_size);
- BUG_ON(ret);
- }
if (item_end < new_size) {
if (found_type == BTRFS_DIR_ITEM_KEY) {
found_type = BTRFS_INODE_ITEM_KEY;
} else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
- found_type = BTRFS_CSUM_ITEM_KEY;
+ found_type = BTRFS_EXTENT_DATA_KEY;
} else if (found_type == BTRFS_EXTENT_DATA_KEY) {
found_type = BTRFS_XATTR_ITEM_KEY;
} else if (found_type == BTRFS_XATTR_ITEM_KEY) {
u64 len = olen;
u64 bs = root->fs_info->sb->s_blocksize;
u64 hint_byte;
- u16 csum_size =
- btrfs_super_csum_size(&root->fs_info->super_copy);
+
/*
* TODO:
* - split compressed inline extents. annoying: we need to
slot = path->slots[0];
btrfs_item_key_to_cpu(leaf, &key, slot);
- if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY ||
+ if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
key.objectid != src->i_ino)
break;
btrfs_mark_buffer_dirty(leaf);
}
- if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
- u32 size;
- struct btrfs_key new_key;
- u64 coverslen;
- int coff, clen;
-
- size = btrfs_item_size_nr(leaf, slot);
- coverslen = (size / csum_size) <<
- root->fs_info->sb->s_blocksize_bits;
- printk("csums for %llu~%llu\n",
- key.offset, coverslen);
- if (key.offset + coverslen < off ||
- key.offset >= off+len)
- goto next;
-
- read_extent_buffer(leaf, buf,
- btrfs_item_ptr_offset(leaf, slot),
- size);
- btrfs_release_path(root, path);
-
- coff = 0;
- if (off > key.offset)
- coff = ((off - key.offset) >>
- root->fs_info->sb->s_blocksize_bits) *
- csum_size;
- clen = size - coff;
- if (key.offset + coverslen > off+len)
- clen -= ((key.offset+coverslen-off-len) >>
- root->fs_info->sb->s_blocksize_bits) *
- csum_size;
- printk(" will dup %d~%d of %d\n",
- coff, clen, size);
-
- memcpy(&new_key, &key, sizeof(new_key));
- new_key.objectid = inode->i_ino;
- new_key.offset = key.offset + destoff - off;
-
- ret = btrfs_insert_empty_item(trans, root, path,
- &new_key, clen);
- if (ret)
- goto out;
-
- leaf = path->nodes[0];
- slot = path->slots[0];
- write_extent_buffer(leaf, buf + coff,
- btrfs_item_ptr_offset(leaf, slot),
- clen);
- btrfs_mark_buffer_dirty(leaf);
- }
-
next:
btrfs_release_path(root, path);
key.offset++;
* try to find a checksum. This is used because we allow pages to
* be reclaimed before their checksum is actually put into the btree
*/
-int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
+ u32 *sum)
{
struct btrfs_ordered_sum *ordered_sum;
struct btrfs_sector_sum *sector_sums;
mutex_lock(&tree->mutex);
list_for_each_prev(cur, &ordered->list) {
ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
- if (offset >= ordered_sum->file_offset) {
+ if (disk_bytenr >= ordered_sum->bytenr) {
num_sectors = ordered_sum->len / sectorsize;
sector_sums = ordered_sum->sums;
for (i = 0; i < num_sectors; i++) {
- if (sector_sums[i].offset == offset) {
+ if (sector_sums[i].bytenr == disk_bytenr) {
*sum = sector_sums[i].sum;
ret = 0;
goto out;
* the ordered extent are on disk
*/
struct btrfs_sector_sum {
- u64 offset;
+ /* bytenr on disk */
+ u64 bytenr;
u32 sum;
};
struct btrfs_ordered_sum {
- u64 file_offset;
+ /* bytenr is the start of this extent on disk */
+ u64 bytenr;
+
/*
* this is the length in bytes covered by the sums array below.
- * But, the sums array may not be contiguous in the file.
*/
unsigned long len;
struct list_head list;
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
int btrfs_ordered_update_i_size(struct inode *inode,
struct btrfs_ordered_extent *ordered);
-int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum);
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
pgoff_t start, pgoff_t end);
int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
unsigned long file_bytes;
struct btrfs_ordered_sum *sums;
struct btrfs_sector_sum *sector_sum;
- struct inode *inode;
unsigned long ptr;
file_bytes = (item_size / csum_size) * root->sectorsize;
- inode = read_one_inode(root, key->objectid);
- if (!inode) {
- return -EIO;
- }
-
sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS);
if (!sums) {
- iput(inode);
return -ENOMEM;
}
INIT_LIST_HEAD(&sums->list);
sums->len = file_bytes;
- sums->file_offset = key->offset;
+ sums->bytenr = key->offset;
/*
* copy all the sums into the ordered sum struct
cur_offset = key->offset;
ptr = btrfs_item_ptr_offset(eb, slot);
while(item_size > 0) {
- sector_sum->offset = cur_offset;
+ sector_sum->bytenr = cur_offset;
read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size);
sector_sum++;
item_size -= csum_size;
}
/* let btrfs_csum_file_blocks add them into the file */
- ret = btrfs_csum_file_blocks(trans, root, inode, sums);
+ ret = btrfs_csum_file_blocks(trans, root->fs_info->csum_root, sums);
BUG_ON(ret);
kfree(sums);
- iput(inode);
-
return 0;
}
/*
ret = replay_one_extent(wc->trans, root, path,
eb, i, &key);
BUG_ON(ret);
- } else if (key.type == BTRFS_CSUM_ITEM_KEY) {
+ } else if (key.type == BTRFS_EXTENT_CSUM_KEY) {
ret = replay_one_csum(wc->trans, root, path,
eb, i, &key);
BUG_ON(ret);
return 0;
}
+/*
+ * Collect the checksums covering the disk range [disk_bytenr,
+ * disk_bytenr + len) from the csum tree rooted at @root into a newly
+ * allocated btrfs_ordered_sum and append it to @list.  The list owner is
+ * responsible for freeing the entry.
+ *
+ * One btrfs_sector_sum is filled in per root->sectorsize bytes.  A sector
+ * whose checksum lookup fails (for any reason) is logged and recorded
+ * with a sum of 0; it does not fail the call.
+ *
+ * @trans is not used here.
+ *
+ * Returns 0 on success or -ENOMEM if the path or the sums buffer cannot
+ * be allocated, in which case nothing is added to @list.
+ */
+static noinline int copy_extent_csums(struct btrfs_trans_handle *trans,
+				      struct list_head *list,
+				      struct btrfs_root *root,
+				      u64 disk_bytenr, u64 len)
+{
+	struct btrfs_ordered_sum *sums;
+	struct btrfs_sector_sum *sector_sum;
+	struct btrfs_path *path;
+	struct btrfs_csum_item *item = NULL;
+	u64 end = disk_bytenr + len;
+	u64 item_start_offset = 0;
+	u64 item_last_offset = 0;
+	u32 diff;
+	u32 sum;
+	u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
+
+	/*
+	 * Do both allocations up front and check them, so a failure never
+	 * dereferences NULL and never leaves a half-built entry on @list.
+	 */
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
+	if (!sums) {
+		btrfs_free_path(path);
+		return -ENOMEM;
+	}
+
+	sector_sum = sums->sums;
+	sums->bytenr = disk_bytenr;
+	sums->len = len;
+	list_add_tail(&sums->list, list);
+
+	while (disk_bytenr < end) {
+		/* look up a new csum item once we step outside the cached one */
+		if (!item || disk_bytenr < item_start_offset ||
+		    disk_bytenr >= item_last_offset) {
+			struct btrfs_key found_key;
+			u32 item_size;
+
+			if (item)
+				btrfs_release_path(root, path);
+			item = btrfs_lookup_csum(NULL, root, path,
+						 disk_bytenr, 0);
+			if (IS_ERR(item)) {
+				/*
+				 * Every lookup failure is treated as "no
+				 * checksum": record 0 for this sector and
+				 * carry on with the next one.
+				 */
+				sum = 0;
+				printk("log no csum found for byte %llu\n",
+				       (unsigned long long)disk_bytenr);
+				item = NULL;
+				btrfs_release_path(root, path);
+				goto found;
+			}
+			btrfs_item_key_to_cpu(path->nodes[0], &found_key,
+					      path->slots[0]);
+
+			item_start_offset = found_key.offset;
+			item_size = btrfs_item_size_nr(path->nodes[0],
+						       path->slots[0]);
+			item_last_offset = item_start_offset +
+				(item_size / csum_size) *
+				root->sectorsize;
+			item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+					      struct btrfs_csum_item);
+		}
+		/*
+		 * this byte range must be able to fit inside
+		 * a single leaf so it will also fit inside a u32
+		 */
+		diff = disk_bytenr - item_start_offset;
+		diff = diff / root->sectorsize;
+		diff = diff * csum_size;
+
+		/* NOTE(review): copies csum_size bytes into a u32 - confirm csum_size <= 4 */
+		read_extent_buffer(path->nodes[0], &sum,
+				   ((unsigned long)item) + diff,
+				   csum_size);
+found:
+		sector_sum->bytenr = disk_bytenr;
+		sector_sum->sum = sum;
+		disk_bytenr += root->sectorsize;
+		sector_sum++;
+	}
+	btrfs_free_path(path);
+	return 0;
+}
+
static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_root *log,
struct btrfs_path *dst_path,
u32 *ins_sizes;
char *ins_data;
int i;
+ struct list_head ordered_sums;
+
+ INIT_LIST_HEAD(&ordered_sums);
ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
nr * sizeof(u32), GFP_NOFS);
extent);
u64 dl = btrfs_file_extent_disk_num_bytes(src,
extent);
+ u64 cs = btrfs_file_extent_offset(src, extent);
+ u64 cl = btrfs_file_extent_num_bytes(src,
+ extent);;
/* ds == 0 is a hole */
if (ds != 0) {
ret = btrfs_inc_extent_ref(trans, log,
trans->transid,
ins_keys[i].objectid);
BUG_ON(ret);
+ ret = copy_extent_csums(trans,
+ &ordered_sums,
+ log->fs_info->csum_root,
+ ds + cs, cl);
+ BUG_ON(ret);
}
}
}
btrfs_mark_buffer_dirty(dst_path->nodes[0]);
btrfs_release_path(log, dst_path);
kfree(ins_data);
+
+ /*
+ * we have to do this after the loop above to avoid changing the
+ * log tree while trying to change the log tree.
+ */
+ while(!list_empty(&ordered_sums)) {
+ struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
+ struct btrfs_ordered_sum,
+ list);
+ ret = btrfs_csum_file_blocks(trans, log, sums);
+ BUG_ON(ret);
+ list_del(&sums->list);
+ kfree(sums);
+ }
return 0;
}
device->work.func = pending_bios_fn;
fs_devices->num_devices++;
spin_lock_init(&device->io_lock);
+ INIT_LIST_HEAD(&device->dev_alloc_list);
memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
return device;
}