Btrfs: tree logging checksum fixes
author Yan Zheng <zheng.yan@oracle.com>
Tue, 6 Jan 2009 16:42:00 +0000 (11:42 -0500)
committer Chris Mason <chris.mason@oracle.com>
Tue, 6 Jan 2009 16:42:00 +0000 (11:42 -0500)
This patch contains the following changes:

1) Limit the maximum size of the btrfs_ordered_sum structure to PAGE_SIZE.
This struct is kmalloced, so we want to keep its size reasonable.

2) Replace copy_extent_csums with btrfs_lookup_csums_range.
copy_extent_csums was duplicated code in tree-log.c.

3) Remove replay_one_csum. Csum items are now replayed at the same time as
   the file extents they cover. This guarantees we only replay useful csums.

4) Fix nbytes accounting.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
fs/btrfs/extent-tree.c
fs/btrfs/file-item.c
fs/btrfs/inode.c
fs/btrfs/tree-log.c

index 171ca30a3755c892e3a829fa122acd1b048dab59..293da650873f5193c726c4883bc6ecc5114b0a1a 100644 (file)
@@ -5579,7 +5579,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
        BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
 
        disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
-       ret = btrfs_lookup_csums_range(root, disk_bytenr,
+       ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
                                       disk_bytenr + len - 1, &list);
 
        while (!list_empty(&list)) {
index b11abfad81a5bb8f8634c2d347d6441aae6cdfb0..964652435fd1a5f40e061cfe1c3a7dd1d1198248 100644 (file)
 #define MAX_CSUM_ITEMS(r, size) ((((BTRFS_LEAF_DATA_SIZE(r) - \
                                   sizeof(struct btrfs_item) * 2) / \
                                  size) - 1))
+
+#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
+                                  sizeof(struct btrfs_ordered_sum)) / \
+                                  sizeof(struct btrfs_sector_sum) * \
+                                  (r)->sectorsize - (r)->sectorsize)
+
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             u64 objectid, u64 pos,
@@ -259,8 +265,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
        key.offset = start;
        key.type = BTRFS_EXTENT_CSUM_KEY;
 
-       ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
-                               &key, path, 0, 0);
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto fail;
        if (ret > 0 && path->slots[0] > 0) {
@@ -279,7 +284,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
        while (start <= end) {
                leaf = path->nodes[0];
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
-                       ret = btrfs_next_leaf(root->fs_info->csum_root, path);
+                       ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                goto fail;
                        if (ret > 0)
@@ -306,33 +311,38 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
                        continue;
                }
 
-               size = min(csum_end, end + 1) - start;
-               sums = kzalloc(btrfs_ordered_sum_size(root, size), GFP_NOFS);
-               BUG_ON(!sums);
+               csum_end = min(csum_end, end + 1);
+               item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                     struct btrfs_csum_item);
+               while (start < csum_end) {
+                       size = min_t(size_t, csum_end - start,
+                                       MAX_ORDERED_SUM_BYTES(root));
+                       sums = kzalloc(btrfs_ordered_sum_size(root, size),
+                                       GFP_NOFS);
+                       BUG_ON(!sums);
 
-               sector_sum = sums->sums;
-               sums->bytenr = start;
-               sums->len = size;
+                       sector_sum = sums->sums;
+                       sums->bytenr = start;
+                       sums->len = size;
 
-               offset = (start - key.offset) >>
-                        root->fs_info->sb->s_blocksize_bits;
-               offset *= csum_size;
+                       offset = (start - key.offset) >>
+                               root->fs_info->sb->s_blocksize_bits;
+                       offset *= csum_size;
 
-               item = btrfs_item_ptr(path->nodes[0], path->slots[0],
-                                     struct btrfs_csum_item);
-               while (size > 0) {
-                       read_extent_buffer(path->nodes[0], &sector_sum->sum,
-                                          ((unsigned long)item) + offset,
-                                          csum_size);
-                       sector_sum->bytenr = start;
-
-                       size -= root->sectorsize;
-                       start += root->sectorsize;
-                       offset += csum_size;
-                       sector_sum++;
+                       while (size > 0) {
+                               read_extent_buffer(path->nodes[0],
+                                               &sector_sum->sum,
+                                               ((unsigned long)item) +
+                                               offset, csum_size);
+                               sector_sum->bytenr = start;
+
+                               size -= root->sectorsize;
+                               start += root->sectorsize;
+                               offset += csum_size;
+                               sector_sum++;
+                       }
+                       list_add_tail(&sums->list, list);
                }
-               list_add_tail(&sums->list, list);
-
                path->slots[0]++;
        }
        ret = 0;
index c0ca9c3723c0661823b4c0f6b7c018c66dcfd369..4e57fe68e4b9365858489e0506a4c6e290e8d242 100644 (file)
@@ -157,7 +157,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
        key.objectid = inode->i_ino;
        key.offset = start;
        btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
-       inode_add_bytes(inode, size);
        datasize = btrfs_file_extent_calc_inline_size(cur_size);
 
        inode_add_bytes(inode, size);
@@ -920,8 +919,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
        struct btrfs_ordered_sum *sums;
        LIST_HEAD(list);
 
-       ret = btrfs_lookup_csums_range(root, bytenr, bytenr + num_bytes - 1,
-                                      &list);
+       ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
+                                      bytenr + num_bytes - 1, &list);
        if (ret == 0 && list_empty(&list))
                return 0;
 
index 3a72a1b6c247c5be470471cba1328d7f8cf51d44..332ec35d2c0858351aa1d7937d2576e55bc1ad99 100644 (file)
@@ -433,49 +433,6 @@ insert:
                                                   trans->transid);
                }
        }
-
-       if (overwrite_root &&
-           key->type == BTRFS_EXTENT_DATA_KEY) {
-               int extent_type;
-               struct btrfs_file_extent_item *fi;
-
-               fi = (struct btrfs_file_extent_item *)dst_ptr;
-               extent_type = btrfs_file_extent_type(path->nodes[0], fi);
-               if (extent_type == BTRFS_FILE_EXTENT_REG ||
-                   extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
-                       struct btrfs_key ins;
-                       ins.objectid = btrfs_file_extent_disk_bytenr(
-                                                       path->nodes[0], fi);
-                       ins.offset = btrfs_file_extent_disk_num_bytes(
-                                                       path->nodes[0], fi);
-                       ins.type = BTRFS_EXTENT_ITEM_KEY;
-
-                       /*
-                        * is this extent already allocated in the extent
-                        * allocation tree?  If so, just add a reference
-                        */
-                       ret = btrfs_lookup_extent(root, ins.objectid,
-                                                 ins.offset);
-                       if (ret == 0) {
-                               ret = btrfs_inc_extent_ref(trans, root,
-                                               ins.objectid, ins.offset,
-                                               path->nodes[0]->start,
-                                               root->root_key.objectid,
-                                               trans->transid, key->objectid);
-                       } else {
-                               /*
-                                * insert the extent pointer in the extent
-                                * allocation tree
-                                */
-                               ret = btrfs_alloc_logged_extent(trans, root,
-                                               path->nodes[0]->start,
-                                               root->root_key.objectid,
-                                               trans->transid, key->objectid,
-                                               &ins);
-                               BUG_ON(ret);
-                       }
-               }
-       }
 no_copy:
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_release_path(root, path);
@@ -530,6 +487,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
        u64 extent_end;
        u64 alloc_hint;
        u64 start = key->offset;
+       u64 saved_nbytes;
        struct btrfs_file_extent_item *item;
        struct inode *inode = NULL;
        unsigned long size;
@@ -591,17 +549,95 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
        }
        btrfs_release_path(root, path);
 
+       saved_nbytes = inode_get_bytes(inode);
        /* drop any overlapping extents */
        ret = btrfs_drop_extents(trans, root, inode,
                         start, extent_end, start, &alloc_hint);
        BUG_ON(ret);
 
-       /* insert the extent */
-       ret = overwrite_item(trans, root, path, eb, slot, key);
-       BUG_ON(ret);
+       if (found_type == BTRFS_FILE_EXTENT_REG ||
+           found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+               unsigned long dest_offset;
+               struct btrfs_key ins;
+
+               ret = btrfs_insert_empty_item(trans, root, path, key,
+                                             sizeof(*item));
+               BUG_ON(ret);
+               dest_offset = btrfs_item_ptr_offset(path->nodes[0],
+                                                   path->slots[0]);
+               copy_extent_buffer(path->nodes[0], eb, dest_offset,
+                               (unsigned long)item,  sizeof(*item));
+
+               ins.objectid = btrfs_file_extent_disk_bytenr(eb, item);
+               ins.offset = btrfs_file_extent_disk_num_bytes(eb, item);
+               ins.type = BTRFS_EXTENT_ITEM_KEY;
+
+               if (ins.objectid > 0) {
+                       u64 csum_start;
+                       u64 csum_end;
+                       LIST_HEAD(ordered_sums);
+                       /*
+                        * is this extent already allocated in the extent
+                        * allocation tree?  If so, just add a reference
+                        */
+                       ret = btrfs_lookup_extent(root, ins.objectid,
+                                               ins.offset);
+                       if (ret == 0) {
+                               ret = btrfs_inc_extent_ref(trans, root,
+                                               ins.objectid, ins.offset,
+                                               path->nodes[0]->start,
+                                               root->root_key.objectid,
+                                               trans->transid, key->objectid);
+                       } else {
+                               /*
+                                * insert the extent pointer in the extent
+                                * allocation tree
+                                */
+                               ret = btrfs_alloc_logged_extent(trans, root,
+                                               path->nodes[0]->start,
+                                               root->root_key.objectid,
+                                               trans->transid, key->objectid,
+                                               &ins);
+                               BUG_ON(ret);
+                       }
+                       btrfs_release_path(root, path);
+
+                       if (btrfs_file_extent_compression(eb, item)) {
+                               csum_start = ins.objectid;
+                               csum_end = csum_start + ins.offset;
+                       } else {
+                               csum_start = ins.objectid +
+                                       btrfs_file_extent_offset(eb, item);
+                               csum_end = csum_start +
+                                       btrfs_file_extent_num_bytes(eb, item);
+                       }
+
+                       ret = btrfs_lookup_csums_range(root->log_root,
+                                               csum_start, csum_end - 1,
+                                               &ordered_sums);
+                       BUG_ON(ret);
+                       while (!list_empty(&ordered_sums)) {
+                               struct btrfs_ordered_sum *sums;
+                               sums = list_entry(ordered_sums.next,
+                                               struct btrfs_ordered_sum,
+                                               list);
+                               ret = btrfs_csum_file_blocks(trans,
+                                               root->fs_info->csum_root,
+                                               sums);
+                               BUG_ON(ret);
+                               list_del(&sums->list);
+                               kfree(sums);
+                       }
+               } else {
+                       btrfs_release_path(root, path);
+               }
+       } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+               /* inline extents are easy, we just overwrite them */
+               ret = overwrite_item(trans, root, path, eb, slot, key);
+               BUG_ON(ret);
+       }
 
-       /* btrfs_drop_extents changes i_bytes & i_blocks, update it here */
-       inode_add_bytes(inode, extent_end - start);
+       inode_set_bytes(inode, saved_nbytes);
        btrfs_update_inode(trans, root, inode);
 out:
        if (inode)
@@ -902,70 +938,6 @@ out_nowrite:
        return 0;
 }
 
-/*
- * replay one csum item from the log tree into the subvolume 'root'
- * eb, slot and key all refer to the log tree
- * path is for temp use by this function and should be released on return
- *
- * This copies the checksums out of the log tree and inserts them into
- * the subvolume.  Any existing checksums for this range in the file
- * are overwritten, and new items are added where required.
- *
- * We keep this simple by reusing the btrfs_ordered_sum code from
- * the data=ordered mode.  This basically means making a copy
- * of all the checksums in ram, which we have to do anyway for kmap
- * rules.
- *
- * The copy is then sent down to btrfs_csum_file_blocks, which
- * does all the hard work of finding existing items in the file
- * or adding new ones.
- */
-static noinline int replay_one_csum(struct btrfs_trans_handle *trans,
-                                     struct btrfs_root *root,
-                                     struct btrfs_path *path,
-                                     struct extent_buffer *eb, int slot,
-                                     struct btrfs_key *key)
-{
-       int ret;
-       u32 item_size = btrfs_item_size_nr(eb, slot);
-       u64 cur_offset;
-       u16 csum_size =
-               btrfs_super_csum_size(&root->fs_info->super_copy);
-       unsigned long file_bytes;
-       struct btrfs_ordered_sum *sums;
-       struct btrfs_sector_sum *sector_sum;
-       unsigned long ptr;
-
-       file_bytes = (item_size / csum_size) * root->sectorsize;
-       sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS);
-       if (!sums)
-               return -ENOMEM;
-
-       INIT_LIST_HEAD(&sums->list);
-       sums->len = file_bytes;
-       sums->bytenr = key->offset;
-
-       /*
-        * copy all the sums into the ordered sum struct
-        */
-       sector_sum = sums->sums;
-       cur_offset = key->offset;
-       ptr = btrfs_item_ptr_offset(eb, slot);
-       while (item_size > 0) {
-               sector_sum->bytenr = cur_offset;
-               read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size);
-               sector_sum++;
-               item_size -= csum_size;
-               ptr += csum_size;
-               cur_offset += root->sectorsize;
-       }
-
-       /* let btrfs_csum_file_blocks add them into the file */
-       ret = btrfs_csum_file_blocks(trans, root->fs_info->csum_root, sums);
-       BUG_ON(ret);
-       kfree(sums);
-       return 0;
-}
 /*
  * There are a few corners where the link count of the file can't
  * be properly maintained during replay.  So, instead of adding
@@ -1659,10 +1631,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
                        ret = replay_one_extent(wc->trans, root, path,
                                                eb, i, &key);
                        BUG_ON(ret);
-               } else if (key.type == BTRFS_EXTENT_CSUM_KEY) {
-                       ret = replay_one_csum(wc->trans, root, path,
-                                             eb, i, &key);
-                       BUG_ON(ret);
                } else if (key.type == BTRFS_DIR_ITEM_KEY ||
                           key.type == BTRFS_DIR_INDEX_KEY) {
                        ret = replay_one_dir_item(wc->trans, root, path,
@@ -2021,7 +1989,7 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
                .process_func = process_one_buffer
        };
 
-       if (!root->log_root)
+       if (!root->log_root || root->fs_info->log_root_recovering)
                return 0;
 
        log = root->log_root;
@@ -2453,86 +2421,6 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-static noinline int copy_extent_csums(struct btrfs_trans_handle *trans,
-                                     struct list_head *list,
-                                     struct btrfs_root *root,
-                                     u64 disk_bytenr, u64 len)
-{
-       struct btrfs_ordered_sum *sums;
-       struct btrfs_sector_sum *sector_sum;
-       int ret;
-       struct btrfs_path *path;
-       struct btrfs_csum_item *item = NULL;
-       u64 end = disk_bytenr + len;
-       u64 item_start_offset = 0;
-       u64 item_last_offset = 0;
-       u32 diff;
-       u32 sum;
-       u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
-
-       sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
-
-       sector_sum = sums->sums;
-       sums->bytenr = disk_bytenr;
-       sums->len = len;
-       list_add_tail(&sums->list, list);
-
-       path = btrfs_alloc_path();
-       while (disk_bytenr < end) {
-               if (!item || disk_bytenr < item_start_offset ||
-                   disk_bytenr >= item_last_offset) {
-                       struct btrfs_key found_key;
-                       u32 item_size;
-
-                       if (item)
-                               btrfs_release_path(root, path);
-                       item = btrfs_lookup_csum(NULL, root, path,
-                                                disk_bytenr, 0);
-                       if (IS_ERR(item)) {
-                               ret = PTR_ERR(item);
-                               if (ret == -ENOENT || ret == -EFBIG)
-                                       ret = 0;
-                               sum = 0;
-                               printk(KERN_INFO "log no csum found for "
-                                      "byte %llu\n",
-                                      (unsigned long long)disk_bytenr);
-                               item = NULL;
-                               btrfs_release_path(root, path);
-                               goto found;
-                       }
-                       btrfs_item_key_to_cpu(path->nodes[0], &found_key,
-                                             path->slots[0]);
-
-                       item_start_offset = found_key.offset;
-                       item_size = btrfs_item_size_nr(path->nodes[0],
-                                                      path->slots[0]);
-                       item_last_offset = item_start_offset +
-                               (item_size / csum_size) *
-                               root->sectorsize;
-                       item = btrfs_item_ptr(path->nodes[0], path->slots[0],
-                                             struct btrfs_csum_item);
-               }
-               /*
-                * this byte range must be able to fit inside
-                * a single leaf so it will also fit inside a u32
-                */
-               diff = disk_bytenr - item_start_offset;
-               diff = diff / root->sectorsize;
-               diff = diff * csum_size;
-
-               read_extent_buffer(path->nodes[0], &sum,
-                                  ((unsigned long)item) + diff,
-                                  csum_size);
-found:
-               sector_sum->bytenr = disk_bytenr;
-               sector_sum->sum = sum;
-               disk_bytenr += root->sectorsize;
-               sector_sum++;
-       }
-       btrfs_free_path(path);
-       return 0;
-}
-
 static noinline int copy_items(struct btrfs_trans_handle *trans,
                               struct btrfs_root *log,
                               struct btrfs_path *dst_path,
@@ -2622,10 +2510,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
                                                   trans->transid,
                                                   ins_keys[i].objectid);
                                        BUG_ON(ret);
-                                       ret = copy_extent_csums(trans,
-                                               &ordered_sums,
-                                               log->fs_info->csum_root,
-                                               ds + cs, cl);
+                                       ret = btrfs_lookup_csums_range(
+                                                  log->fs_info->csum_root,
+                                                  ds + cs, ds + cs + cl - 1,
+                                                  &ordered_sums);
                                        BUG_ON(ret);
                                }
                        }
@@ -2942,9 +2830,9 @@ again:
                tmp_key.offset = (u64)-1;
 
                wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
-
                BUG_ON(!wc.replay_dest);
 
+               wc.replay_dest->log_root = log;
                btrfs_record_root_in_trans(wc.replay_dest);
                ret = walk_log_tree(trans, log, &wc);
                BUG_ON(ret);
@@ -2961,6 +2849,7 @@ again:
                }
 
                key.offset = found_key.offset - 1;
+               wc.replay_dest->log_root = NULL;
                free_extent_buffer(log->node);
                kfree(log);