return btrfs_next_old_leaf(root, path, 0);
}
+/*
+ * Release the path up to but not including the given level.
+ *
+ * For every level below @level the slot is reset to 0, a lock recorded in
+ * path->locks[] is dropped with btrfs_tree_unlock_rw(), the node is passed to
+ * free_extent_buffer() and its path->nodes[] entry is cleared.  Entries at
+ * @level and above are not touched.
+ */
+static void btrfs_release_level(struct btrfs_path *path, int level)
+{
+ int i;
+
+ for (i = 0; i < level; i++) {
+ path->slots[i] = 0; /* reset even if no node is held at this level */
+ if (!path->nodes[i])
+ continue;
+ if (path->locks[i]) { /* unlock before giving up the buffer */
+ btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
+ path->locks[i] = 0;
+ }
+ free_extent_buffer(path->nodes[i]);
+ path->nodes[i] = NULL;
+ }
+}
+
+/*
+ * Advance @path to slot 0 of the next leaf, taking write locks on the blocks
+ * entered on the way down.
+ *
+ * This function assumes 2 things
+ *
+ * 1) You are using path->keep_locks (or path->really_keep_locks, see the
+ *    WARN_ON below)
+ * 2) You are not inserting items.
+ *
+ * If either of these is not true do not use this function. If you need a next
+ * leaf with either of these not being true then this function can be easily
+ * adapted to do that, but at the moment these are the limitations.
+ *
+ * The key of the last item in the current leaf is remembered so the path can
+ * be rebuilt with btrfs_search_slot() whenever a level is found that is not
+ * write locked, or when a helper in the walk-down loop returns -EAGAIN.
+ *
+ * @del selects the ins_len handed to btrfs_search_slot() and
+ * setup_nodes_for_search(): -1 when set, 0 otherwise.
+ *
+ * Returns 0 with path->slots[0] == 0 in the new leaf, 1 if the walk up ran
+ * past the top of the path without finding a node with a further slot, or
+ * the non-zero result of a failing helper.  The path is released whenever
+ * the return value is non-zero.
+ */
+int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ int del)
+{
+ struct extent_buffer *b;
+ struct btrfs_key key;
+ u32 nritems;
+ int level = 1; /* start one level above the leaf in path->nodes[0] */
+ int slot;
+ int ret = 1;
+ int write_lock_level = BTRFS_MAX_LEVEL;
+ int ins_len = del ? -1 : 0;
+
+ WARN_ON(!(path->keep_locks || path->really_keep_locks));
+
+ nritems = btrfs_header_nritems(path->nodes[0]); /* NOTE(review): assumed > 0 */
+ btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
+
+ while (path->nodes[level]) {
+ nritems = btrfs_header_nritems(path->nodes[level]);
+ if (!(path->locks[level] & BTRFS_WRITE_LOCK)) { /* not write locked: redo the search */
+search: /* also the retry target for -EAGAIN in the walk down */
+ btrfs_release_path(path);
+ ret = btrfs_search_slot(trans, root, &key, path,
+ ins_len, 1);
+ if (ret < 0)
+ goto out;
+ level = 1; /* start the walk up again */
+ continue;
+ }
+
+ if (path->slots[level] >= nritems - 1) { /* on the last slot, go up */
+ level++;
+ continue;
+ }
+
+ btrfs_release_level(path, level); /* drop everything below this node */
+ break;
+ }
+
+ if (!path->nodes[level]) { /* walked past the top of the path */
+ ret = 1;
+ goto out;
+ }
+
+ path->slots[level]++; /* the slot whose block is read below */
+ b = path->nodes[level];
+
+ while (b) {
+ level = btrfs_header_level(b);
+
+ if (!should_cow_block(trans, root, b))
+ goto cow_done;
+
+ btrfs_set_path_blocking(path);
+ ret = btrfs_cow_block(trans, root, b,
+ path->nodes[level + 1],
+ path->slots[level + 1], &b);
+ if (ret)
+ goto out;
+cow_done:
+ path->nodes[level] = b;
+ btrfs_clear_path_blocking(path, NULL, 0);
+ if (level != 0) { /* not a leaf yet: read and lock the block at the slot */
+ ret = setup_nodes_for_search(trans, root, path, b,
+ level, ins_len,
+ &write_lock_level);
+ if (ret == -EAGAIN)
+ goto search;
+ if (ret)
+ goto out;
+
+ b = path->nodes[level];
+ slot = path->slots[level];
+
+ ret = read_block_for_search(trans, root, path,
+ &b, level, slot, &key, 0);
+ if (ret == -EAGAIN)
+ goto search;
+ if (ret)
+ goto out;
+ level = btrfs_header_level(b);
+ if (!btrfs_try_tree_write_lock(b)) { /* try-lock failed: take the lock the blocking way */
+ btrfs_set_path_blocking(path);
+ btrfs_tree_lock(b);
+ btrfs_clear_path_blocking(path, b,
+ BTRFS_WRITE_LOCK);
+ }
+ path->locks[level] = BTRFS_WRITE_LOCK;
+ path->nodes[level] = b;
+ path->slots[level] = 0; /* enter the new block at its first slot */
+ } else { /* level 0: the new leaf is in place */
+ path->slots[level] = 0;
+ ret = 0;
+ break;
+ }
+ }
+
+out:
+ if (ret) /* the path is released on any non-zero result */
+ btrfs_release_path(path);
+
+ return ret;
+}
+
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq)
{
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock);
+static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
+ u64 len, u64 orig_start,
+ u64 block_start, u64 block_len,
+ u64 orig_block_len, int type);
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
em->compress_type = async_extent->compress_type;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
em->orig_block_len = ins.offset;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
+ em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
em = alloc_extent_map();
BUG_ON(!em); /* -ENOMEM */
em->start = cur_offset;
- em->orig_start = em->start;
+ em->orig_start = found_key.offset - extent_offset;
em->len = num_bytes;
em->block_len = num_bytes;
em->block_start = disk_bytenr;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
set_bit(EXTENT_FLAG_FILLING, &em->flags);
+ em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
if (start + len <= found_key.offset)
goto not_found;
em->start = start;
+ em->orig_start = start;
em->len = found_key.offset - start;
goto not_found_em;
}
em->len = (copy_size + root->sectorsize - 1) &
~((u64)root->sectorsize - 1);
em->orig_block_len = em->len;
- em->orig_start = EXTENT_MAP_INLINE;
+ em->orig_start = em->start;
if (compress_type) {
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
em->compress_type = compress_type;
}
not_found:
em->start = start;
+ em->orig_start = start;
em->len = len;
not_found_em:
em->block_start = EXTENT_MAP_HOLE;
}
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
- struct extent_map *em,
u64 start, u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *em;
struct btrfs_key ins;
u64 alloc_hint;
int ret;
- bool insert = false;
-
- /*
- * Ok if the extent map we looked up is a hole and is for the exact
- * range we want, there is no reason to allocate a new one, however if
- * it is not right then we need to free this one and drop the cache for
- * our range.
- */
- if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
- em->len != len) {
- free_extent_map(em);
- em = NULL;
- insert = true;
- btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
- }
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
goto out;
}
- if (!em) {
- em = alloc_extent_map();
- if (!em) {
- em = ERR_PTR(-ENOMEM);
- goto out;
- }
- }
-
- em->start = start;
- em->orig_start = em->start;
- em->len = ins.offset;
-
- em->block_start = ins.objectid;
- em->block_len = ins.offset;
- em->orig_block_len = ins.offset;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
-
- /*
- * We need to do this because if we're using the original em we searched
- * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
- */
- em->flags = 0;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
-
- while (insert) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST)
- break;
- btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
- }
+ em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+ ins.offset, ins.offset, 0);
+ if (IS_ERR(em))
+ goto out;
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
ins.offset, ins.offset, 0);
em->block_start = block_start;
em->bdev = root->fs_info->fs_devices->latest_bdev;
em->orig_block_len = orig_block_len;
+ em->generation = -1;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
if (type == BTRFS_ORDERED_PREALLOC)
set_bit(EXTENT_FLAG_FILLING, &em->flags);
em->start + em->len - 1, 0);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
} while (ret == -EEXIST);
goto must_cow;
if (can_nocow_odirect(trans, inode, start, len) == 1) {
- u64 orig_start = em->start;
+ u64 orig_start = em->orig_start;
u64 orig_block_len = em->orig_block_len;
if (type == BTRFS_ORDERED_PREALLOC) {
* it above
*/
len = bh_result->b_size;
- em = btrfs_new_extent_direct(inode, em, start, len);
+ free_extent_map(em);
+ em = btrfs_new_extent_direct(inode, start, len);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto unlock_err;
return 0;
}
-struct log_args {
- struct extent_buffer *src;
- u64 next_offset;
- int start_slot;
- int nr;
-};
+static int drop_adjacent_extents(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct inode *inode,
+ struct extent_map *em,
+ struct btrfs_path *path)
+{
+ struct btrfs_file_extent_item *fi;
+ struct extent_buffer *leaf;
+ struct btrfs_key key, new_key;
+ struct btrfs_map_token token;
+ u64 extent_end;
+ u64 extent_offset = 0;
+ int extent_type;
+ int del_slot = 0;
+ int del_nr = 0;
+ int ret = 0;
+ /*
+ * Walk the items that follow the slot the path points at on entry.
+ * Extent data items of this inode that start before the end of em
+ * (em->start + em->len) are either queued for deletion (del_slot/del_nr
+ * describe one run in the current leaf) or, when they extend past the
+ * end of em, trimmed so that they begin at the end of em.  Returns 0 or
+ * the error from btrfs_del_items()/btrfs_next_leaf_write().
+ */
+ while (1) {
+ btrfs_init_map_token(&token);
+ leaf = path->nodes[0];
+ path->slots[0]++; /* step past the entry slot / the item handled last */
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) { /* ran off this leaf */
+ if (del_nr) { /* flush queued deletions before moving on */
+ ret = btrfs_del_items(trans, root, path,
+ del_slot, del_nr);
+ if (ret)
+ return ret;
+ del_nr = 0;
+ }
+
+ ret = btrfs_next_leaf_write(trans, root, path, 1);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) /* positive result is not an error: nothing more to examine */
+ return 0;
+ leaf = path->nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid != btrfs_ino(inode) ||
+ key.type != BTRFS_EXTENT_DATA_KEY ||
+ key.offset >= em->start + em->len)
+ break; /* different inode/type, or starts at or after the end of em */
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_type = btrfs_token_file_extent_type(leaf, fi, &token);
+ if (extent_type == BTRFS_FILE_EXTENT_REG ||
+ extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ extent_offset = btrfs_token_file_extent_offset(leaf,
+ fi, &token);
+ extent_end = key.offset +
+ btrfs_token_file_extent_num_bytes(leaf, fi,
+ &token);
+ } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+ extent_end = key.offset +
+ btrfs_file_extent_inline_len(leaf, fi);
+ } else {
+ BUG();
+ }
+
+ if (extent_end <= em->len + em->start) { /* ends within em: delete it */
+ if (!del_nr) {
+ del_slot = path->slots[0];
+ }
+ del_nr++;
+ continue;
+ }
+
+ /*
+ * Ok so we'll ignore previous items if we log a new extent,
+ * which can lead to overlapping extents, so if we have an
+ * existing extent we want to adjust we _have_ to check the next
+ * guy to make sure we even need this extent anymore, this keeps
+ * us from panicking in set_item_key_safe.
+ */
+ if (path->slots[0] < btrfs_header_nritems(leaf) - 1) {
+ struct btrfs_key tmp_key;
+
+ btrfs_item_key_to_cpu(leaf, &tmp_key,
+ path->slots[0] + 1);
+ if (tmp_key.objectid == btrfs_ino(inode) &&
+ tmp_key.type == BTRFS_EXTENT_DATA_KEY &&
+ tmp_key.offset <= em->start + em->len) {
+ if (!del_nr)
+ del_slot = path->slots[0];
+ del_nr++;
+ continue;
+ }
+ }
+
+ BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); /* extent_offset is only read for REG/PREALLOC above */
+ memcpy(&new_key, &key, sizeof(new_key));
+ new_key.offset = em->start + em->len; /* item now starts at the end of em */
+ btrfs_set_item_key_safe(trans, root, path, &new_key);
+ extent_offset += em->start + em->len - key.offset; /* bytes cut off the front */
+ btrfs_set_token_file_extent_offset(leaf, fi, extent_offset,
+ &token);
+ btrfs_set_token_file_extent_num_bytes(leaf, fi, extent_end -
+ (em->start + em->len),
+ &token);
+ btrfs_mark_buffer_dirty(leaf);
+ }
+
+ if (del_nr) /* deletions still queued when the loop ended */
+ ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
+
+ return ret;
+}
static int log_one_extent(struct btrfs_trans_handle *trans,
struct inode *inode, struct btrfs_root *root,
- struct extent_map *em, struct btrfs_path *path,
- struct btrfs_path *dst_path, struct log_args *args)
+ struct extent_map *em, struct btrfs_path *path)
{
struct btrfs_root *log = root->log_root;
+ struct btrfs_file_extent_item *fi; /* item written into the log tree for em */
+ struct extent_buffer *leaf;
+ struct list_head ordered_sums;
struct btrfs_key key;
- u64 start = em->mod_start;
- u64 search_start = start;
- u64 len = em->mod_len;
- u64 num_bytes;
- int nritems;
+ u64 csum_offset = em->mod_start - em->start; /* modified range, relative to em->start */
+ u64 csum_len = em->mod_len;
+ u64 extent_offset = em->start - em->orig_start;
+ u64 block_len;
int ret;
+ bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- if (BTRFS_I(inode)->logged_trans == trans->transid) {
- ret = __btrfs_drop_extents(trans, log, inode, dst_path, start,
- start + len, NULL, 0);
- if (ret)
- return ret;
+ INIT_LIST_HEAD(&ordered_sums);
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = em->start;
+ path->really_keep_locks = 1; /* cleared again before every return below */
+
+ ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi));
+ if (ret && ret != -EEXIST) { /* -EEXIST is not treated as a failure here */
+ path->really_keep_locks = 0;
+ return ret;
+ }
+ leaf = path->nodes[0]; /* presumably the existing item when -EEXIST was returned -- TODO confirm */
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_generation(leaf, fi, em->generation);
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+ skip_csum = true; /* no csums are copied for prealloc extents */
+ btrfs_set_file_extent_type(leaf, fi,
+ BTRFS_FILE_EXTENT_PREALLOC);
+ } else {
+ btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
+ if (em->block_start == 0) /* no csum lookup when block_start is 0 */
+ skip_csum = true;
+ }
+
+ block_len = max(em->block_len, em->orig_block_len);
+ if (em->compress_type != BTRFS_COMPRESS_NONE) {
+ btrfs_set_file_extent_disk_bytenr(leaf, fi, em->block_start);
+ btrfs_set_file_extent_disk_num_bytes(leaf, fi, block_len);
+ } else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
+ btrfs_set_file_extent_disk_bytenr(leaf, fi,
+ em->block_start -
+ extent_offset);
+ btrfs_set_file_extent_disk_num_bytes(leaf, fi, block_len);
+ } else { /* block_start >= EXTENT_MAP_LAST_BYTE: log zero bytenr and size */
+ btrfs_set_file_extent_disk_bytenr(leaf, fi, 0);
+ btrfs_set_file_extent_disk_num_bytes(leaf, fi, 0);
}
- while (len) {
- if (args->nr)
- goto next_slot;
-again:
- key.objectid = btrfs_ino(inode);
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = search_start;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- return ret;
-
- if (ret) {
- /*
- * A rare case were we can have an em for a section of a
- * larger extent so we need to make sure that this em
- * falls within the extent we've found. If not we just
- * bail and go back to ye-olde way of doing things but
- * it happens often enough in testing that we need to do
- * this dance to make sure.
- */
- do {
- if (path->slots[0] == 0) {
- btrfs_release_path(path);
- if (search_start == 0)
- return -ENOENT;
- search_start--;
- goto again;
- }
+ btrfs_set_file_extent_offset(leaf, fi, em->start - em->orig_start);
+ btrfs_set_file_extent_num_bytes(leaf, fi, em->len);
+ btrfs_set_file_extent_ram_bytes(leaf, fi, em->len);
+ btrfs_set_file_extent_compression(leaf, fi, em->compress_type);
+ btrfs_set_file_extent_encryption(leaf, fi, 0);
+ btrfs_set_file_extent_other_encoding(leaf, fi, 0);
+ btrfs_mark_buffer_dirty(leaf);
- path->slots[0]--;
- btrfs_item_key_to_cpu(path->nodes[0], &key,
- path->slots[0]);
- if (key.objectid != btrfs_ino(inode) ||
- key.type != BTRFS_EXTENT_DATA_KEY) {
- btrfs_release_path(path);
- return -ENOENT;
- }
- } while (key.offset > start);
+ /*
+ * Have to check the extent to the right of us to make sure it doesn't
+ * fall in our current range. We're ok if the previous extent is in our
+ * range since the recovery stuff will run us in key order and thus just
+ * drop the part we overwrote.
+ */
+ ret = drop_adjacent_extents(trans, log, inode, em, path);
+ btrfs_release_path(path); /* path is released whether or not the call above failed */
+ path->really_keep_locks = 0;
+ if (ret) {
+ return ret;
+ }
- num_bytes = btrfs_file_extent_length(path);
- if (key.offset + num_bytes <= start) {
- btrfs_release_path(path);
- return -ENOENT;
- }
- }
- args->src = path->nodes[0];
-next_slot:
- btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
- num_bytes = btrfs_file_extent_length(path);
- if (args->nr &&
- args->start_slot + args->nr == path->slots[0]) {
- args->nr++;
- } else if (args->nr) {
- ret = copy_items(trans, inode, dst_path, args->src,
- args->start_slot, args->nr,
- LOG_INODE_ALL);
- if (ret)
- return ret;
- args->nr = 1;
- args->start_slot = path->slots[0];
- } else if (!args->nr) {
- args->nr = 1;
- args->start_slot = path->slots[0];
- }
- nritems = btrfs_header_nritems(path->nodes[0]);
- path->slots[0]++;
- if (len < num_bytes) {
- /* I _think_ this is ok, envision we write to a
- * preallocated space that is adjacent to a previously
- * written preallocated space that gets merged when we
- * mark this preallocated space written. If we do not
- * have the adjacent extent in cache then when we copy
- * this extent it could end up being larger than our EM
- * thinks it is, which is a-ok, so just set len to 0.
- */
- len = 0;
- } else {
- len -= num_bytes;
- }
- start = key.offset + num_bytes;
- args->next_offset = start;
- search_start = start;
+ if (skip_csum) /* NODATASUM inode, prealloc extent, or block_start == 0 */
+ return 0;
- if (path->slots[0] < nritems) {
- if (len)
- goto next_slot;
- break;
- }
+ /* block start is already adjusted for the file extent offset. */
+ ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
+ em->block_start + csum_offset,
+ em->block_start + csum_offset +
+ csum_len - 1, &ordered_sums, 0);
+ if (ret) /* NOTE(review): ordered_sums is not freed on this path -- confirm it is empty on error */
+ return ret;
- if (args->nr) {
- ret = copy_items(trans, inode, dst_path, args->src,
- args->start_slot, args->nr,
- LOG_INODE_ALL);
- if (ret)
- return ret;
- args->nr = 0;
- btrfs_release_path(path);
- }
+ while (!list_empty(&ordered_sums)) {
+ struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
+ struct btrfs_ordered_sum,
+ list);
+ if (!ret) /* after the first failure the remaining sums are only freed */
+ ret = btrfs_csum_file_blocks(trans, log, sums);
+ list_del(&sums->list);
+ kfree(sums);
}
- return 0;
+ return ret;
}
static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode,
- struct btrfs_path *path,
- struct btrfs_path *dst_path)
+ struct btrfs_path *path)
{
- struct log_args args;
struct extent_map *em, *n;
struct list_head extents;
struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
INIT_LIST_HEAD(&extents);
- memset(&args, 0, sizeof(args));
-
write_lock(&tree->lock);
test_gen = root->fs_info->last_trans_committed;
write_unlock(&tree->lock);
- /*
- * If the previous EM and the last extent we left off on aren't
- * sequential then we need to copy the items we have and redo
- * our search
- */
- if (args.nr && em->mod_start != args.next_offset) {
- ret = copy_items(trans, inode, dst_path, args.src,
- args.start_slot, args.nr,
- LOG_INODE_ALL);
- if (ret) {
- free_extent_map(em);
- write_lock(&tree->lock);
- continue;
- }
- btrfs_release_path(path);
- args.nr = 0;
- }
-
- ret = log_one_extent(trans, inode, root, em, path, dst_path, &args);
+ ret = log_one_extent(trans, inode, root, em, path);
free_extent_map(em);
write_lock(&tree->lock);
}
WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock);
- if (!ret && args.nr)
- ret = copy_items(trans, inode, dst_path, args.src,
- args.start_slot, args.nr, LOG_INODE_ALL);
btrfs_release_path(path);
return ret;
}
log_extents:
if (fast_search) {
- btrfs_release_path(path);
btrfs_release_path(dst_path);
- ret = btrfs_log_changed_extents(trans, root, inode, path,
- dst_path);
+ ret = btrfs_log_changed_extents(trans, root, inode, dst_path);
if (ret) {
err = ret;
goto out_unlock;