{
struct extent_buffer *eb;
- rcu_read_lock();
- eb = rcu_dereference(root->node);
- extent_buffer_get(eb);
- rcu_read_unlock();
+ while (1) {
+ rcu_read_lock();
+ eb = rcu_dereference(root->node);
+
+ /*
+ * RCU really hurts here, we could free up the root node because
+ * it was cow'ed but we may not get the new root node yet so do
+ * the inc_not_zero dance and if it doesn't work then
+ * synchronize_rcu and try again.
+ */
+ if (atomic_inc_not_zero(&eb->refs)) {
+ rcu_read_unlock();
+ break;
+ }
+ rcu_read_unlock();
+ synchronize_rcu();
+ }
return eb;
}
}
if (unlock_orig)
btrfs_tree_unlock(buf);
- free_extent_buffer(buf);
+ free_extent_buffer_stale(buf);
btrfs_mark_buffer_dirty(cow);
*cow_ret = cow;
return 0;
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
/* once for the root ptr */
- free_extent_buffer(mid);
+ free_extent_buffer_stale(mid);
return 0;
}
if (btrfs_header_nritems(mid) >
ret = wret;
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, root, right, 0, 1, 0);
- free_extent_buffer(right);
+ free_extent_buffer_stale(right);
right = NULL;
} else {
struct btrfs_disk_key right_key;
ret = wret;
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1, 0);
- free_extent_buffer(mid);
+ free_extent_buffer_stale(mid);
mid = NULL;
} else {
/* update the parent key to reflect our changes */
root_sub_used(root, leaf->len);
+ extent_buffer_get(leaf);
btrfs_free_tree_block(trans, root, leaf, 0, 1, 0);
+ free_extent_buffer_stale(leaf);
return 0;
}
/*
list_add(&eb->leak_list, &buffers);
spin_unlock_irqrestore(&leak_lock, flags);
#endif
+ spin_lock_init(&eb->refs_lock);
atomic_set(&eb->refs, 1);
atomic_set(&eb->pages_reading, 0);
*/
if (PagePrivate(page) &&
page->private == (unsigned long)eb) {
+ BUG_ON(PageDirty(page));
+ BUG_ON(PageWriteback(page));
/*
* We need to make sure we haven't be attached
* to a new eb.
if (!atomic_inc_not_zero(&exists->refs)) {
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
- synchronize_rcu();
exists = NULL;
goto again;
}
goto free_eb;
}
/* add one reference for the tree */
+ spin_lock(&eb->refs_lock);
atomic_inc(&eb->refs);
+ set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
+ spin_unlock(&eb->refs_lock);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
return NULL;
}
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+ struct extent_buffer *eb =
+ container_of(head, struct extent_buffer, rcu_head);
+
+ __free_extent_buffer(eb);
+}
+
+static int extent_buffer_under_io(struct extent_buffer *eb,
+ struct page *locked_page)
+{
+ unsigned long num_pages, i;
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ for (i = 0; i < num_pages; i++) {
+ struct page *page = eb->pages[i];
+ int need_unlock = 0;
+
+ if (!page)
+ continue;
+
+ if (page != locked_page) {
+ if (!trylock_page(page))
+ return 1;
+ need_unlock = 1;
+ }
+
+ if (PageDirty(page) || PageWriteback(page)) {
+ if (need_unlock)
+ unlock_page(page);
+ return 1;
+ }
+ if (need_unlock)
+ unlock_page(page);
+ }
+
+ return 0;
+}
+
+/* Expects to have eb->eb_lock already held */
+static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
+{
+ WARN_ON(atomic_read(&eb->refs) == 0);
+ if (atomic_dec_and_test(&eb->refs)) {
+ struct extent_io_tree *tree = eb->tree;
+ int ret;
+
+ spin_unlock(&eb->refs_lock);
+
+ might_sleep_if(mask & __GFP_WAIT);
+ ret = clear_extent_bit(tree, eb->start,
+ eb->start + eb->len - 1, -1, 0, 0,
+ NULL, mask);
+ if (ret < 0) {
+ unsigned long num_pages, i;
+
+ num_pages = num_extent_pages(eb->start, eb->len);
+ /*
+ * We failed to clear the state bits which likely means
+ * ENOMEM, so just re-up the eb ref and continue, we
+ * will get freed later on via releasepage or something
+ * else and will be ok.
+ */
+ spin_lock(&eb->tree->mapping->private_lock);
+ spin_lock(&eb->refs_lock);
+ set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags);
+ atomic_inc(&eb->refs);
+
+ /*
+ * We may have started to reclaim the pages for a newly
+ * allocated eb, make sure we own all of them again.
+ */
+ for (i = 0; i < num_pages; i++) {
+ struct page *page = eb->pages[i];
+
+ if (!page) {
+ WARN_ON(1);
+ continue;
+ }
+
+ BUG_ON(!PagePrivate(page));
+ if (page->private != (unsigned long)eb) {
+ ClearPagePrivate(page);
+ page_cache_release(page);
+ attach_extent_buffer_page(eb, page);
+ }
+ }
+ spin_unlock(&eb->refs_lock);
+ spin_unlock(&eb->tree->mapping->private_lock);
+ return;
+ }
+
+ spin_lock(&tree->buffer_lock);
+ radix_tree_delete(&tree->buffer,
+ eb->start >> PAGE_CACHE_SHIFT);
+ spin_unlock(&tree->buffer_lock);
+
+ /* Should be safe to release our pages at this point */
+ btrfs_release_extent_buffer_page(eb, 0);
+
+ call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
+ return;
+ }
+ spin_unlock(&eb->refs_lock);
+}
+
void free_extent_buffer(struct extent_buffer *eb)
{
if (!eb)
return;
- if (!atomic_dec_and_test(&eb->refs))
+ spin_lock(&eb->refs_lock);
+ if (atomic_read(&eb->refs) == 2 &&
+ test_bit(EXTENT_BUFFER_STALE, &eb->bflags) &&
+ !extent_buffer_under_io(eb, NULL) &&
+ test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+ atomic_dec(&eb->refs);
+
+ /*
+ * I know this is terrible, but it's temporary until we stop tracking
+ * the uptodate bits and such for the extent buffers.
+ */
+ release_extent_buffer(eb, GFP_ATOMIC);
+}
+
+void free_extent_buffer_stale(struct extent_buffer *eb)
+{
+ if (!eb)
return;
- WARN_ON(1);
+ spin_lock(&eb->refs_lock);
+ set_bit(EXTENT_BUFFER_STALE, &eb->bflags);
+
+ if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb, NULL) &&
+ test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
+ atomic_dec(&eb->refs);
+ release_extent_buffer(eb, GFP_NOFS);
}
int clear_extent_buffer_dirty(struct extent_io_tree *tree,
was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
num_pages = num_extent_pages(eb->start, eb->len);
+ WARN_ON(atomic_read(&eb->refs) == 0);
for (i = 0; i < num_pages; i++)
__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
return was_dirty;
}
}
-static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
-{
- struct extent_buffer *eb =
- container_of(head, struct extent_buffer, rcu_head);
-
- __free_extent_buffer(eb);
-}
-
-int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
+int try_release_extent_buffer(struct page *page, gfp_t mask)
{
- u64 start = page_offset(page);
- struct extent_buffer *eb = (struct extent_buffer *)page->private;
- int ret = 1;
+ struct extent_buffer *eb;
- if (!PagePrivate(page) || !eb)
+ /*
+ * We need to make sure noboody is attaching this page to an eb right
+ * now.
+ */
+ spin_lock(&page->mapping->private_lock);
+ if (!PagePrivate(page)) {
+ spin_unlock(&page->mapping->private_lock);
return 1;
+ }
- spin_lock(&tree->buffer_lock);
- if (atomic_read(&eb->refs) > 1 ||
- test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
- ret = 0;
- goto out;
+ eb = (struct extent_buffer *)page->private;
+ BUG_ON(!eb);
+
+ /*
+ * This is a little awful but should be ok, we need to make sure that
+ * the eb doesn't disappear out from under us while we're looking at
+ * this page.
+ */
+ spin_lock(&eb->refs_lock);
+ if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb, page)) {
+ spin_unlock(&eb->refs_lock);
+ spin_unlock(&page->mapping->private_lock);
+ return 0;
}
+ spin_unlock(&page->mapping->private_lock);
+
+ if ((mask & GFP_NOFS) == GFP_NOFS)
+ mask = GFP_NOFS;
/*
- * set @eb->refs to 0 if it is already 1, and then release the @eb.
- * Or go back.
+ * If tree ref isn't set then we know the ref on this eb is a real ref,
+ * so just return, this page will likely be freed soon anyway.
*/
- if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
- ret = 0;
- goto out;
+ if (!test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
+ spin_unlock(&eb->refs_lock);
+ return 0;
}
- radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
- btrfs_release_extent_buffer_page(eb, 0);
-out:
- spin_unlock(&tree->buffer_lock);
+ release_extent_buffer(eb, mask);
- /* at this point we can safely release the extent buffer */
- if (atomic_read(&eb->refs) == 0)
- call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
- return ret;
+ return 1;
}