Btrfs: Add an extent buffer LRU to reduce radix tree hits
author Chris Mason <chris.mason@oracle.com>
Mon, 15 Oct 2007 20:18:14 +0000 (16:18 -0400)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:03:56 +0000 (11:03 -0400)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_map.c
fs/btrfs/extent_map.h
fs/btrfs/transaction.c

index 08ddf1873ab1f0c55da018e116a5fae011253597..bef61ee883e1f1093b252dce81b78cc67d510252 100644 (file)
@@ -88,8 +88,6 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        if (IS_ERR(cow))
                return PTR_ERR(cow);
 
-       cow->alloc_addr = (unsigned long)__builtin_return_address(0);
-
        copy_extent_buffer(cow, buf, 0, 0, cow->len);
        btrfs_set_header_bytenr(cow, cow->start);
        btrfs_set_header_generation(cow, trans->transid);
@@ -151,7 +149,6 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
        search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
        ret = __btrfs_cow_block(trans, root, buf, parent,
                                 parent_slot, cow_ret, search_start, 0);
-       (*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0);
        return ret;
 }
 
index 8e606e6658aa7374f388a1933c088a7bf9eef5b7..fd7e6c182b9d3985a25f252aaddd71510cca8b59 100644 (file)
@@ -50,8 +50,6 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
        struct extent_buffer *eb;
        eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
                                bytenr, blocksize, GFP_NOFS);
-       if (eb)
-               eb->alloc_addr = (unsigned long)__builtin_return_address(0);
        return eb;
 }
 
@@ -63,7 +61,6 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
 
        eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
                                 bytenr, blocksize, GFP_NOFS);
-       eb->alloc_addr = (unsigned long)__builtin_return_address(0);
        return eb;
 }
 
@@ -234,7 +231,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
                return NULL;
        read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
                                 buf, 1);
-       buf->alloc_addr = (unsigned long)__builtin_return_address(0);
        return buf;
 }
 
@@ -638,6 +634,7 @@ int close_ctree(struct btrfs_root *root)
 
        btrfs_free_block_groups(root->fs_info);
        del_fs_roots(fs_info);
+       extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree);
        truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
        iput(fs_info->btree_inode);
        kfree(fs_info->extent_root);
@@ -647,20 +644,20 @@ int close_ctree(struct btrfs_root *root)
 
 int btrfs_buffer_uptodate(struct extent_buffer *buf)
 {
-       struct inode *btree_inode = buf->pages[0]->mapping->host;
+       struct inode *btree_inode = buf->last_page->mapping->host;
        return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf);
 }
 
 int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
 {
-       struct inode *btree_inode = buf->pages[0]->mapping->host;
+       struct inode *btree_inode = buf->last_page->mapping->host;
        return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree,
                                          buf);
 }
 
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 {
-       struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+       struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
        u64 transid = btrfs_header_generation(buf);
        struct inode *btree_inode = root->fs_info->btree_inode;
 
@@ -681,7 +678,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 
 void btrfs_set_buffer_defrag(struct extent_buffer *buf)
 {
-       struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+       struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
        set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
                        buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
@@ -689,7 +686,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf)
 
 void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
 {
-       struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+       struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
        set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
                        buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
@@ -698,7 +695,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
 
 int btrfs_buffer_defrag(struct extent_buffer *buf)
 {
-       struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+       struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
        return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
                     buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
@@ -706,7 +703,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf)
 
 int btrfs_buffer_defrag_done(struct extent_buffer *buf)
 {
-       struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+       struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
        return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
                     buf->start, buf->start + buf->len - 1,
@@ -715,7 +712,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf)
 
 int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
 {
-       struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+       struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
        return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
                     buf->start, buf->start + buf->len - 1,
@@ -724,7 +721,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
 
 int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
 {
-       struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+       struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
        return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
                     buf->start, buf->start + buf->len - 1,
@@ -733,7 +730,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
 
 int btrfs_read_buffer(struct extent_buffer *buf)
 {
-       struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
+       struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
        struct inode *btree_inode = root->fs_info->btree_inode;
        return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
                                        buf, 1);
index 72e6b274a45029c29c05458d0dbc58e4ae781af4..525fa845d613082bed862bf2efbb1e7f84bc5a22 100644 (file)
@@ -1176,7 +1176,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
                return ERR_PTR(-ENOMEM);
        }
        btrfs_set_buffer_uptodate(buf);
-       buf->alloc_addr = (unsigned long)__builtin_return_address(0);
        set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
                         buf->start + buf->len - 1, GFP_NOFS);
        btrfs_set_buffer_defrag(buf);
index e241699024da542bcb973b6a35a2e308a6de47a7..85b28a6a4e05d601cfafb3a257c01be897175c32 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
+#include <linux/swap.h>
 #include "extent_map.h"
 
 /* temporary define until extent_map moves out of btrfs */
@@ -20,14 +21,11 @@ static struct kmem_cache *extent_map_cache;
 static struct kmem_cache *extent_state_cache;
 static struct kmem_cache *extent_buffer_cache;
 
-static LIST_HEAD(extent_buffers);
 static LIST_HEAD(buffers);
 static LIST_HEAD(states);
 
-static spinlock_t extent_buffers_lock;
 static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
-static int nr_extent_buffers;
-#define MAX_EXTENT_BUFFER_CACHE 128
+#define BUFFER_LRU_MAX 64
 
 struct tree_entry {
        u64 start;
@@ -47,20 +45,12 @@ void __init extent_map_init(void)
        extent_buffer_cache = btrfs_cache_create("extent_buffers",
                                            sizeof(struct extent_buffer), 0,
                                            NULL);
-       spin_lock_init(&extent_buffers_lock);
 }
 
 void __exit extent_map_exit(void)
 {
-       struct extent_buffer *eb;
        struct extent_state *state;
 
-       while (!list_empty(&extent_buffers)) {
-               eb = list_entry(extent_buffers.next,
-                               struct extent_buffer, list);
-               list_del(&eb->list);
-               kmem_cache_free(extent_buffer_cache, eb);
-       }
        while (!list_empty(&states)) {
                state = list_entry(states.next, struct extent_state, list);
                printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
@@ -68,14 +58,6 @@ void __exit extent_map_exit(void)
                kmem_cache_free(extent_state_cache, state);
 
        }
-       while (!list_empty(&buffers)) {
-               eb = list_entry(buffers.next,
-                               struct extent_buffer, leak_list);
-               printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr);
-               list_del(&eb->leak_list);
-               kmem_cache_free(extent_buffer_cache, eb);
-       }
-
 
        if (extent_map_cache)
                kmem_cache_destroy(extent_map_cache);
@@ -92,10 +74,25 @@ void extent_map_tree_init(struct extent_map_tree *tree,
        tree->state.rb_node = NULL;
        tree->ops = NULL;
        rwlock_init(&tree->lock);
+       spin_lock_init(&tree->lru_lock);
        tree->mapping = mapping;
+       INIT_LIST_HEAD(&tree->buffer_lru);
+       tree->lru_size = 0;
 }
 EXPORT_SYMBOL(extent_map_tree_init);
 
+void extent_map_tree_cleanup(struct extent_map_tree *tree)
+{
+       struct extent_buffer *eb;
+       while(!list_empty(&tree->buffer_lru)) {
+               eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
+                               lru);
+               list_del(&eb->lru);
+               free_extent_buffer(eb);
+       }
+}
+EXPORT_SYMBOL(extent_map_tree_cleanup);
+
 struct extent_map *alloc_extent_map(gfp_t mask)
 {
        struct extent_map *em;
@@ -1915,66 +1912,99 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
        return (em->block_start + start - em->start) >> inode->i_blkbits;
 }
 
-static struct extent_buffer *__alloc_extent_buffer(gfp_t mask)
+static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
 {
-       struct extent_buffer *eb = NULL;
-
-       spin_lock(&extent_buffers_lock);
-       if (!list_empty(&extent_buffers)) {
-               eb = list_entry(extent_buffers.next, struct extent_buffer,
-                               list);
-               list_del(&eb->list);
-               WARN_ON(nr_extent_buffers == 0);
-               nr_extent_buffers--;
-       }
-       spin_unlock(&extent_buffers_lock);
+       if (list_empty(&eb->lru)) {
+               extent_buffer_get(eb);
+               list_add(&eb->lru, &tree->buffer_lru);
+               tree->lru_size++;
+               if (tree->lru_size >= BUFFER_LRU_MAX) {
+                       struct extent_buffer *rm;
+                       rm = list_entry(tree->buffer_lru.prev,
+                                       struct extent_buffer, lru);
+                       tree->lru_size--;
+                       list_del(&rm->lru);
+                       free_extent_buffer(rm);
+               }
+       } else
+               list_move(&eb->lru, &tree->buffer_lru);
+       return 0;
+}
+static struct extent_buffer *find_lru(struct extent_map_tree *tree,
+                                     u64 start, unsigned long len)
+{
+       struct list_head *lru = &tree->buffer_lru;
+       struct list_head *cur = lru->next;
+       struct extent_buffer *eb;
 
-       if (eb) {
-               memset(eb, 0, sizeof(*eb));
-       } else {
-               eb = kmem_cache_zalloc(extent_buffer_cache, mask);
-       }
-       spin_lock(&extent_buffers_lock);
-       list_add(&eb->leak_list, &buffers);
-       spin_unlock(&extent_buffers_lock);
+       if (list_empty(lru))
+               return NULL;
 
-       return eb;
+       do {
+               eb = list_entry(cur, struct extent_buffer, lru);
+               if (eb->start == start && eb->len == len) {
+                       extent_buffer_get(eb);
+                       return eb;
+               }
+               cur = cur->next;
+       } while (cur != lru);
+       return NULL;
 }
 
-static void __free_extent_buffer(struct extent_buffer *eb)
+static inline unsigned long num_extent_pages(u64 start, u64 len)
 {
-
-       spin_lock(&extent_buffers_lock);
-       list_del_init(&eb->leak_list);
-       spin_unlock(&extent_buffers_lock);
-
-       if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) {
-               kmem_cache_free(extent_buffer_cache, eb);
-       } else {
-               spin_lock(&extent_buffers_lock);
-               list_add(&eb->list, &extent_buffers);
-               nr_extent_buffers++;
-               spin_unlock(&extent_buffers_lock);
-       }
+       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+               (start >> PAGE_CACHE_SHIFT);
 }
 
-static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i)
+static inline struct page *extent_buffer_page(struct extent_buffer *eb,
+                                             unsigned long i)
 {
        struct page *p;
 
-       if (i < EXTENT_INLINE_PAGES)
-               return eb->pages[i];
+       if (i == 0)
+               return eb->last_page;
        i += eb->start >> PAGE_CACHE_SHIFT;
-       p = find_get_page(eb->pages[0]->mapping, i);
+       p = find_get_page(eb->last_page->mapping, i);
        page_cache_release(p);
        return p;
 }
 
-static inline unsigned long num_extent_pages(u64 start, u64 len)
+static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
+                                                  u64 start,
+                                                  unsigned long len,
+                                                  gfp_t mask)
 {
-       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
-               (start >> PAGE_CACHE_SHIFT);
+       struct extent_buffer *eb = NULL;
+
+       spin_lock(&tree->lru_lock);
+       eb = find_lru(tree, start, len);
+       if (eb)
+               goto lru_add;
+       spin_unlock(&tree->lru_lock);
+
+       if (eb) {
+               memset(eb, 0, sizeof(*eb));
+       } else {
+               eb = kmem_cache_zalloc(extent_buffer_cache, mask);
+       }
+       INIT_LIST_HEAD(&eb->lru);
+       eb->start = start;
+       eb->len = len;
+       atomic_set(&eb->refs, 1);
+
+       spin_lock(&tree->lru_lock);
+lru_add:
+       add_lru(tree, eb);
+       spin_unlock(&tree->lru_lock);
+       return eb;
+}
+
+static void __free_extent_buffer(struct extent_buffer *eb)
+{
+       kmem_cache_free(extent_buffer_cache, eb);
 }
+
 struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
                                          u64 start, unsigned long len,
                                          gfp_t mask)
@@ -1987,14 +2017,12 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
        struct address_space *mapping = tree->mapping;
        int uptodate = 0;
 
-       eb = __alloc_extent_buffer(mask);
+       eb = __alloc_extent_buffer(tree, start, len, mask);
        if (!eb || IS_ERR(eb))
                return NULL;
 
-       eb->alloc_addr = (unsigned long)__builtin_return_address(0);
-       eb->start = start;
-       eb->len = len;
-       atomic_set(&eb->refs, 1);
+       if (eb->flags & EXTENT_BUFFER_FILLED)
+               return eb;
 
        for (i = 0; i < num_pages; i++, index++) {
                p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
@@ -2008,14 +2036,15 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
                        goto fail;
                }
                set_page_extent_mapped(p);
-               if (i < EXTENT_INLINE_PAGES)
-                       eb->pages[i] = p;
+               if (i == 0)
+                       eb->last_page = p;
                if (!PageUptodate(p))
                        uptodate = 0;
                unlock_page(p);
        }
        if (uptodate)
                eb->flags |= EXTENT_UPTODATE;
+       eb->flags |= EXTENT_BUFFER_FILLED;
        return eb;
 fail:
        free_extent_buffer(eb);
@@ -2035,14 +2064,12 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
        struct address_space *mapping = tree->mapping;
        int uptodate = 1;
 
-       eb = __alloc_extent_buffer(mask);
+       eb = __alloc_extent_buffer(tree, start, len, mask);
        if (!eb || IS_ERR(eb))
                return NULL;
 
-       eb->alloc_addr = (unsigned long)__builtin_return_address(0);
-       eb->start = start;
-       eb->len = len;
-       atomic_set(&eb->refs, 1);
+       if (eb->flags & EXTENT_BUFFER_FILLED)
+               return eb;
 
        for (i = 0; i < num_pages; i++, index++) {
                p = find_lock_page(mapping, index);
@@ -2055,14 +2082,15 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
                        goto fail;
                }
                set_page_extent_mapped(p);
-               if (i < EXTENT_INLINE_PAGES)
-                       eb->pages[i] = p;
+               if (i == 0)
+                       eb->last_page = p;
                if (!PageUptodate(p))
                        uptodate = 0;
                unlock_page(p);
        }
        if (uptodate)
                eb->flags |= EXTENT_UPTODATE;
+       eb->flags |= EXTENT_BUFFER_FILLED;
        return eb;
 fail:
        free_extent_buffer(eb);
@@ -2231,7 +2259,8 @@ int read_extent_buffer_pages(struct extent_map_tree *tree,
                        ret = -EIO;
                }
        }
-       eb->flags |= EXTENT_UPTODATE;
+       if (!ret)
+               eb->flags |= EXTENT_UPTODATE;
        return ret;
 }
 EXPORT_SYMBOL(read_extent_buffer_pages);
index 8409b5cbeda7ef83fc3248fc501b7341dc6e7f1b..52a8b9394fc6e6f458a3483cc36eca385fcc1c7a 100644 (file)
@@ -16,6 +16,7 @@
 #define EXTENT_DELALLOC (1 << 5)
 #define EXTENT_DEFRAG (1 << 6)
 #define EXTENT_DEFRAG_DONE (1 << 7)
+#define EXTENT_BUFFER_FILLED (1 << 8)
 #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
 
 
@@ -33,6 +34,9 @@ struct extent_map_tree {
        struct address_space *mapping;
        rwlock_t lock;
        struct extent_map_ops *ops;
+       spinlock_t lru_lock;
+       struct list_head buffer_lru;
+       int lru_size;
 };
 
 /* note, this must start with the same fields as fs/extent_map.c:tree_entry */
@@ -64,20 +68,17 @@ struct extent_state {
        struct list_head list;
 };
 
-#define EXTENT_INLINE_PAGES 32
 struct extent_buffer {
        u64 start;
        unsigned long len;
-       atomic_t refs;
-       int flags;
-       struct list_head list;
-       struct list_head leak_list;
-       unsigned long alloc_addr;
        char *map_token;
        char *kaddr;
        unsigned long map_start;
        unsigned long map_len;
-       struct page *pages[EXTENT_INLINE_PAGES];
+       struct page *last_page;
+       struct list_head lru;
+       atomic_t refs;
+       int flags;
 };
 
 typedef struct extent_map *(get_extent_t)(struct inode *inode,
@@ -88,6 +89,7 @@ typedef struct extent_map *(get_extent_t)(struct inode *inode,
 
 void extent_map_tree_init(struct extent_map_tree *tree,
                          struct address_space *mapping, gfp_t mask);
+void extent_map_tree_cleanup(struct extent_map_tree *tree);
 struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
                                         u64 start, u64 end);
 int add_extent_mapping(struct extent_map_tree *tree,
index 87456ab742757bc5643b2a3d24bb46e2f6a272ff..67e4aca36a626729bd4adf49b4911e1bc8b82595 100644 (file)
@@ -443,8 +443,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
                        BUG_ON(ret);
                        mutex_unlock(&tree_root->fs_info->fs_mutex);
                        btrfs_btree_balance_dirty(tree_root, nr);
-                       schedule();
-
+                       cond_resched();
                        mutex_lock(&tree_root->fs_info->fs_mutex);
                }
                BUG_ON(ret);
@@ -471,7 +470,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
                mutex_unlock(&tree_root->fs_info->fs_mutex);
 
                btrfs_btree_balance_dirty(tree_root, nr);
-               schedule();
+               cond_resched();
        }
        return ret;
 }