bcache: Add struct bset_sort_state
author Kent Overstreet <kmo@daterainc.com>
Wed, 11 Sep 2013 05:53:34 +0000 (22:53 -0700)
committer Kent Overstreet <kmo@daterainc.com>
Wed, 8 Jan 2014 21:05:12 +0000 (13:05 -0800)
More disentangling bset.c from the rest of the bcache code - soon, the
sorting routines won't have any dependencies on any outside structs.

Signed-off-by: Kent Overstreet <kmo@daterainc.com>
drivers/md/bcache/bcache.h
drivers/md/bcache/bset.c
drivers/md/bcache/bset.h
drivers/md/bcache/btree.c
drivers/md/bcache/super.c
drivers/md/bcache/sysfs.c

index 2b46c86ac440f9dace68863c19a2e2f9e7654f41..7bd4c93475e721ad92f92ee2655076ee61d97246 100644 (file)
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
+#include "bset.h"
 #include "util.h"
 #include "closure.h"
 
@@ -645,8 +646,7 @@ struct cache_set {
         */
        mempool_t               *fill_iter;
 
-       mempool_t               *sort_pool;
-       unsigned                sort_crit_factor;
+       struct bset_sort_state  sort;
 
        /* List of buckets we're currently writing data to */
        struct list_head        data_buckets;
@@ -662,7 +662,6 @@ struct cache_set {
        unsigned                congested_read_threshold_us;
        unsigned                congested_write_threshold_us;
 
-       struct time_stats       sort_time;
        struct time_stats       btree_gc_time;
        struct time_stats       btree_split_time;
        struct time_stats       btree_read_time;
index 9e3a53d87de02bb631d36605ae1e959cbce0a113..9d9c2edda760ede6fa8bfb75515560de510d05d0 100644 (file)
@@ -952,6 +952,26 @@ struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
 
 /* Mergesort */
 
+void bch_bset_sort_state_free(struct bset_sort_state *state)
+{
+       if (state->pool)
+               mempool_destroy(state->pool);
+}
+
+int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
+{
+       spin_lock_init(&state->time.lock);
+
+       state->page_order = page_order;
+       state->crit_factor = int_sqrt(1 << page_order);
+
+       state->pool = mempool_create_page_pool(1, page_order);
+       if (!state->pool)
+               return -ENOMEM;
+
+       return 0;
+}
+
 static void sort_key_next(struct btree_iter *iter,
                          struct btree_iter_set *i)
 {
@@ -1077,22 +1097,24 @@ static void btree_mergesort(struct btree *b, struct bset *out,
 }
 
 static void __btree_sort(struct btree *b, struct btree_iter *iter,
-                        unsigned start, unsigned order, bool fixup)
+                        unsigned start, unsigned order, bool fixup,
+                        struct bset_sort_state *state)
 {
        uint64_t start_time;
-       bool remove_stale = !b->written;
        bool used_mempool = false;
        struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
                                                     order);
        if (!out) {
-               out = page_address(mempool_alloc(b->c->sort_pool, GFP_NOIO));
+               BUG_ON(order > state->page_order);
+
+               out = page_address(mempool_alloc(state->pool, GFP_NOIO));
                used_mempool = true;
                order = ilog2(bucket_pages(b->c));
        }
 
        start_time = local_clock();
 
-       btree_mergesort(b, out, iter, fixup, remove_stale);
+       btree_mergesort(b, out, iter, fixup, false);
        b->nsets = start;
 
        if (!start && order == b->page_order) {
@@ -1113,18 +1135,18 @@ static void __btree_sort(struct btree *b, struct btree_iter *iter,
        }
 
        if (used_mempool)
-               mempool_free(virt_to_page(out), b->c->sort_pool);
+               mempool_free(virt_to_page(out), state->pool);
        else
                free_pages((unsigned long) out, order);
 
-       if (b->written)
-               bset_build_written_tree(b);
+       bset_build_written_tree(b);
 
        if (!start)
-               bch_time_stats_update(&b->c->sort_time, start_time);
+               bch_time_stats_update(&state->time, start_time);
 }
 
-void bch_btree_sort_partial(struct btree *b, unsigned start)
+void bch_btree_sort_partial(struct btree *b, unsigned start,
+                           struct bset_sort_state *state)
 {
        size_t order = b->page_order, keys = 0;
        struct btree_iter iter;
@@ -1148,18 +1170,19 @@ void bch_btree_sort_partial(struct btree *b, unsigned start)
                        order = ilog2(order);
        }
 
-       __btree_sort(b, &iter, start, order, false);
+       __btree_sort(b, &iter, start, order, false, state);
 
        EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize);
 }
 
-void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter)
+void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter,
+                                   struct bset_sort_state *state)
 {
-       BUG_ON(!b->written);
-       __btree_sort(b, iter, 0, b->page_order, true);
+       __btree_sort(b, iter, 0, b->page_order, true, state);
 }
 
-void bch_btree_sort_into(struct btree *b, struct btree *new)
+void bch_btree_sort_into(struct btree *b, struct btree *new,
+                        struct bset_sort_state *state)
 {
        uint64_t start_time = local_clock();
 
@@ -1168,15 +1191,14 @@ void bch_btree_sort_into(struct btree *b, struct btree *new)
 
        btree_mergesort(b, new->sets->data, &iter, false, true);
 
-       bch_time_stats_update(&b->c->sort_time, start_time);
+       bch_time_stats_update(&state->time, start_time);
 
-       bkey_copy_key(&new->key, &b->key);
        new->sets->size = 0;
 }
 
 #define SORT_CRIT      (4096 / sizeof(uint64_t))
 
-void bch_btree_sort_lazy(struct btree *b)
+void bch_btree_sort_lazy(struct btree *b, struct bset_sort_state *state)
 {
        unsigned crit = SORT_CRIT;
        int i;
@@ -1185,24 +1207,18 @@ void bch_btree_sort_lazy(struct btree *b)
        if (!b->nsets)
                goto out;
 
-       /* If not a leaf node, always sort */
-       if (b->level) {
-               bch_btree_sort(b);
-               return;
-       }
-
        for (i = b->nsets - 1; i >= 0; --i) {
-               crit *= b->c->sort_crit_factor;
+               crit *= state->crit_factor;
 
                if (b->sets[i].data->keys < crit) {
-                       bch_btree_sort_partial(b, i);
+                       bch_btree_sort_partial(b, i, state);
                        return;
                }
        }
 
        /* Sort if we'd overflow */
        if (b->nsets + 1 == MAX_BSETS) {
-               bch_btree_sort(b);
+               bch_btree_sort(b, state);
                return;
        }
 
index 91bcbdb04085a3969e7ca7b27d61a33c3fea428b..4f60c21c7a38e6dcaa258caa18e2afc22f13d758 100644 (file)
@@ -3,6 +3,8 @@
 
 #include <linux/slab.h>
 
+#include "util.h" /* for time_stats */
+
 /*
  * BKEYS:
  *
@@ -190,6 +192,33 @@ struct bset_tree {
        struct bset     *data;
 };
 
+/* Sorting */
+
+struct bset_sort_state {
+       mempool_t               *pool;
+
+       unsigned                page_order;
+       unsigned                crit_factor;
+
+       struct time_stats       time;
+};
+
+void bch_bset_sort_state_free(struct bset_sort_state *);
+int bch_bset_sort_state_init(struct bset_sort_state *, unsigned);
+void bch_btree_sort_lazy(struct btree *, struct bset_sort_state *);
+void bch_btree_sort_into(struct btree *, struct btree *,
+                        struct bset_sort_state *);
+void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *,
+                                   struct bset_sort_state *);
+void bch_btree_sort_partial(struct btree *, unsigned,
+                           struct bset_sort_state *);
+
+static inline void bch_btree_sort(struct btree *b,
+                                 struct bset_sort_state *state)
+{
+       bch_btree_sort_partial(b, 0, state);
+}
+
 /* Keylists */
 
 struct keylist {
@@ -374,15 +403,6 @@ static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
 })
 
 bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
-void bch_btree_sort_lazy(struct btree *);
-void bch_btree_sort_into(struct btree *, struct btree *);
-void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *);
-void bch_btree_sort_partial(struct btree *, unsigned);
-
-static inline void bch_btree_sort(struct btree *b)
-{
-       bch_btree_sort_partial(b, 0);
-}
 
 int bch_bset_print_stats(struct cache_set *, char *);
 
index e1e36e761724c9618042434e95e08bebfef815f9..78ba0b67ac16782a42b3c5a909f3d46cbc1315f4 100644 (file)
@@ -263,7 +263,7 @@ void bch_btree_node_read_done(struct btree *b)
                if (i->seq == b->sets[0].data->seq)
                        goto err;
 
-       bch_btree_sort_and_fix_extents(b, iter);
+       bch_btree_sort_and_fix_extents(b, iter, &b->c->sort);
 
        i = b->sets[0].data;
        err = "short btree key";
@@ -476,7 +476,11 @@ void bch_btree_node_write(struct btree *b, struct closure *parent)
        atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
                        &PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
 
-       bch_btree_sort_lazy(b);
+       /* If not a leaf node, always sort */
+       if (b->level && b->nsets)
+               bch_btree_sort(b, &b->c->sort);
+       else
+               bch_btree_sort_lazy(b, &b->c->sort);
 
        /*
         * do verify if there was more than one set initially (i.e. we did a
@@ -1125,8 +1129,10 @@ err:
 static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait)
 {
        struct btree *n = bch_btree_node_alloc(b->c, b->level, wait);
-       if (!IS_ERR_OR_NULL(n))
-               bch_btree_sort_into(b, n);
+       if (!IS_ERR_OR_NULL(n)) {
+               bch_btree_sort_into(b, n, &b->c->sort);
+               bkey_copy_key(&n->key, &b->key);
+       }
 
        return n;
 }
index d05e75627714e6ee083b006a8188f471aa7140b5..1fc8165a5c017726730fc65fede91b5be1ba28b6 100644 (file)
@@ -1351,6 +1351,7 @@ static void cache_set_free(struct closure *cl)
                if (ca)
                        kobject_put(&ca->kobj);
 
+       bch_bset_sort_state_free(&c->sort);
        free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
 
        if (c->bio_split)
@@ -1481,15 +1482,12 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
                c->btree_pages = max_t(int, c->btree_pages / 4,
                                       BTREE_MAX_PAGES);
 
-       c->sort_crit_factor = int_sqrt(c->btree_pages);
-
        sema_init(&c->sb_write_mutex, 1);
        mutex_init(&c->bucket_lock);
        init_waitqueue_head(&c->try_wait);
        init_waitqueue_head(&c->bucket_wait);
        sema_init(&c->uuid_write_mutex, 1);
 
-       spin_lock_init(&c->sort_time.lock);
        spin_lock_init(&c->btree_gc_time.lock);
        spin_lock_init(&c->btree_split_time.lock);
        spin_lock_init(&c->btree_read_time.lock);
@@ -1517,12 +1515,11 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
                                bucket_pages(c))) ||
            !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
            !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
-           !(c->sort_pool = mempool_create_page_pool(1,
-                               ilog2(bucket_pages(c)))) ||
            !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
            bch_journal_alloc(c) ||
            bch_btree_cache_alloc(c) ||
-           bch_open_buckets_alloc(c))
+           bch_open_buckets_alloc(c) ||
+           bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
                goto err;
 
        c->congested_read_threshold_us  = 2000;
index d5dd282b176f1dd01a8ee4c7ca34f3584bd70962..206c80fb27c134d4431e4db851588ffc51ad022d 100644 (file)
@@ -490,7 +490,7 @@ lock_root:
 
        sysfs_print_time_stats(&c->btree_gc_time,       btree_gc, sec, ms);
        sysfs_print_time_stats(&c->btree_split_time,    btree_split, sec, us);
-       sysfs_print_time_stats(&c->sort_time,           btree_sort, ms, us);
+       sysfs_print_time_stats(&c->sort.time,           btree_sort, ms, us);
        sysfs_print_time_stats(&c->btree_read_time,     btree_read, ms, us);
        sysfs_print_time_stats(&c->try_harder_time,     try_harder, ms, us);