__le64 device_id;
} __attribute__ ((__packed__));
+/* tag for the radix tree of block groups in ram */
+#define BTRFS_BLOCK_GROUP_DIRTY 0
+#define BTRFS_BLOCK_GROUP_HINTS 8
+#define BTRFS_BLOCK_GROUP_SIZE (256 * 1024 * 1024)
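+/* the on-disk block group item: for now just a count of used blocks */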
+struct btrfs_block_group_item {
+ __le64 used;
+} __attribute__ ((__packed__));
+
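+/*
+ * in-ram cache of a block group item. These live in
+ * block_group_radix, indexed by the last block in the group
+ * (key.objectid + key.offset - 1)
+ */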
+struct btrfs_block_group_cache {
+ struct btrfs_key key;
+ struct btrfs_block_group_item item;
+};
+
struct crypto_hash;
struct btrfs_fs_info {
struct btrfs_root *extent_root;
struct radix_tree_root pending_del_radix;
struct radix_tree_root pinned_radix;
struct radix_tree_root dev_radix;
+ struct radix_tree_root block_group_radix;
u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3];
int extent_tree_insert_nr;
* info about object characteristics. There is one for every file and dir in
* the FS
*/
-#define BTRFS_INODE_ITEM_KEY 1
+#define BTRFS_INODE_ITEM_KEY 1
+
+/* reserve 2-15 close to the inode for later flexibility */
/*
* dir items are the name -> inode pointers in a directory. There is one
* for every name in a directory.
*/
-#define BTRFS_DIR_ITEM_KEY 2
-#define BTRFS_DIR_INDEX_KEY 3
+#define BTRFS_DIR_ITEM_KEY 16
+#define BTRFS_DIR_INDEX_KEY 17
/*
- * inline data is file data that fits in the btree.
+ * extent data is for file data
*/
-#define BTRFS_INLINE_DATA_KEY 4
-/*
- * extent data is for data that can't fit in the btree. It points to
- * a (hopefully) huge chunk of disk
- */
-#define BTRFS_EXTENT_DATA_KEY 5
+#define BTRFS_EXTENT_DATA_KEY 18
/*
* csum items have the checksums for data in the extents
*/
-#define BTRFS_CSUM_ITEM_KEY 6
+#define BTRFS_CSUM_ITEM_KEY 19
+
+/* reserve 20-31 for other file stuff */
/*
* root items point to tree roots. They are typically in the root
* tree used by the super block to find all the other trees
*/
-#define BTRFS_ROOT_ITEM_KEY 7
+#define BTRFS_ROOT_ITEM_KEY 32
/*
* extent items are in the extent map tree. These record which blocks
* are used, and how many references there are to each block
*/
-#define BTRFS_EXTENT_ITEM_KEY 8
+#define BTRFS_EXTENT_ITEM_KEY 33
+
+/*
+ * block group items give us hints into the extent allocation
+ * tree: which blocks are in use, which are free, and so on
+ */
+#define BTRFS_BLOCK_GROUP_ITEM_KEY 34
/*
* dev items list the devices that make up the FS
*/
-#define BTRFS_DEV_ITEM_KEY 9
+#define BTRFS_DEV_ITEM_KEY 35
/*
* string items are for debugging. They just store a short string of
* data in the FS
*/
-#define BTRFS_STRING_ITEM_KEY 10
+#define BTRFS_STRING_ITEM_KEY 253
+
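+/* accessors for the used field, which is little endian on disk */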
+static inline u64 btrfs_block_group_used(struct btrfs_block_group_item *bi)
+{
+ return le64_to_cpu(bi->used);
+}
+
+static inline void btrfs_set_block_group_used(struct btrfs_block_group_item *bi,
+ u64 val)
+{
+ bi->used = cpu_to_le64(val);
+}
static inline u64 btrfs_inode_generation(struct btrfs_inode_item *i)
{
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 blocknr, u64 num_blocks);
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
+int btrfs_free_block_groups(struct btrfs_fs_info *info);
+int btrfs_read_block_groups(struct btrfs_root *root);
/* ctree.c */
int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path, u32 data_size);
return 0;
}
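+/*
+ * write a single block group item back into the extent tree, then
+ * process any extent tree inserts and deletes that were queued up
+ * while the path was held
+ */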
+static int write_one_cache_group(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_block_group_cache *cache)
+{
+ int ret;
+ int pending_ret;
+ struct btrfs_root *extent_root = root->fs_info->extent_root;
+ struct btrfs_block_group_item *bi;
+ struct btrfs_key ins;
+
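+ /*
+ * a zero size find_free_extent just refills the extent tree
+ * prealloc area before we go and modify the tree
+ */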
+ find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins);
+ ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
+ BUG_ON(ret);
+ bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
+ struct btrfs_block_group_item);
+ memcpy(bi, &cache->item, sizeof(*bi));
+ mark_buffer_dirty(path->nodes[0]);
+ btrfs_release_path(extent_root, path);
+
+ finish_current_insert(trans, extent_root);
+ pending_ret = del_pending_extents(trans, extent_root);
+ if (ret)
+ return ret;
+ if (pending_ret)
+ return pending_ret;
+ return 0;
+}
+
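+/*
+ * walk the block groups tagged dirty in the radix tree, clear the
+ * tag and write each item back. The last write error, if any, is
+ * what gets returned.
+ */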
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct btrfs_block_group_cache *cache[8];
+ int ret;
+ int err = 0;
+ int werr = 0;
+ struct radix_tree_root *radix = &root->fs_info->block_group_radix;
+ int i;
+ struct btrfs_path *path;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ while(1) {
+ ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
+ 0, ARRAY_SIZE(cache),
+ BTRFS_BLOCK_GROUP_DIRTY);
+ if (!ret)
+ break;
+ for (i = 0; i < ret; i++) {
+ radix_tree_tag_clear(radix, cache[i]->key.objectid +
+ cache[i]->key.offset - 1,
+ BTRFS_BLOCK_GROUP_DIRTY);
+ err = write_one_cache_group(trans, root,
+ path, cache[i]);
+ if (err)
+ werr = err;
+ }
+ }
+ btrfs_free_path(path);
+ return werr;
+}
+
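+/*
+ * add or subtract num blocks from the used count of the group(s)
+ * covering [blocknr, blocknr + num), tagging every group touched
+ * as dirty. A single update may span more than one block group.
+ */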
+static int update_block_group(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 blocknr, u64 num, int alloc)
+{
+ struct btrfs_block_group_cache *cache;
+ struct btrfs_fs_info *info = root->fs_info;
+ u64 total = num;
+ u64 old_val;
+ u64 block_in_group;
+ int ret;
+ while(total) {
+ ret = radix_tree_gang_lookup(&info->block_group_radix,
+ (void **)&cache, blocknr, 1);
+ if (!ret)
+ return -ENOENT;
+ block_in_group = blocknr - cache->key.objectid;
+ WARN_ON(block_in_group >= cache->key.offset);
+ radix_tree_tag_set(&info->block_group_radix,
+ cache->key.objectid + cache->key.offset - 1,
+ BTRFS_BLOCK_GROUP_DIRTY);
+
+ old_val = btrfs_block_group_used(&cache->item);
+ num = min(total, cache->key.offset - block_in_group);
+ total -= num;
+ blocknr += num;
+ if (alloc)
+ old_val += num;
+ else
+ old_val -= num;
+ btrfs_set_block_group_used(&cache->item, old_val);
+ }
+ return 0;
+}
+
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
btrfs_root *root)
{
ret = btrfs_del_item(trans, extent_root, path);
if (ret)
BUG();
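+ /* the extent is gone, drop its blocks from the group's used count */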
+ ret = update_block_group(trans, root, blocknr, num_blocks, 0);
+ BUG_ON(ret);
}
btrfs_release_path(extent_root, path);
btrfs_free_path(path);
num_blocks = 1;
total_needed = min(level + 2, BTRFS_MAX_LEVEL) * 3;
}
- if (info->last_insert.objectid == 0 && search_end == (u64)-1) {
- struct btrfs_disk_key *last_key;
- btrfs_init_path(path);
- ins->objectid = (u64)-1;
- ins->offset = (u64)-1;
- ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
- if (ret < 0)
- goto error;
- BUG_ON(ret == 0);
- if (path->slots[0] > 0)
- path->slots[0]--;
- l = btrfs_buffer_leaf(path->nodes[0]);
- last_key = &l->items[path->slots[0]].key;
- search_start = btrfs_disk_key_objectid(last_key);
- }
if (info->last_insert.objectid > search_start)
search_start = info->last_insert.objectid;
goto check_pending;
}
btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
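+ /*
+ * block group items live in this tree too, skip over
+ * anything that isn't a plain extent item
+ */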
+ if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
+ goto next;
if (key.objectid >= search_start) {
if (start_found) {
if (last_block < search_start)
}
start_found = 1;
last_block = key.objectid + key.offset;
+next:
path->slots[0]++;
}
// FIXME -ENOSPC
btrfs_free_path(path);
return ret;
}
-
/*
* finds a free extent and does all the dirty work required for allocation
* returns the key for the extent through ins, and a tree buffer for
ins->objectid = info->extent_tree_prealloc[nr];
info->extent_tree_insert[info->extent_tree_insert_nr++] =
ins->objectid;
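+ /* a prealloc block is being handed out, count it as used */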
+ ret = update_block_group(trans, root,
+ ins->objectid, ins->offset, 1);
+ BUG_ON(ret);
return 0;
}
/* do the real allocation */
return ret;
if (pending_ret)
return pending_ret;
+ ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
+ BUG_ON(ret);
return 0;
}
BUG();
return NULL;
}
+ BUG_ON(ret);
buf = btrfs_find_create_tree_block(root, ins.objectid);
set_buffer_uptodate(buf);
return buf;
btrfs_free_path(path);
return ret;
}
+
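+/*
+ * drop every block group cache out of the radix tree and free it,
+ * for use when the FS is being torn down
+ */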
+int btrfs_free_block_groups(struct btrfs_fs_info *info)
+{
+ int ret;
+ struct btrfs_block_group_cache *cache[8];
+ int i;
+
+ while(1) {
+ ret = radix_tree_gang_lookup(&info->block_group_radix,
+ (void **)cache, 0,
+ ARRAY_SIZE(cache));
+ if (!ret)
+ break;
+ for (i = 0; i < ret; i++) {
+ radix_tree_delete(&info->block_group_radix,
+ cache[i]->key.objectid +
+ cache[i]->key.offset - 1);
+ kfree(cache[i]);
+ }
+ }
+ return 0;
+}
+
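+/*
+ * read all of the block group items out of the extent tree and
+ * build an in-ram cache for each one in the radix tree
+ */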
+int btrfs_read_block_groups(struct btrfs_root *root)
+{
+ struct btrfs_path *path;
+ int ret;
+ int err = 0;
+ struct btrfs_block_group_item *bi;
+ struct btrfs_block_group_cache *cache;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ struct btrfs_leaf *leaf;
+ u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize;
+
+ root = root->fs_info->extent_root;
+ key.objectid = 0;
+ key.offset = group_size_blocks;
+ key.flags = 0;
+ btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ while(1) {
+ ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
+ &key, path, 0, 0);
+ if (ret > 0)
+ break;
+ if (ret < 0) {
+ err = ret;
+ break;
+ }
+ leaf = btrfs_buffer_leaf(path->nodes[0]);
+ btrfs_disk_key_to_cpu(&found_key,
+ &leaf->items[path->slots[0]].key);
+ cache = kmalloc(sizeof(*cache), GFP_NOFS);
+ if (!cache) {
+ err = -ENOMEM;
+ break;
+ }
+ bi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_block_group_item);
+ memcpy(&cache->item, bi, sizeof(*bi));
+ memcpy(&cache->key, &found_key, sizeof(found_key));
+ key.objectid = found_key.objectid + found_key.offset;
+ btrfs_release_path(root, path);
+ ret = radix_tree_insert(&root->fs_info->block_group_radix,
+ found_key.objectid +
+ found_key.offset - 1,
+ (void *)cache);
+ BUG_ON(ret);
+ if (key.objectid >=
+ btrfs_super_total_blocks(root->fs_info->disk_super))
+ break;
+ }
+
+ btrfs_free_path(path);
+ return err;
+}