libceph: striping framework implementation
author Ilya Dryomov <idryomov@gmail.com>
Fri, 2 Feb 2018 14:23:22 +0000 (15:23 +0100)
committer Ilya Dryomov <idryomov@gmail.com>
Mon, 2 Apr 2018 08:12:42 +0000 (10:12 +0200)
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
include/linux/ceph/striper.h [new file with mode: 0644]
net/ceph/Makefile
net/ceph/striper.c [new file with mode: 0644]

diff --git a/include/linux/ceph/striper.h b/include/linux/ceph/striper.h
new file mode 100644 (file)
index 0000000..74134ee
--- /dev/null
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CEPH_STRIPER_H
+#define _LINUX_CEPH_STRIPER_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+
+struct ceph_file_layout;
+
+struct ceph_object_extent {
+       struct list_head oe_item;       /* link in an object_extents list */
+       u64 oe_objno;                   /* object number */
+       u64 oe_off;                     /* offset within the object */
+       u64 oe_len;                     /* length of the extent */
+};
+
+/*
+ * Prepare @ex for use by initializing its list linkage.  The remaining
+ * fields are left untouched.
+ */
+static inline void ceph_object_extent_init(struct ceph_object_extent *ex)
+{
+       struct list_head *link = &ex->oe_item;
+
+       INIT_LIST_HEAD(link);
+}
+
+/*
+ * Called for each mapped stripe unit.
+ *
+ * @ex:    object extent the stripe unit was mapped to
+ * @bytes: number of bytes mapped, i.e. the minimum of the full length
+ *         requested (file extent length) and the remainder of the
+ *         stripe unit within an object
+ * @arg:   opaque pointer supplied by the caller of the mapping function
+ */
+typedef void (*ceph_object_extent_fn_t)(struct ceph_object_extent *ex,
+                                       u32 bytes, void *arg);
+
+int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
+                        struct list_head *object_extents,
+                        struct ceph_object_extent *alloc_fn(void *arg),
+                        void *alloc_arg,
+                        ceph_object_extent_fn_t action_fn,
+                        void *action_arg);
+int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
+                        struct list_head *object_extents,
+                        ceph_object_extent_fn_t action_fn,
+                        void *action_arg);
+
+struct ceph_file_extent {
+       u64 fe_off;     /* offset within the file */
+       u64 fe_len;     /* length in bytes */
+};
+
+/*
+ * Add up the lengths of the first @num_file_extents entries of
+ * @file_extents.  Returns 0 when @num_file_extents is 0, in which case
+ * @file_extents is never dereferenced.
+ */
+static inline u64 ceph_file_extents_bytes(struct ceph_file_extent *file_extents,
+                                         u32 num_file_extents)
+{
+       struct ceph_file_extent *fe;
+       u32 left;
+       u64 total = 0;
+
+       for (fe = file_extents, left = num_file_extents; left; fe++, left--)
+               total += fe->fe_len;
+
+       return total;
+}
+
+int ceph_extent_to_file(struct ceph_file_layout *l,
+                       u64 objno, u64 objoff, u64 objlen,
+                       struct ceph_file_extent **file_extents,
+                       u32 *num_file_extents);
+
+#endif
index b4bded4b53960faa19099dbb5c7fd70a31209d83..12bf49772d24b93c90c49ebb87e8770f41bb3918 100644 (file)
@@ -8,6 +8,7 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
        mon_client.o \
        cls_lock_client.o \
        osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \
+       striper.o \
        debugfs.o \
        auth.o auth_none.o \
        crypto.o armor.o \
diff --git a/net/ceph/striper.c b/net/ceph/striper.c
new file mode 100644 (file)
index 0000000..bc1e4de
--- /dev/null
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/ceph/ceph_debug.h>
+
+#include <linux/math64.h>
+#include <linux/slab.h>
+
+#include <linux/ceph/osdmap.h>
+#include <linux/ceph/striper.h>
+#include <linux/ceph/types.h>
+
+/*
+ * Find the last extent for @objno in @object_extents, scanning from the
+ * tail (@object_extents is sorted by objno).
+ *
+ * Returns the extent if there is one; @add_pos is left untouched in
+ * that case.  Otherwise returns NULL and stores in @add_pos the node
+ * after which a new extent has to be inserted, so that
+ * list_add(&new_ex->oe_item, *add_pos) keeps the list sorted.
+ */
+static struct ceph_object_extent *
+lookup_last(struct list_head *object_extents, u64 objno,
+           struct list_head **add_pos)
+{
+       struct ceph_object_extent *cur;
+
+       list_for_each_entry_reverse(cur, object_extents, oe_item) {
+               /* sorted list: nothing before this entry can match */
+               if (cur->oe_objno < objno)
+                       break;
+
+               if (cur->oe_objno == objno)
+                       return cur;
+       }
+
+       /* either the entry we stopped at or the list head itself */
+       *add_pos = &cur->oe_item;
+       return NULL;
+}
+
+/*
+ * Find the extent of object @objno in @object_extents that fully covers
+ * @objoff~@xlen.  The list is walked from the front and the search stops
+ * at the first entry with a larger objno.  Returns NULL if no extent
+ * covers the range.
+ */
+static struct ceph_object_extent *
+lookup_containing(struct list_head *object_extents, u64 objno,
+                 u64 objoff, u32 xlen)
+{
+       struct ceph_object_extent *cur;
+
+       list_for_each_entry(cur, object_extents, oe_item) {
+               if (cur->oe_objno > objno)
+                       break;
+
+               if (cur->oe_objno != objno)
+                       continue;
+
+               /* checking the end of the range as well is paranoia */
+               if (cur->oe_off <= objoff &&
+                   cur->oe_off + cur->oe_len >= objoff + xlen)
+                       return cur;
+       }
+
+       return NULL;
+}
+
+/*
+ * Map the file extent @off~@len to object extents on the sorted list
+ * @object_extents.
+ *
+ * The aim is one (or as few as possible) object extent per object: a
+ * stripe unit that starts exactly where the last extent of its object
+ * ends is merged into that extent, so a returned object extent may
+ * reverse map to several different file extents.
+ *
+ * @alloc_fn is called with @alloc_arg for each new object extent.
+ * @action_fn, if not NULL, is called with @action_arg for each mapped
+ * stripe unit, whether it was merged into an existing object extent or
+ * started a new one.  For a new object extent it is called before the
+ * extent is linked into @object_extents.
+ *
+ * To keep @object_extents sorted, successive calls that share the same
+ * @object_extents must map successive file extents, i.e. the file
+ * extents must be passed in sorted order.
+ *
+ * The caller owns @object_extents, including whatever was added to it
+ * before an error was returned.
+ *
+ * Returns 0 on success, -ENOMEM if @alloc_fn returned NULL, or -EINVAL
+ * (after a WARN) if the resulting list is not sorted.
+ */
+int ceph_file_to_extents(struct ceph_file_layout *l, u64 off, u64 len,
+                        struct list_head *object_extents,
+                        struct ceph_object_extent *alloc_fn(void *arg),
+                        void *alloc_arg,
+                        ceph_object_extent_fn_t action_fn,
+                        void *action_arg)
+{
+       struct ceph_object_extent *tail, *prev, *ex;
+
+       while (len) {
+               struct list_head *add_pos = NULL;
+               u64 objno, objoff;
+               u32 xlen;
+
+               ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+                                             &xlen);
+
+               tail = lookup_last(object_extents, objno, &add_pos);
+               if (tail && tail->oe_off + tail->oe_len == objoff) {
+                       /* contiguous with the object's last extent: grow it */
+                       tail->oe_len += xlen;
+                       if (action_fn)
+                               action_fn(tail, xlen, action_arg);
+               } else {
+                       ex = alloc_fn(alloc_arg);
+                       if (!ex)
+                               return -ENOMEM;
+
+                       ex->oe_objno = objno;
+                       ex->oe_off = objoff;
+                       ex->oe_len = xlen;
+                       if (action_fn)
+                               action_fn(ex, xlen, action_arg);
+
+                       /*
+                        * Insert after the object's last extent if it has
+                        * one, otherwise at the spot lookup_last() picked.
+                        */
+                       list_add(&ex->oe_item,
+                                tail ? &tail->oe_item : add_pos);
+               }
+
+               off += xlen;
+               len -= xlen;
+       }
+
+       /*
+        * Sanity check: neighbours must be ordered by objno and, within
+        * an object, must neither overlap nor touch.
+        */
+       prev = NULL;
+       list_for_each_entry(ex, object_extents, oe_item) {
+               if (prev &&
+                   (prev->oe_objno > ex->oe_objno ||
+                    (prev->oe_objno == ex->oe_objno &&
+                     prev->oe_off + prev->oe_len >= ex->oe_off))) {
+                       WARN(1, "%s: object_extents list not sorted!\n",
+                            __func__);
+                       return -EINVAL;
+               }
+               prev = ex;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(ceph_file_to_extents);
+
+/*
+ * A stripped down, non-allocating version of ceph_file_to_extents(),
+ * for when @object_extents is already populated.
+ *
+ * Walks the file extent @off~@len one stripe unit at a time and calls
+ * @action_fn with @action_arg on the object extent that contains each
+ * unit.  @action_fn is called unconditionally.
+ *
+ * Returns 0 on success or -EINVAL (after a WARN) if a stripe unit is
+ * not covered by any extent on @object_extents.
+ */
+int ceph_iterate_extents(struct ceph_file_layout *l, u64 off, u64 len,
+                        struct list_head *object_extents,
+                        ceph_object_extent_fn_t action_fn,
+                        void *action_arg)
+{
+       struct ceph_object_extent *ex;
+       u64 objno, objoff;
+       u32 xlen;
+
+       for (; len; off += xlen, len -= xlen) {
+               ceph_calc_file_object_mapping(l, off, len, &objno, &objoff,
+                                             &xlen);
+
+               ex = lookup_containing(object_extents, objno, objoff, xlen);
+               if (ex) {
+                       action_fn(ex, xlen, action_arg);
+                       continue;
+               }
+
+               WARN(1, "%s: objno %llu %llu~%u not found!\n",
+                    __func__, objno, objoff, xlen);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(ceph_iterate_extents);
+
+/*
+ * Reverse map an object extent to a sorted list of file extents.
+ *
+ * @l:                file layout (stripe_unit, stripe_count, object_size)
+ * @objno:            object number
+ * @objoff:           offset of the extent within the object
+ * @objlen:           length of the extent
+ * @file_extents:     set to a newly allocated array with one entry per
+ *                    stripe unit touched by the object extent
+ * @num_file_extents: set to the number of entries in @file_extents
+ *
+ * If @objlen is 0, @file_extents is set to NULL, @num_file_extents to 0
+ * and 0 is returned.
+ *
+ * Returns 0 on success or -ENOMEM if the array cannot be allocated, in
+ * which case @file_extents is NULL and @num_file_extents is 0, so the
+ * two outputs never disagree.
+ *
+ * On success, the caller is responsible for:
+ *
+ *     kfree(file_extents)
+ */
+int ceph_extent_to_file(struct ceph_file_layout *l,
+                       u64 objno, u64 objoff, u64 objlen,
+                       struct ceph_file_extent **file_extents,
+                       u32 *num_file_extents)
+{
+       u32 stripes_per_object = l->object_size / l->stripe_unit;
+       u64 blockno;    /* which su */
+       u32 blockoff;   /* offset into su */
+       u64 stripeno;   /* which stripe */
+       u32 stripepos;  /* which su in the stripe,
+                          which object in the object set */
+       u64 objsetno;   /* which object set */
+       u32 i = 0;
+
+       if (!objlen) {
+               *file_extents = NULL;
+               *num_file_extents = 0;
+               return 0;
+       }
+
+       /* one file extent per stripe unit touched by objoff~objlen */
+       *num_file_extents = DIV_ROUND_UP_ULL(objoff + objlen, l->stripe_unit) -
+                                    DIV_ROUND_DOWN_ULL(objoff, l->stripe_unit);
+       *file_extents = kmalloc_array(*num_file_extents, sizeof(**file_extents),
+                                     GFP_NOIO);
+       if (!*file_extents) {
+               /* don't leave a non-zero count next to a NULL array */
+               *num_file_extents = 0;
+               return -ENOMEM;
+       }
+
+       /* only the first stripe unit can start at a non-zero offset */
+       div_u64_rem(objoff, l->stripe_unit, &blockoff);
+
+       /* depends on objno and the layout only, so compute it once */
+       objsetno = div_u64_rem(objno, l->stripe_count, &stripepos);
+
+       while (objlen) {
+               u64 off, len;
+
+               stripeno = div_u64(objoff, l->stripe_unit) +
+                                               objsetno * stripes_per_object;
+               blockno = stripeno * l->stripe_count + stripepos;
+               off = blockno * l->stripe_unit + blockoff;
+               len = min_t(u64, objlen, l->stripe_unit - blockoff);
+
+               (*file_extents)[i].fe_off = off;
+               (*file_extents)[i].fe_len = len;
+
+               blockoff = 0;
+               objoff += len;
+               objlen -= len;
+               i++;
+       }
+
+       BUG_ON(i != *num_file_extents);
+       return 0;
+}
+EXPORT_SYMBOL(ceph_extent_to_file);