staging: erofs: rename source files for better understanding
author Gao Xiang <gaoxiang25@huawei.com>
Wed, 31 Jul 2019 15:57:32 +0000 (23:57 +0800)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fri, 2 Aug 2019 11:52:03 +0000 (13:52 +0200)
Keep this in line with the erofs-outofstaging patchset as well; see
https://lore.kernel.org/linux-fsdevel/20190725095658.155779-1-gaoxiang25@huawei.com/

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Gao Xiang <gaoxiang25@huawei.com>
Link: https://lore.kernel.org/r/20190731155752.210602-3-gaoxiang25@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/staging/erofs/Makefile
drivers/staging/erofs/include/linux/tagptr.h [deleted file]
drivers/staging/erofs/tagptr.h [new file with mode: 0644]
drivers/staging/erofs/unzip_pagevec.h [deleted file]
drivers/staging/erofs/unzip_vle.c [deleted file]
drivers/staging/erofs/unzip_vle.h [deleted file]
drivers/staging/erofs/zdata.c [new file with mode: 0644]
drivers/staging/erofs/zdata.h [new file with mode: 0644]
drivers/staging/erofs/zpvec.h [new file with mode: 0644]

index 3ade87e78d064455a6f6a90e4cf5e5b0239a904c..5cdae21cb5afa6ffc2d1c506080980da3c257378 100644 (file)
@@ -9,5 +9,5 @@ obj-$(CONFIG_EROFS_FS) += erofs.o
 ccflags-y += -I $(srctree)/$(src)/include
 erofs-objs := super.o inode.o data.o namei.o dir.o utils.o
 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
-erofs-$(CONFIG_EROFS_FS_ZIP) += unzip_vle.o zmap.o decompressor.o
+erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o
 
diff --git a/drivers/staging/erofs/include/linux/tagptr.h b/drivers/staging/erofs/include/linux/tagptr.h
deleted file mode 100644 (file)
index b3f1377..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * A tagged pointer implementation
- *
- * Copyright (C) 2018 Gao Xiang <gaoxiang25@huawei.com>
- */
-#ifndef _LINUX_TAGPTR_H
-#define _LINUX_TAGPTR_H
-
-#include <linux/types.h>
-#include <linux/build_bug.h>
-
-/*
- * the name of tagged pointer types are tagptr{1, 2, 3...}_t
- * avoid directly using the internal structs __tagptr{1, 2, 3...}
- */
-#define __MAKE_TAGPTR(n) \
-typedef struct __tagptr##n {   \
-       uintptr_t v;    \
-} tagptr##n##_t;
-
-__MAKE_TAGPTR(1)
-__MAKE_TAGPTR(2)
-__MAKE_TAGPTR(3)
-__MAKE_TAGPTR(4)
-
-#undef __MAKE_TAGPTR
-
-extern void __compiletime_error("bad tagptr tags")
-       __bad_tagptr_tags(void);
-
-extern void __compiletime_error("bad tagptr type")
-       __bad_tagptr_type(void);
-
-/* fix the broken usage of "#define tagptr2_t tagptr3_t" by users */
-#define __tagptr_mask_1(ptr, n)        \
-       __builtin_types_compatible_p(typeof(ptr), struct __tagptr##n) ? \
-               (1UL << (n)) - 1 :
-
-#define __tagptr_mask(ptr)     (\
-       __tagptr_mask_1(ptr, 1) ( \
-       __tagptr_mask_1(ptr, 2) ( \
-       __tagptr_mask_1(ptr, 3) ( \
-       __tagptr_mask_1(ptr, 4) ( \
-       __bad_tagptr_type(), 0)))))
-
-/* generate a tagged pointer from a raw value */
-#define tagptr_init(type, val) \
-       ((typeof(type)){ .v = (uintptr_t)(val) })
-
-/*
- * directly cast a tagged pointer to the native pointer type, which
- * could be used for backward compatibility of existing code.
- */
-#define tagptr_cast_ptr(tptr) ((void *)(tptr).v)
-
-/* encode tagged pointers */
-#define tagptr_fold(type, ptr, _tags) ({ \
-       const typeof(_tags) tags = (_tags); \
-       if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(type))) \
-               __bad_tagptr_tags(); \
-tagptr_init(type, (uintptr_t)(ptr) | tags); })
-
-/* decode tagged pointers */
-#define tagptr_unfold_ptr(tptr) \
-       ((void *)((tptr).v & ~__tagptr_mask(tptr)))
-
-#define tagptr_unfold_tags(tptr) \
-       ((tptr).v & __tagptr_mask(tptr))
-
-/* operations for the tagger pointer */
-#define tagptr_eq(_tptr1, _tptr2) ({ \
-       typeof(_tptr1) tptr1 = (_tptr1); \
-       typeof(_tptr2) tptr2 = (_tptr2); \
-       (void)(&tptr1 == &tptr2); \
-(tptr1).v == (tptr2).v; })
-
-/* lock-free CAS operation */
-#define tagptr_cmpxchg(_ptptr, _o, _n) ({ \
-       typeof(_ptptr) ptptr = (_ptptr); \
-       typeof(_o) o = (_o); \
-       typeof(_n) n = (_n); \
-       (void)(&o == &n); \
-       (void)(&o == ptptr); \
-tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); })
-
-/* wrap WRITE_ONCE if atomic update is needed */
-#define tagptr_replace_tags(_ptptr, tags) ({ \
-       typeof(_ptptr) ptptr = (_ptptr); \
-       *ptptr = tagptr_fold(*ptptr, tagptr_unfold_ptr(*ptptr), tags); \
-*ptptr; })
-
-#define tagptr_set_tags(_ptptr, _tags) ({ \
-       typeof(_ptptr) ptptr = (_ptptr); \
-       const typeof(_tags) tags = (_tags); \
-       if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
-               __bad_tagptr_tags(); \
-       ptptr->v |= tags; \
-*ptptr; })
-
-#define tagptr_clear_tags(_ptptr, _tags) ({ \
-       typeof(_ptptr) ptptr = (_ptptr); \
-       const typeof(_tags) tags = (_tags); \
-       if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
-               __bad_tagptr_tags(); \
-       ptptr->v &= ~tags; \
-*ptptr; })
-
-#endif
-
diff --git a/drivers/staging/erofs/tagptr.h b/drivers/staging/erofs/tagptr.h
new file mode 100644 (file)
index 0000000..a72897c
--- /dev/null
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * A tagged pointer implementation
+ *
+ * Copyright (C) 2018 Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_FS_TAGPTR_H
+#define __EROFS_FS_TAGPTR_H
+
+#include <linux/types.h>
+#include <linux/build_bug.h>
+
+/*
+ * the name of tagged pointer types are tagptr{1, 2, 3...}_t
+ * avoid directly using the internal structs __tagptr{1, 2, 3...}
+ */
+#define __MAKE_TAGPTR(n) \
+typedef struct __tagptr##n {   \
+       uintptr_t v;    \
+} tagptr##n##_t;
+
+__MAKE_TAGPTR(1)
+__MAKE_TAGPTR(2)
+__MAKE_TAGPTR(3)
+__MAKE_TAGPTR(4)
+
+#undef __MAKE_TAGPTR
+
+extern void __compiletime_error("bad tagptr tags")
+       __bad_tagptr_tags(void);
+
+extern void __compiletime_error("bad tagptr type")
+       __bad_tagptr_type(void);
+
+/* fix the broken usage of "#define tagptr2_t tagptr3_t" by users */
+#define __tagptr_mask_1(ptr, n)        \
+       __builtin_types_compatible_p(typeof(ptr), struct __tagptr##n) ? \
+               (1UL << (n)) - 1 :
+
+#define __tagptr_mask(ptr)     (\
+       __tagptr_mask_1(ptr, 1) ( \
+       __tagptr_mask_1(ptr, 2) ( \
+       __tagptr_mask_1(ptr, 3) ( \
+       __tagptr_mask_1(ptr, 4) ( \
+       __bad_tagptr_type(), 0)))))
+
+/* generate a tagged pointer from a raw value */
+#define tagptr_init(type, val) \
+       ((typeof(type)){ .v = (uintptr_t)(val) })
+
+/*
+ * directly cast a tagged pointer to the native pointer type, which
+ * could be used for backward compatibility of existing code.
+ */
+#define tagptr_cast_ptr(tptr) ((void *)(tptr).v)
+
+/* encode tagged pointers */
+#define tagptr_fold(type, ptr, _tags) ({ \
+       const typeof(_tags) tags = (_tags); \
+       if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(type))) \
+               __bad_tagptr_tags(); \
+tagptr_init(type, (uintptr_t)(ptr) | tags); })
+
+/* decode tagged pointers */
+#define tagptr_unfold_ptr(tptr) \
+       ((void *)((tptr).v & ~__tagptr_mask(tptr)))
+
+#define tagptr_unfold_tags(tptr) \
+       ((tptr).v & __tagptr_mask(tptr))
+
+/* operations for the tagger pointer */
+#define tagptr_eq(_tptr1, _tptr2) ({ \
+       typeof(_tptr1) tptr1 = (_tptr1); \
+       typeof(_tptr2) tptr2 = (_tptr2); \
+       (void)(&tptr1 == &tptr2); \
+(tptr1).v == (tptr2).v; })
+
+/* lock-free CAS operation */
+#define tagptr_cmpxchg(_ptptr, _o, _n) ({ \
+       typeof(_ptptr) ptptr = (_ptptr); \
+       typeof(_o) o = (_o); \
+       typeof(_n) n = (_n); \
+       (void)(&o == &n); \
+       (void)(&o == ptptr); \
+tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); })
+
+/* wrap WRITE_ONCE if atomic update is needed */
+#define tagptr_replace_tags(_ptptr, tags) ({ \
+       typeof(_ptptr) ptptr = (_ptptr); \
+       *ptptr = tagptr_fold(*ptptr, tagptr_unfold_ptr(*ptptr), tags); \
+*ptptr; })
+
+#define tagptr_set_tags(_ptptr, _tags) ({ \
+       typeof(_ptptr) ptptr = (_ptptr); \
+       const typeof(_tags) tags = (_tags); \
+       if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
+               __bad_tagptr_tags(); \
+       ptptr->v |= tags; \
+*ptptr; })
+
+#define tagptr_clear_tags(_ptptr, _tags) ({ \
+       typeof(_ptptr) ptptr = (_ptptr); \
+       const typeof(_tags) tags = (_tags); \
+       if (__builtin_constant_p(tags) && (tags & ~__tagptr_mask(*ptptr))) \
+               __bad_tagptr_tags(); \
+       ptptr->v &= ~tags; \
+*ptptr; })
+
+#endif /* __EROFS_FS_TAGPTR_H */
+
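For readers new to the tagged-pointer helpers above, the following is a minimal userspace sketch of the same idea, assuming only standard C. The demo_* names are illustrative and not part of the kernel header, which additionally rejects oversized tags at compile time and offers cmpxchg-based updates:

	/*
	 * Minimal sketch of pointer tagging: the low bits of a sufficiently
	 * aligned pointer carry a small tag, as tagptr1_t does with one bit
	 * (cf. tag_compressed_page_justfound() in the z_erofs code).
	 */
	#include <stdint.h>
	#include <stdio.h>
	#include <assert.h>

	#define DEMO_TAG_BITS	1
	#define DEMO_TAG_MASK	((1UL << DEMO_TAG_BITS) - 1)

	typedef struct { uintptr_t v; } demo_tagptr_t;	/* like tagptr1_t */

	static demo_tagptr_t demo_fold(void *ptr, unsigned long tag)
	{
		assert(!((uintptr_t)ptr & DEMO_TAG_MASK));	/* pointer must be aligned */
		assert(!(tag & ~DEMO_TAG_MASK));		/* tag must fit in the low bits */
		return (demo_tagptr_t){ .v = (uintptr_t)ptr | tag };
	}

	static void *demo_unfold_ptr(demo_tagptr_t t)
	{
		return (void *)(t.v & ~DEMO_TAG_MASK);
	}

	static unsigned long demo_unfold_tags(demo_tagptr_t t)
	{
		return t.v & DEMO_TAG_MASK;
	}

	int main(void)
	{
		static int obj;				/* any suitably aligned object */
		demo_tagptr_t t = demo_fold(&obj, 1);	/* tag 1: "just found" */

		printf("ptr=%p tag=%lu\n", demo_unfold_ptr(t), demo_unfold_tags(t));
		return 0;
	}
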
diff --git a/drivers/staging/erofs/unzip_pagevec.h b/drivers/staging/erofs/unzip_pagevec.h
deleted file mode 100644 (file)
index f07302c..0000000
+++ /dev/null
@@ -1,166 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/drivers/staging/erofs/unzip_pagevec.h
- *
- * Copyright (C) 2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#ifndef __EROFS_UNZIP_PAGEVEC_H
-#define __EROFS_UNZIP_PAGEVEC_H
-
-#include <linux/tagptr.h>
-
-/* page type in pagevec for unzip subsystem */
-enum z_erofs_page_type {
-       /* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */
-       Z_EROFS_PAGE_TYPE_EXCLUSIVE,
-
-       Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED,
-
-       Z_EROFS_VLE_PAGE_TYPE_HEAD,
-       Z_EROFS_VLE_PAGE_TYPE_MAX
-};
-
-extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0")
-       __bad_page_type_exclusive(void);
-
-/* pagevec tagged pointer */
-typedef tagptr2_t      erofs_vtptr_t;
-
-/* pagevec collector */
-struct z_erofs_pagevec_ctor {
-       struct page *curr, *next;
-       erofs_vtptr_t *pages;
-
-       unsigned int nr, index;
-};
-
-static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
-                                            bool atomic)
-{
-       if (!ctor->curr)
-               return;
-
-       if (atomic)
-               kunmap_atomic(ctor->pages);
-       else
-               kunmap(ctor->curr);
-}
-
-static inline struct page *
-z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor,
-                              unsigned int nr)
-{
-       unsigned int index;
-
-       /* keep away from occupied pages */
-       if (ctor->next)
-               return ctor->next;
-
-       for (index = 0; index < nr; ++index) {
-               const erofs_vtptr_t t = ctor->pages[index];
-               const unsigned int tags = tagptr_unfold_tags(t);
-
-               if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE)
-                       return tagptr_unfold_ptr(t);
-       }
-       DBG_BUGON(nr >= ctor->nr);
-       return NULL;
-}
-
-static inline void
-z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor,
-                             bool atomic)
-{
-       struct page *next = z_erofs_pagevec_ctor_next_page(ctor, ctor->nr);
-
-       z_erofs_pagevec_ctor_exit(ctor, atomic);
-
-       ctor->curr = next;
-       ctor->next = NULL;
-       ctor->pages = atomic ?
-               kmap_atomic(ctor->curr) : kmap(ctor->curr);
-
-       ctor->nr = PAGE_SIZE / sizeof(struct page *);
-       ctor->index = 0;
-}
-
-static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
-                                            unsigned int nr,
-                                            erofs_vtptr_t *pages,
-                                            unsigned int i)
-{
-       ctor->nr = nr;
-       ctor->curr = ctor->next = NULL;
-       ctor->pages = pages;
-
-       if (i >= nr) {
-               i -= nr;
-               z_erofs_pagevec_ctor_pagedown(ctor, false);
-               while (i > ctor->nr) {
-                       i -= ctor->nr;
-                       z_erofs_pagevec_ctor_pagedown(ctor, false);
-               }
-       }
-
-       ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i);
-       ctor->index = i;
-}
-
-static inline bool
-z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor,
-                            struct page *page,
-                            enum z_erofs_page_type type,
-                            bool *occupied)
-{
-       *occupied = false;
-       if (unlikely(!ctor->next && type))
-               if (ctor->index + 1 == ctor->nr)
-                       return false;
-
-       if (unlikely(ctor->index >= ctor->nr))
-               z_erofs_pagevec_ctor_pagedown(ctor, false);
-
-       /* exclusive page type must be 0 */
-       if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL)
-               __bad_page_type_exclusive();
-
-       /* should remind that collector->next never equal to 1, 2 */
-       if (type == (uintptr_t)ctor->next) {
-               ctor->next = page;
-               *occupied = true;
-       }
-
-       ctor->pages[ctor->index++] =
-               tagptr_fold(erofs_vtptr_t, page, type);
-       return true;
-}
-
-static inline struct page *
-z_erofs_pagevec_ctor_dequeue(struct z_erofs_pagevec_ctor *ctor,
-                            enum z_erofs_page_type *type)
-{
-       erofs_vtptr_t t;
-
-       if (unlikely(ctor->index >= ctor->nr)) {
-               DBG_BUGON(!ctor->next);
-               z_erofs_pagevec_ctor_pagedown(ctor, true);
-       }
-
-       t = ctor->pages[ctor->index];
-
-       *type = tagptr_unfold_tags(t);
-
-       /* should remind that collector->next never equal to 1, 2 */
-       if (*type == (uintptr_t)ctor->next)
-               ctor->next = tagptr_unfold_ptr(t);
-
-       ctor->pages[ctor->index++] =
-               tagptr_fold(erofs_vtptr_t, NULL, 0);
-
-       return tagptr_unfold_ptr(t);
-}
-
-#endif
-
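The pagevec header being renamed to zpvec.h packs each page pointer together with a 2-bit z_erofs_page_type into one word (erofs_vtptr_t, a tagptr2_t). The exclusive type must be 0 so that such an entry is bit-identical to the raw pointer, which is what lets a full vector spill over by reusing one of its own exclusive entries as the pointer to the next vector page. A rough userspace sketch of that encoding, with hypothetical demo_* names and nothing beyond standard C:

	#include <stdint.h>
	#include <stdio.h>

	enum demo_page_type {			/* mirrors enum z_erofs_page_type */
		DEMO_TYPE_EXCLUSIVE = 0,	/* must stay 0, cf. __bad_page_type_exclusive() */
		DEMO_TYPE_TAIL_SHARED,
		DEMO_TYPE_HEAD,
	};

	#define DEMO_TYPE_MASK	3UL		/* two tag bits, as with tagptr2_t */

	static uintptr_t demo_enqueue(void *page, enum demo_page_type type)
	{
		return (uintptr_t)page | (uintptr_t)type;
	}

	static void *demo_dequeue(uintptr_t entry, enum demo_page_type *type)
	{
		*type = (enum demo_page_type)(entry & DEMO_TYPE_MASK);
		return (void *)(entry & ~DEMO_TYPE_MASK);
	}

	int main(void)
	{
		static long fake_page;		/* stand-in for a struct page, suitably aligned */
		enum demo_page_type type;
		uintptr_t e = demo_enqueue(&fake_page, DEMO_TYPE_EXCLUSIVE);

		/* an exclusive entry carries no tag bits, so it *is* the raw pointer */
		printf("entry == ptr? %d\n", demo_dequeue(e, &type) == (void *)&fake_page);
		printf("type = %d\n", type);
		return 0;
	}
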
diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c
deleted file mode 100644 (file)
index 28a98e7..0000000
+++ /dev/null
@@ -1,1587 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/drivers/staging/erofs/unzip_vle.c
- *
- * Copyright (C) 2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#include "unzip_vle.h"
-#include "compress.h"
-#include <linux/prefetch.h>
-
-#include <trace/events/erofs.h>
-
-/*
- * a compressed_pages[] placeholder in order to avoid
- * being filled with file pages for in-place decompression.
- */
-#define PAGE_UNALLOCATED     ((void *)0x5F0E4B1D)
-
-/* how to allocate cached pages for a workgroup */
-enum z_erofs_cache_alloctype {
-       DONTALLOC,      /* don't allocate any cached pages */
-       DELAYEDALLOC,   /* delayed allocation (at the time of submitting io) */
-};
-
-/*
- * tagged pointer with 1-bit tag for all compressed pages
- * tag 0 - the page is just found with an extra page reference
- */
-typedef tagptr1_t compressed_page_t;
-
-#define tag_compressed_page_justfound(page) \
-       tagptr_fold(compressed_page_t, page, 1)
-
-static struct workqueue_struct *z_erofs_workqueue __read_mostly;
-static struct kmem_cache *z_erofs_workgroup_cachep __read_mostly;
-
-void z_erofs_exit_zip_subsystem(void)
-{
-       destroy_workqueue(z_erofs_workqueue);
-       kmem_cache_destroy(z_erofs_workgroup_cachep);
-}
-
-static inline int init_unzip_workqueue(void)
-{
-       const unsigned int onlinecpus = num_possible_cpus();
-
-       /*
-        * we don't need too many threads, limiting threads
-        * could improve scheduling performance.
-        */
-       z_erofs_workqueue =
-               alloc_workqueue("erofs_unzipd",
-                               WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
-                               onlinecpus + onlinecpus / 4);
-
-       return z_erofs_workqueue ? 0 : -ENOMEM;
-}
-
-static void init_once(void *ptr)
-{
-       struct z_erofs_vle_workgroup *grp = ptr;
-       struct z_erofs_vle_work *const work =
-               z_erofs_vle_grab_primary_work(grp);
-       unsigned int i;
-
-       mutex_init(&work->lock);
-       work->nr_pages = 0;
-       work->vcnt = 0;
-       for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i)
-               grp->compressed_pages[i] = NULL;
-}
-
-static void init_always(struct z_erofs_vle_workgroup *grp)
-{
-       struct z_erofs_vle_work *const work =
-               z_erofs_vle_grab_primary_work(grp);
-
-       atomic_set(&grp->obj.refcount, 1);
-       grp->flags = 0;
-
-       DBG_BUGON(work->nr_pages);
-       DBG_BUGON(work->vcnt);
-}
-
-int __init z_erofs_init_zip_subsystem(void)
-{
-       z_erofs_workgroup_cachep =
-               kmem_cache_create("erofs_compress",
-                                 Z_EROFS_WORKGROUP_SIZE, 0,
-                                 SLAB_RECLAIM_ACCOUNT, init_once);
-
-       if (z_erofs_workgroup_cachep) {
-               if (!init_unzip_workqueue())
-                       return 0;
-
-               kmem_cache_destroy(z_erofs_workgroup_cachep);
-       }
-       return -ENOMEM;
-}
-
-enum z_erofs_vle_work_role {
-       Z_EROFS_VLE_WORK_SECONDARY,
-       Z_EROFS_VLE_WORK_PRIMARY,
-       /*
-        * The current work was the tail of an exist chain, and the previous
-        * processed chained works are all decided to be hooked up to it.
-        * A new chain should be created for the remaining unprocessed works,
-        * therefore different from Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
-        * the next work cannot reuse the whole page in the following scenario:
-        *  ________________________________________________________________
-        * |      tail (partial) page     |       head (partial) page       |
-        * |  (belongs to the next work)  |  (belongs to the current work)  |
-        * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
-        */
-       Z_EROFS_VLE_WORK_PRIMARY_HOOKED,
-       /*
-        * The current work has been linked with the processed chained works,
-        * and could be also linked with the potential remaining works, which
-        * means if the processing page is the tail partial page of the work,
-        * the current work can safely use the whole page (since the next work
-        * is under control) for in-place decompression, as illustrated below:
-        *  ________________________________________________________________
-        * |  tail (partial) page  |          head (partial) page           |
-        * | (of the current work) |         (of the previous work)         |
-        * |  PRIMARY_FOLLOWED or  |                                        |
-        * |_____PRIMARY_HOOKED____|____________PRIMARY_FOLLOWED____________|
-        *
-        * [  (*) the above page can be used for the current work itself.  ]
-        */
-       Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
-       Z_EROFS_VLE_WORK_MAX
-};
-
-struct z_erofs_vle_work_builder {
-       enum z_erofs_vle_work_role role;
-       /*
-        * 'hosted = false' means that the current workgroup doesn't belong to
-        * the owned chained workgroups. In the other words, it is none of our
-        * business to submit this workgroup.
-        */
-       bool hosted;
-
-       struct z_erofs_vle_workgroup *grp;
-       struct z_erofs_vle_work *work;
-       struct z_erofs_pagevec_ctor vector;
-
-       /* pages used for reading the compressed data */
-       struct page **compressed_pages;
-       unsigned int compressed_deficit;
-};
-
-#define VLE_WORK_BUILDER_INIT()        \
-       { .work = NULL, .role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED }
-
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-static void preload_compressed_pages(struct z_erofs_vle_work_builder *bl,
-                                    struct address_space *mc,
-                                    pgoff_t index,
-                                    unsigned int clusterpages,
-                                    enum z_erofs_cache_alloctype type,
-                                    struct list_head *pagepool,
-                                    gfp_t gfp)
-{
-       struct page **const pages = bl->compressed_pages;
-       const unsigned int remaining = bl->compressed_deficit;
-       bool standalone = true;
-       unsigned int i, j = 0;
-
-       if (bl->role < Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)
-               return;
-
-       gfp = mapping_gfp_constraint(mc, gfp) & ~__GFP_RECLAIM;
-
-       index += clusterpages - remaining;
-
-       for (i = 0; i < remaining; ++i) {
-               struct page *page;
-               compressed_page_t t;
-
-               /* the compressed page was loaded before */
-               if (READ_ONCE(pages[i]))
-                       continue;
-
-               page = find_get_page(mc, index + i);
-
-               if (page) {
-                       t = tag_compressed_page_justfound(page);
-               } else if (type == DELAYEDALLOC) {
-                       t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
-               } else {        /* DONTALLOC */
-                       if (standalone)
-                               j = i;
-                       standalone = false;
-                       continue;
-               }
-
-               if (!cmpxchg_relaxed(&pages[i], NULL, tagptr_cast_ptr(t)))
-                       continue;
-
-               if (page)
-                       put_page(page);
-       }
-       bl->compressed_pages += j;
-       bl->compressed_deficit = remaining - j;
-
-       if (standalone)
-               bl->role = Z_EROFS_VLE_WORK_PRIMARY;
-}
-
-/* called by erofs_shrinker to get rid of all compressed_pages */
-int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
-                                      struct erofs_workgroup *egrp)
-{
-       struct z_erofs_vle_workgroup *const grp =
-               container_of(egrp, struct z_erofs_vle_workgroup, obj);
-       struct address_space *const mapping = MNGD_MAPPING(sbi);
-       const int clusterpages = erofs_clusterpages(sbi);
-       int i;
-
-       /*
-        * refcount of workgroup is now freezed as 1,
-        * therefore no need to worry about available decompression users.
-        */
-       for (i = 0; i < clusterpages; ++i) {
-               struct page *page = grp->compressed_pages[i];
-
-               if (!page || page->mapping != mapping)
-                       continue;
-
-               /* block other users from reclaiming or migrating the page */
-               if (!trylock_page(page))
-                       return -EBUSY;
-
-               /* barrier is implied in the following 'unlock_page' */
-               WRITE_ONCE(grp->compressed_pages[i], NULL);
-
-               set_page_private(page, 0);
-               ClearPagePrivate(page);
-
-               unlock_page(page);
-               put_page(page);
-       }
-       return 0;
-}
-
-int erofs_try_to_free_cached_page(struct address_space *mapping,
-                                 struct page *page)
-{
-       struct erofs_sb_info *const sbi = EROFS_SB(mapping->host->i_sb);
-       const unsigned int clusterpages = erofs_clusterpages(sbi);
-       struct z_erofs_vle_workgroup *const grp = (void *)page_private(page);
-       int ret = 0;    /* 0 - busy */
-
-       if (erofs_workgroup_try_to_freeze(&grp->obj, 1)) {
-               unsigned int i;
-
-               for (i = 0; i < clusterpages; ++i) {
-                       if (grp->compressed_pages[i] == page) {
-                               WRITE_ONCE(grp->compressed_pages[i], NULL);
-                               ret = 1;
-                               break;
-                       }
-               }
-               erofs_workgroup_unfreeze(&grp->obj, 1);
-
-               if (ret) {
-                       ClearPagePrivate(page);
-                       put_page(page);
-               }
-       }
-       return ret;
-}
-#else
-static void preload_compressed_pages(struct z_erofs_vle_work_builder *bl,
-                                    struct address_space *mc,
-                                    pgoff_t index,
-                                    unsigned int clusterpages,
-                                    enum z_erofs_cache_alloctype type,
-                                    struct list_head *pagepool,
-                                    gfp_t gfp)
-{
-       /* nowhere to load compressed pages from */
-}
-#endif
-
-/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
-static inline bool try_to_reuse_as_compressed_page(
-       struct z_erofs_vle_work_builder *b,
-       struct page *page)
-{
-       while (b->compressed_deficit) {
-               --b->compressed_deficit;
-               if (!cmpxchg(b->compressed_pages++, NULL, page))
-                       return true;
-       }
-
-       return false;
-}
-
-/* callers must be with work->lock held */
-static int z_erofs_vle_work_add_page(
-       struct z_erofs_vle_work_builder *builder,
-       struct page *page,
-       enum z_erofs_page_type type)
-{
-       int ret;
-       bool occupied;
-
-       /* give priority for the compressed data storage */
-       if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY &&
-           type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
-           try_to_reuse_as_compressed_page(builder, page))
-               return 0;
-
-       ret = z_erofs_pagevec_ctor_enqueue(&builder->vector,
-                                          page, type, &occupied);
-       builder->work->vcnt += (unsigned int)ret;
-
-       return ret ? 0 : -EAGAIN;
-}
-
-static enum z_erofs_vle_work_role
-try_to_claim_workgroup(struct z_erofs_vle_workgroup *grp,
-                      z_erofs_vle_owned_workgrp_t *owned_head,
-                      bool *hosted)
-{
-       DBG_BUGON(*hosted);
-
-       /* let's claim these following types of workgroup */
-retry:
-       if (grp->next == Z_EROFS_VLE_WORKGRP_NIL) {
-               /* type 1, nil workgroup */
-               if (cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_NIL,
-                           *owned_head) != Z_EROFS_VLE_WORKGRP_NIL)
-                       goto retry;
-
-               *owned_head = &grp->next;
-               *hosted = true;
-               /* lucky, I am the followee :) */
-               return Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
-
-       } else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) {
-               /*
-                * type 2, link to the end of a existing open chain,
-                * be careful that its submission itself is governed
-                * by the original owned chain.
-                */
-               if (cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
-                           *owned_head) != Z_EROFS_VLE_WORKGRP_TAIL)
-                       goto retry;
-               *owned_head = Z_EROFS_VLE_WORKGRP_TAIL;
-               return Z_EROFS_VLE_WORK_PRIMARY_HOOKED;
-       }
-
-       return Z_EROFS_VLE_WORK_PRIMARY; /* :( better luck next time */
-}
-
-struct z_erofs_vle_work_finder {
-       struct super_block *sb;
-       pgoff_t idx;
-       unsigned int pageofs;
-
-       struct z_erofs_vle_workgroup **grp_ret;
-       enum z_erofs_vle_work_role *role;
-       z_erofs_vle_owned_workgrp_t *owned_head;
-       bool *hosted;
-};
-
-static struct z_erofs_vle_work *
-z_erofs_vle_work_lookup(const struct z_erofs_vle_work_finder *f)
-{
-       bool tag, primary;
-       struct erofs_workgroup *egrp;
-       struct z_erofs_vle_workgroup *grp;
-       struct z_erofs_vle_work *work;
-
-       egrp = erofs_find_workgroup(f->sb, f->idx, &tag);
-       if (!egrp) {
-               *f->grp_ret = NULL;
-               return NULL;
-       }
-
-       grp = container_of(egrp, struct z_erofs_vle_workgroup, obj);
-       *f->grp_ret = grp;
-
-       work = z_erofs_vle_grab_work(grp, f->pageofs);
-       /* if multiref is disabled, `primary' is always true */
-       primary = true;
-
-       DBG_BUGON(work->pageofs != f->pageofs);
-
-       /*
-        * lock must be taken first to avoid grp->next == NIL between
-        * claiming workgroup and adding pages:
-        *                        grp->next != NIL
-        *   grp->next = NIL
-        *   mutex_unlock_all
-        *                        mutex_lock(&work->lock)
-        *                        add all pages to pagevec
-        *
-        * [correct locking case 1]:
-        *   mutex_lock(grp->work[a])
-        *   ...
-        *   mutex_lock(grp->work[b])     mutex_lock(grp->work[c])
-        *   ...                          *role = SECONDARY
-        *                                add all pages to pagevec
-        *                                ...
-        *                                mutex_unlock(grp->work[c])
-        *   mutex_lock(grp->work[c])
-        *   ...
-        *   grp->next = NIL
-        *   mutex_unlock_all
-        *
-        * [correct locking case 2]:
-        *   mutex_lock(grp->work[b])
-        *   ...
-        *   mutex_lock(grp->work[a])
-        *   ...
-        *   mutex_lock(grp->work[c])
-        *   ...
-        *   grp->next = NIL
-        *   mutex_unlock_all
-        *                                mutex_lock(grp->work[a])
-        *                                *role = PRIMARY_OWNER
-        *                                add all pages to pagevec
-        *                                ...
-        */
-       mutex_lock(&work->lock);
-
-       *f->hosted = false;
-       if (!primary)
-               *f->role = Z_EROFS_VLE_WORK_SECONDARY;
-       else    /* claim the workgroup if possible */
-               *f->role = try_to_claim_workgroup(grp, f->owned_head,
-                                                 f->hosted);
-       return work;
-}
-
-static struct z_erofs_vle_work *
-z_erofs_vle_work_register(const struct z_erofs_vle_work_finder *f,
-                         struct erofs_map_blocks *map)
-{
-       bool gnew = false;
-       struct z_erofs_vle_workgroup *grp = *f->grp_ret;
-       struct z_erofs_vle_work *work;
-
-       /* if multiref is disabled, grp should never be nullptr */
-       if (unlikely(grp)) {
-               DBG_BUGON(1);
-               return ERR_PTR(-EINVAL);
-       }
-
-       /* no available workgroup, let's allocate one */
-       grp = kmem_cache_alloc(z_erofs_workgroup_cachep, GFP_NOFS);
-       if (unlikely(!grp))
-               return ERR_PTR(-ENOMEM);
-
-       init_always(grp);
-       grp->obj.index = f->idx;
-       grp->llen = map->m_llen;
-
-       z_erofs_vle_set_workgrp_fmt(grp, (map->m_flags & EROFS_MAP_ZIPPED) ?
-                                   Z_EROFS_VLE_WORKGRP_FMT_LZ4 :
-                                   Z_EROFS_VLE_WORKGRP_FMT_PLAIN);
-
-       if (map->m_flags & EROFS_MAP_FULL_MAPPED)
-               grp->flags |= Z_EROFS_VLE_WORKGRP_FULL_LENGTH;
-
-       /* new workgrps have been claimed as type 1 */
-       WRITE_ONCE(grp->next, *f->owned_head);
-       /* primary and followed work for all new workgrps */
-       *f->role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
-       /* it should be submitted by ourselves */
-       *f->hosted = true;
-
-       gnew = true;
-       work = z_erofs_vle_grab_primary_work(grp);
-       work->pageofs = f->pageofs;
-
-       /*
-        * lock all primary followed works before visible to others
-        * and mutex_trylock *never* fails for a new workgroup.
-        */
-       mutex_trylock(&work->lock);
-
-       if (gnew) {
-               int err = erofs_register_workgroup(f->sb, &grp->obj, 0);
-
-               if (err) {
-                       mutex_unlock(&work->lock);
-                       kmem_cache_free(z_erofs_workgroup_cachep, grp);
-                       return ERR_PTR(-EAGAIN);
-               }
-       }
-
-       *f->owned_head = &grp->next;
-       *f->grp_ret = grp;
-       return work;
-}
-
-#define builder_is_hooked(builder) \
-       ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_HOOKED)
-
-#define builder_is_followed(builder) \
-       ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)
-
-static int z_erofs_vle_work_iter_begin(struct z_erofs_vle_work_builder *builder,
-                                      struct super_block *sb,
-                                      struct erofs_map_blocks *map,
-                                      z_erofs_vle_owned_workgrp_t *owned_head)
-{
-       const unsigned int clusterpages = erofs_clusterpages(EROFS_SB(sb));
-       struct z_erofs_vle_workgroup *grp;
-       const struct z_erofs_vle_work_finder finder = {
-               .sb = sb,
-               .idx = erofs_blknr(map->m_pa),
-               .pageofs = map->m_la & ~PAGE_MASK,
-               .grp_ret = &grp,
-               .role = &builder->role,
-               .owned_head = owned_head,
-               .hosted = &builder->hosted
-       };
-       struct z_erofs_vle_work *work;
-
-       DBG_BUGON(builder->work);
-
-       /* must be Z_EROFS_WORK_TAIL or the next chained work */
-       DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_NIL);
-       DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
-
-       DBG_BUGON(erofs_blkoff(map->m_pa));
-
-repeat:
-       work = z_erofs_vle_work_lookup(&finder);
-       if (work) {
-               unsigned int orig_llen;
-
-               /* increase workgroup `llen' if needed */
-               while ((orig_llen = READ_ONCE(grp->llen)) < map->m_llen &&
-                      orig_llen != cmpxchg_relaxed(&grp->llen,
-                                                   orig_llen, map->m_llen))
-                       cpu_relax();
-               goto got_it;
-       }
-
-       work = z_erofs_vle_work_register(&finder, map);
-       if (unlikely(work == ERR_PTR(-EAGAIN)))
-               goto repeat;
-
-       if (IS_ERR(work))
-               return PTR_ERR(work);
-got_it:
-       z_erofs_pagevec_ctor_init(&builder->vector, Z_EROFS_NR_INLINE_PAGEVECS,
-                                 work->pagevec, work->vcnt);
-
-       if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY) {
-               /* enable possibly in-place decompression */
-               builder->compressed_pages = grp->compressed_pages;
-               builder->compressed_deficit = clusterpages;
-       } else {
-               builder->compressed_pages = NULL;
-               builder->compressed_deficit = 0;
-       }
-
-       builder->grp = grp;
-       builder->work = work;
-       return 0;
-}
-
-/*
- * keep in mind that no referenced workgroups will be freed
- * only after a RCU grace period, so rcu_read_lock() could
- * prevent a workgroup from being freed.
- */
-static void z_erofs_rcu_callback(struct rcu_head *head)
-{
-       struct z_erofs_vle_work *work = container_of(head,
-               struct z_erofs_vle_work, rcu);
-       struct z_erofs_vle_workgroup *grp =
-               z_erofs_vle_work_workgroup(work, true);
-
-       kmem_cache_free(z_erofs_workgroup_cachep, grp);
-}
-
-void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
-{
-       struct z_erofs_vle_workgroup *const vgrp = container_of(grp,
-               struct z_erofs_vle_workgroup, obj);
-       struct z_erofs_vle_work *const work = &vgrp->work;
-
-       call_rcu(&work->rcu, z_erofs_rcu_callback);
-}
-
-static void
-__z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp,
-                          struct z_erofs_vle_work *work __maybe_unused)
-{
-       erofs_workgroup_put(&grp->obj);
-}
-
-static void z_erofs_vle_work_release(struct z_erofs_vle_work *work)
-{
-       struct z_erofs_vle_workgroup *grp =
-               z_erofs_vle_work_workgroup(work, true);
-
-       __z_erofs_vle_work_release(grp, work);
-}
-
-static inline bool
-z_erofs_vle_work_iter_end(struct z_erofs_vle_work_builder *builder)
-{
-       struct z_erofs_vle_work *work = builder->work;
-
-       if (!work)
-               return false;
-
-       z_erofs_pagevec_ctor_exit(&builder->vector, false);
-       mutex_unlock(&work->lock);
-
-       /*
-        * if all pending pages are added, don't hold work reference
-        * any longer if the current work isn't hosted by ourselves.
-        */
-       if (!builder->hosted)
-               __z_erofs_vle_work_release(builder->grp, work);
-
-       builder->work = NULL;
-       builder->grp = NULL;
-       return true;
-}
-
-static inline struct page *__stagingpage_alloc(struct list_head *pagepool,
-                                              gfp_t gfp)
-{
-       struct page *page = erofs_allocpage(pagepool, gfp);
-
-       if (unlikely(!page))
-               return NULL;
-
-       page->mapping = Z_EROFS_MAPPING_STAGING;
-       return page;
-}
-
-struct z_erofs_vle_frontend {
-       struct inode *const inode;
-
-       struct z_erofs_vle_work_builder builder;
-       struct erofs_map_blocks map;
-
-       z_erofs_vle_owned_workgrp_t owned_head;
-
-       /* used for applying cache strategy on the fly */
-       bool backmost;
-       erofs_off_t headoffset;
-};
-
-#define VLE_FRONTEND_INIT(__i) { \
-       .inode = __i, \
-       .map = { \
-               .m_llen = 0, \
-               .m_plen = 0, \
-               .mpage = NULL \
-       }, \
-       .builder = VLE_WORK_BUILDER_INIT(), \
-       .owned_head = Z_EROFS_VLE_WORKGRP_TAIL, \
-       .backmost = true, }
-
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-static inline bool
-should_alloc_managed_pages(struct z_erofs_vle_frontend *fe, erofs_off_t la)
-{
-       if (fe->backmost)
-               return true;
-
-       if (EROFS_FS_ZIP_CACHE_LVL >= 2)
-               return la < fe->headoffset;
-
-       return false;
-}
-#else
-static inline bool
-should_alloc_managed_pages(struct z_erofs_vle_frontend *fe, erofs_off_t la)
-{
-       return false;
-}
-#endif
-
-static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
-                               struct page *page,
-                               struct list_head *page_pool)
-{
-       struct super_block *const sb = fe->inode->i_sb;
-       struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb);
-       struct erofs_map_blocks *const map = &fe->map;
-       struct z_erofs_vle_work_builder *const builder = &fe->builder;
-       const loff_t offset = page_offset(page);
-
-       bool tight = builder_is_hooked(builder);
-       struct z_erofs_vle_work *work = builder->work;
-
-       enum z_erofs_cache_alloctype cache_strategy;
-       enum z_erofs_page_type page_type;
-       unsigned int cur, end, spiltted, index;
-       int err = 0;
-
-       /* register locked file pages as online pages in pack */
-       z_erofs_onlinepage_init(page);
-
-       spiltted = 0;
-       end = PAGE_SIZE;
-repeat:
-       cur = end - 1;
-
-       /* lucky, within the range of the current map_blocks */
-       if (offset + cur >= map->m_la &&
-           offset + cur < map->m_la + map->m_llen) {
-               /* didn't get a valid unzip work previously (very rare) */
-               if (!builder->work)
-                       goto restart_now;
-               goto hitted;
-       }
-
-       /* go ahead the next map_blocks */
-       debugln("%s: [out-of-range] pos %llu", __func__, offset + cur);
-
-       if (z_erofs_vle_work_iter_end(builder))
-               fe->backmost = false;
-
-       map->m_la = offset + cur;
-       map->m_llen = 0;
-       err = z_erofs_map_blocks_iter(fe->inode, map, 0);
-       if (unlikely(err))
-               goto err_out;
-
-restart_now:
-       if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED)))
-               goto hitted;
-
-       DBG_BUGON(map->m_plen != 1 << sbi->clusterbits);
-       DBG_BUGON(erofs_blkoff(map->m_pa));
-
-       err = z_erofs_vle_work_iter_begin(builder, sb, map, &fe->owned_head);
-       if (unlikely(err))
-               goto err_out;
-
-       /* preload all compressed pages (maybe downgrade role if necessary) */
-       if (should_alloc_managed_pages(fe, map->m_la))
-               cache_strategy = DELAYEDALLOC;
-       else
-               cache_strategy = DONTALLOC;
-
-       preload_compressed_pages(builder, MNGD_MAPPING(sbi),
-                                map->m_pa / PAGE_SIZE,
-                                map->m_plen / PAGE_SIZE,
-                                cache_strategy, page_pool, GFP_KERNEL);
-
-       tight &= builder_is_hooked(builder);
-       work = builder->work;
-hitted:
-       cur = end - min_t(unsigned int, offset + end - map->m_la, end);
-       if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) {
-               zero_user_segment(page, cur, end);
-               goto next_part;
-       }
-
-       /* let's derive page type */
-       page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
-               (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
-                       (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
-                               Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
-
-       if (cur)
-               tight &= builder_is_followed(builder);
-
-retry:
-       err = z_erofs_vle_work_add_page(builder, page, page_type);
-       /* should allocate an additional staging page for pagevec */
-       if (err == -EAGAIN) {
-               struct page *const newpage =
-                       __stagingpage_alloc(page_pool, GFP_NOFS);
-
-               err = z_erofs_vle_work_add_page(builder, newpage,
-                                               Z_EROFS_PAGE_TYPE_EXCLUSIVE);
-               if (likely(!err))
-                       goto retry;
-       }
-
-       if (unlikely(err))
-               goto err_out;
-
-       index = page->index - map->m_la / PAGE_SIZE;
-
-       /* FIXME! avoid the last relundant fixup & endio */
-       z_erofs_onlinepage_fixup(page, index, true);
-
-       /* bump up the number of spiltted parts of a page */
-       ++spiltted;
-       /* also update nr_pages */
-       work->nr_pages = max_t(pgoff_t, work->nr_pages, index + 1);
-next_part:
-       /* can be used for verification */
-       map->m_llen = offset + cur - map->m_la;
-
-       end = cur;
-       if (end > 0)
-               goto repeat;
-
-out:
-       /* FIXME! avoid the last relundant fixup & endio */
-       z_erofs_onlinepage_endio(page);
-
-       debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu",
-               __func__, page, spiltted, map->m_llen);
-       return err;
-
-       /* if some error occurred while processing this page */
-err_out:
-       SetPageError(page);
-       goto out;
-}
-
-static void z_erofs_vle_unzip_kickoff(void *ptr, int bios)
-{
-       tagptr1_t t = tagptr_init(tagptr1_t, ptr);
-       struct z_erofs_vle_unzip_io *io = tagptr_unfold_ptr(t);
-       bool background = tagptr_unfold_tags(t);
-
-       if (!background) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&io->u.wait.lock, flags);
-               if (!atomic_add_return(bios, &io->pending_bios))
-                       wake_up_locked(&io->u.wait);
-               spin_unlock_irqrestore(&io->u.wait.lock, flags);
-               return;
-       }
-
-       if (!atomic_add_return(bios, &io->pending_bios))
-               queue_work(z_erofs_workqueue, &io->u.work);
-}
-
-static inline void z_erofs_vle_read_endio(struct bio *bio)
-{
-       struct erofs_sb_info *sbi = NULL;
-       blk_status_t err = bio->bi_status;
-       struct bio_vec *bvec;
-       struct bvec_iter_all iter_all;
-
-       bio_for_each_segment_all(bvec, bio, iter_all) {
-               struct page *page = bvec->bv_page;
-               bool cachemngd = false;
-
-               DBG_BUGON(PageUptodate(page));
-               DBG_BUGON(!page->mapping);
-
-               if (unlikely(!sbi && !z_erofs_page_is_staging(page))) {
-                       sbi = EROFS_SB(page->mapping->host->i_sb);
-
-                       if (time_to_inject(sbi, FAULT_READ_IO)) {
-                               erofs_show_injection_info(FAULT_READ_IO);
-                               err = BLK_STS_IOERR;
-                       }
-               }
-
-               /* sbi should already be gotten if the page is managed */
-               if (sbi)
-                       cachemngd = erofs_page_is_managed(sbi, page);
-
-               if (unlikely(err))
-                       SetPageError(page);
-               else if (cachemngd)
-                       SetPageUptodate(page);
-
-               if (cachemngd)
-                       unlock_page(page);
-       }
-
-       z_erofs_vle_unzip_kickoff(bio->bi_private, -1);
-       bio_put(bio);
-}
-
-static struct page *z_pagemap_global[Z_EROFS_VLE_VMAP_GLOBAL_PAGES];
-static DEFINE_MUTEX(z_pagemap_global_lock);
-
-static int z_erofs_vle_unzip(struct super_block *sb,
-                            struct z_erofs_vle_workgroup *grp,
-                            struct list_head *page_pool)
-{
-       struct erofs_sb_info *const sbi = EROFS_SB(sb);
-       const unsigned int clusterpages = erofs_clusterpages(sbi);
-
-       struct z_erofs_pagevec_ctor ctor;
-       unsigned int nr_pages;
-       unsigned int sparsemem_pages = 0;
-       struct page *pages_onstack[Z_EROFS_VLE_VMAP_ONSTACK_PAGES];
-       struct page **pages, **compressed_pages, *page;
-       unsigned int algorithm;
-       unsigned int i, outputsize;
-
-       enum z_erofs_page_type page_type;
-       bool overlapped, partial;
-       struct z_erofs_vle_work *work;
-       int err;
-
-       might_sleep();
-       work = z_erofs_vle_grab_primary_work(grp);
-       DBG_BUGON(!READ_ONCE(work->nr_pages));
-
-       mutex_lock(&work->lock);
-       nr_pages = work->nr_pages;
-
-       if (likely(nr_pages <= Z_EROFS_VLE_VMAP_ONSTACK_PAGES))
-               pages = pages_onstack;
-       else if (nr_pages <= Z_EROFS_VLE_VMAP_GLOBAL_PAGES &&
-                mutex_trylock(&z_pagemap_global_lock))
-               pages = z_pagemap_global;
-       else {
-               gfp_t gfp_flags = GFP_KERNEL;
-
-               if (nr_pages > Z_EROFS_VLE_VMAP_GLOBAL_PAGES)
-                       gfp_flags |= __GFP_NOFAIL;
-
-               pages = kvmalloc_array(nr_pages, sizeof(struct page *),
-                                      gfp_flags);
-
-               /* fallback to global pagemap for the lowmem scenario */
-               if (unlikely(!pages)) {
-                       mutex_lock(&z_pagemap_global_lock);
-                       pages = z_pagemap_global;
-               }
-       }
-
-       for (i = 0; i < nr_pages; ++i)
-               pages[i] = NULL;
-
-       z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
-                                 work->pagevec, 0);
-
-       for (i = 0; i < work->vcnt; ++i) {
-               unsigned int pagenr;
-
-               page = z_erofs_pagevec_ctor_dequeue(&ctor, &page_type);
-
-               /* all pages in pagevec ought to be valid */
-               DBG_BUGON(!page);
-               DBG_BUGON(!page->mapping);
-
-               if (z_erofs_put_stagingpage(page_pool, page))
-                       continue;
-
-               if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
-                       pagenr = 0;
-               else
-                       pagenr = z_erofs_onlinepage_index(page);
-
-               DBG_BUGON(pagenr >= nr_pages);
-               DBG_BUGON(pages[pagenr]);
-
-               pages[pagenr] = page;
-       }
-       sparsemem_pages = i;
-
-       z_erofs_pagevec_ctor_exit(&ctor, true);
-
-       overlapped = false;
-       compressed_pages = grp->compressed_pages;
-
-       err = 0;
-       for (i = 0; i < clusterpages; ++i) {
-               unsigned int pagenr;
-
-               page = compressed_pages[i];
-
-               /* all compressed pages ought to be valid */
-               DBG_BUGON(!page);
-               DBG_BUGON(!page->mapping);
-
-               if (!z_erofs_page_is_staging(page)) {
-                       if (erofs_page_is_managed(sbi, page)) {
-                               if (unlikely(!PageUptodate(page)))
-                                       err = -EIO;
-                               continue;
-                       }
-
-                       /*
-                        * only if non-head page can be selected
-                        * for inplace decompression
-                        */
-                       pagenr = z_erofs_onlinepage_index(page);
-
-                       DBG_BUGON(pagenr >= nr_pages);
-                       DBG_BUGON(pages[pagenr]);
-                       ++sparsemem_pages;
-                       pages[pagenr] = page;
-
-                       overlapped = true;
-               }
-
-               /* PG_error needs checking for inplaced and staging pages */
-               if (unlikely(PageError(page))) {
-                       DBG_BUGON(PageUptodate(page));
-                       err = -EIO;
-               }
-       }
-
-       if (unlikely(err))
-               goto out;
-
-       if (nr_pages << PAGE_SHIFT >= work->pageofs + grp->llen) {
-               outputsize = grp->llen;
-               partial = !(grp->flags & Z_EROFS_VLE_WORKGRP_FULL_LENGTH);
-       } else {
-               outputsize = (nr_pages << PAGE_SHIFT) - work->pageofs;
-               partial = true;
-       }
-
-       if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN)
-               algorithm = Z_EROFS_COMPRESSION_SHIFTED;
-       else
-               algorithm = Z_EROFS_COMPRESSION_LZ4;
-
-       err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
-                                       .sb = sb,
-                                       .in = compressed_pages,
-                                       .out = pages,
-                                       .pageofs_out = work->pageofs,
-                                       .inputsize = PAGE_SIZE,
-                                       .outputsize = outputsize,
-                                       .alg = algorithm,
-                                       .inplace_io = overlapped,
-                                       .partial_decoding = partial
-                                }, page_pool);
-
-out:
-       /* must handle all compressed pages before endding pages */
-       for (i = 0; i < clusterpages; ++i) {
-               page = compressed_pages[i];
-
-               if (erofs_page_is_managed(sbi, page))
-                       continue;
-
-               /* recycle all individual staging pages */
-               (void)z_erofs_put_stagingpage(page_pool, page);
-
-               WRITE_ONCE(compressed_pages[i], NULL);
-       }
-
-       for (i = 0; i < nr_pages; ++i) {
-               page = pages[i];
-               if (!page)
-                       continue;
-
-               DBG_BUGON(!page->mapping);
-
-               /* recycle all individual staging pages */
-               if (z_erofs_put_stagingpage(page_pool, page))
-                       continue;
-
-               if (unlikely(err < 0))
-                       SetPageError(page);
-
-               z_erofs_onlinepage_endio(page);
-       }
-
-       if (pages == z_pagemap_global)
-               mutex_unlock(&z_pagemap_global_lock);
-       else if (unlikely(pages != pages_onstack))
-               kvfree(pages);
-
-       work->nr_pages = 0;
-       work->vcnt = 0;
-
-       /* all work locks MUST be taken before the following line */
-
-       WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_NIL);
-
-       /* all work locks SHOULD be released right now */
-       mutex_unlock(&work->lock);
-
-       z_erofs_vle_work_release(work);
-       return err;
-}
-
-static void z_erofs_vle_unzip_all(struct super_block *sb,
-                                 struct z_erofs_vle_unzip_io *io,
-                                 struct list_head *page_pool)
-{
-       z_erofs_vle_owned_workgrp_t owned = io->head;
-
-       while (owned != Z_EROFS_VLE_WORKGRP_TAIL_CLOSED) {
-               struct z_erofs_vle_workgroup *grp;
-
-               /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
-               DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_TAIL);
-
-               /* no possible that 'owned' equals NULL */
-               DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_NIL);
-
-               grp = container_of(owned, struct z_erofs_vle_workgroup, next);
-               owned = READ_ONCE(grp->next);
-
-               z_erofs_vle_unzip(sb, grp, page_pool);
-       }
-}
-
-static void z_erofs_vle_unzip_wq(struct work_struct *work)
-{
-       struct z_erofs_vle_unzip_io_sb *iosb = container_of(work,
-               struct z_erofs_vle_unzip_io_sb, io.u.work);
-       LIST_HEAD(page_pool);
-
-       DBG_BUGON(iosb->io.head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
-       z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &page_pool);
-
-       put_pages_list(&page_pool);
-       kvfree(iosb);
-}
-
-static struct page *
-pickup_page_for_submission(struct z_erofs_vle_workgroup *grp,
-                          unsigned int nr,
-                          struct list_head *pagepool,
-                          struct address_space *mc,
-                          gfp_t gfp)
-{
-       /* determined at compile time to avoid too many #ifdefs */
-       const bool nocache = __builtin_constant_p(mc) ? !mc : false;
-       const pgoff_t index = grp->obj.index;
-       bool tocache = false;
-
-       struct address_space *mapping;
-       struct page *oldpage, *page;
-
-       compressed_page_t t;
-       int justfound;
-
-repeat:
-       page = READ_ONCE(grp->compressed_pages[nr]);
-       oldpage = page;
-
-       if (!page)
-               goto out_allocpage;
-
-       /*
-        * the cached page has not been allocated and
-        * an placeholder is out there, prepare it now.
-        */
-       if (!nocache && page == PAGE_UNALLOCATED) {
-               tocache = true;
-               goto out_allocpage;
-       }
-
-       /* process the target tagged pointer */
-       t = tagptr_init(compressed_page_t, page);
-       justfound = tagptr_unfold_tags(t);
-       page = tagptr_unfold_ptr(t);
-
-       mapping = READ_ONCE(page->mapping);
-
-       /*
-        * if managed cache is disabled, it's no way to
-        * get such a cached-like page.
-        */
-       if (nocache) {
-               /* if managed cache is disabled, it is impossible `justfound' */
-               DBG_BUGON(justfound);
-
-               /* and it should be locked, not uptodate, and not truncated */
-               DBG_BUGON(!PageLocked(page));
-               DBG_BUGON(PageUptodate(page));
-               DBG_BUGON(!mapping);
-               goto out;
-       }
-
-       /*
-        * unmanaged (file) pages are all locked solidly,
-        * therefore it is impossible for `mapping' to be NULL.
-        */
-       if (mapping && mapping != mc)
-               /* ought to be unmanaged pages */
-               goto out;
-
-       lock_page(page);
-
-       /* only true if page reclaim goes wrong, should never happen */
-       DBG_BUGON(justfound && PagePrivate(page));
-
-       /* the page is still in manage cache */
-       if (page->mapping == mc) {
-               WRITE_ONCE(grp->compressed_pages[nr], page);
-
-               ClearPageError(page);
-               if (!PagePrivate(page)) {
-                       /*
-                        * impossible to be !PagePrivate(page) for
-                        * the current restriction as well if
-                        * the page is already in compressed_pages[].
-                        */
-                       DBG_BUGON(!justfound);
-
-                       justfound = 0;
-                       set_page_private(page, (unsigned long)grp);
-                       SetPagePrivate(page);
-               }
-
-               /* no need to submit io if it is already up-to-date */
-               if (PageUptodate(page)) {
-                       unlock_page(page);
-                       page = NULL;
-               }
-               goto out;
-       }
-
-       /*
-        * the managed page has been truncated, it's unsafe to
-        * reuse this one, let's allocate a new cache-managed page.
-        */
-       DBG_BUGON(page->mapping);
-       DBG_BUGON(!justfound);
-
-       tocache = true;
-       unlock_page(page);
-       put_page(page);
-out_allocpage:
-       page = __stagingpage_alloc(pagepool, gfp);
-       if (oldpage != cmpxchg(&grp->compressed_pages[nr], oldpage, page)) {
-               list_add(&page->lru, pagepool);
-               cpu_relax();
-               goto repeat;
-       }
-       if (nocache || !tocache)
-               goto out;
-       if (add_to_page_cache_lru(page, mc, index + nr, gfp)) {
-               page->mapping = Z_EROFS_MAPPING_STAGING;
-               goto out;
-       }
-
-       set_page_private(page, (unsigned long)grp);
-       SetPagePrivate(page);
-out:   /* the only exit (for tracing and debugging) */
-       return page;
-}
-
-static struct z_erofs_vle_unzip_io *
-jobqueue_init(struct super_block *sb,
-             struct z_erofs_vle_unzip_io *io,
-             bool foreground)
-{
-       struct z_erofs_vle_unzip_io_sb *iosb;
-
-       if (foreground) {
-               /* waitqueue available for foreground io */
-               DBG_BUGON(!io);
-
-               init_waitqueue_head(&io->u.wait);
-               atomic_set(&io->pending_bios, 0);
-               goto out;
-       }
-
-       iosb = kvzalloc(sizeof(*iosb), GFP_KERNEL | __GFP_NOFAIL);
-       DBG_BUGON(!iosb);
-
-       /* initialize fields in the allocated descriptor */
-       io = &iosb->io;
-       iosb->sb = sb;
-       INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq);
-out:
-       io->head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED;
-       return io;
-}
-
-/* define workgroup jobqueue types */
-enum {
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-       JQ_BYPASS,
-#endif
-       JQ_SUBMIT,
-       NR_JOBQUEUES,
-};
-
-static void *jobqueueset_init(struct super_block *sb,
-                             z_erofs_vle_owned_workgrp_t qtail[],
-                             struct z_erofs_vle_unzip_io *q[],
-                             struct z_erofs_vle_unzip_io *fgq,
-                             bool forcefg)
-{
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-       /*
-        * if managed cache is enabled, bypass jobqueue is needed,
-        * no need to read from device for all workgroups in this queue.
-        */
-       q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, true);
-       qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
-#endif
-
-       q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, forcefg);
-       qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;
-
-       return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], !forcefg));
-}
-
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-static void move_to_bypass_jobqueue(struct z_erofs_vle_workgroup *grp,
-                                   z_erofs_vle_owned_workgrp_t qtail[],
-                                   z_erofs_vle_owned_workgrp_t owned_head)
-{
-       z_erofs_vle_owned_workgrp_t *const submit_qtail = qtail[JQ_SUBMIT];
-       z_erofs_vle_owned_workgrp_t *const bypass_qtail = qtail[JQ_BYPASS];
-
-       DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
-       if (owned_head == Z_EROFS_VLE_WORKGRP_TAIL)
-               owned_head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED;
-
-       WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
-
-       WRITE_ONCE(*submit_qtail, owned_head);
-       WRITE_ONCE(*bypass_qtail, &grp->next);
-
-       qtail[JQ_BYPASS] = &grp->next;
-}
-
-static bool postsubmit_is_all_bypassed(struct z_erofs_vle_unzip_io *q[],
-                                      unsigned int nr_bios,
-                                      bool force_fg)
-{
-       /*
-        * although background is preferred, no one is pending for submission.
-        * don't issue workqueue for decompression but drop it directly instead.
-        */
-       if (force_fg || nr_bios)
-               return false;
-
-       kvfree(container_of(q[JQ_SUBMIT],
-                           struct z_erofs_vle_unzip_io_sb,
-                           io));
-       return true;
-}
-#else
-static void move_to_bypass_jobqueue(struct z_erofs_vle_workgroup *grp,
-                                   z_erofs_vle_owned_workgrp_t qtail[],
-                                   z_erofs_vle_owned_workgrp_t owned_head)
-{
-       /* impossible to bypass submission for managed cache disabled */
-       DBG_BUGON(1);
-}
-
-static bool postsubmit_is_all_bypassed(struct z_erofs_vle_unzip_io *q[],
-                                      unsigned int nr_bios,
-                                      bool force_fg)
-{
-       /* bios should be >0 if managed cache is disabled */
-       DBG_BUGON(!nr_bios);
-       return false;
-}
-#endif
-
-static bool z_erofs_vle_submit_all(struct super_block *sb,
-                                  z_erofs_vle_owned_workgrp_t owned_head,
-                                  struct list_head *pagepool,
-                                  struct z_erofs_vle_unzip_io *fgq,
-                                  bool force_fg)
-{
-       struct erofs_sb_info *const sbi = EROFS_SB(sb);
-       const unsigned int clusterpages = erofs_clusterpages(sbi);
-       const gfp_t gfp = GFP_NOFS;
-
-       z_erofs_vle_owned_workgrp_t qtail[NR_JOBQUEUES];
-       struct z_erofs_vle_unzip_io *q[NR_JOBQUEUES];
-       struct bio *bio;
-       void *bi_private;
-       /* since bio will be NULL, no need to initialize last_index */
-       pgoff_t uninitialized_var(last_index);
-       bool force_submit = false;
-       unsigned int nr_bios;
-
-       if (unlikely(owned_head == Z_EROFS_VLE_WORKGRP_TAIL))
-               return false;
-
-       force_submit = false;
-       bio = NULL;
-       nr_bios = 0;
-       bi_private = jobqueueset_init(sb, qtail, q, fgq, force_fg);
-
-       /* by default, all need io submission */
-       q[JQ_SUBMIT]->head = owned_head;
-
-       do {
-               struct z_erofs_vle_workgroup *grp;
-               pgoff_t first_index;
-               struct page *page;
-               unsigned int i = 0, bypass = 0;
-               int err;
-
-               /* no possible 'owned_head' equals the following */
-               DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
-               DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_NIL);
-
-               grp = container_of(owned_head,
-                                  struct z_erofs_vle_workgroup, next);
-
-               /* close the main owned chain at first */
-               owned_head = cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
-                                    Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
-
-               first_index = grp->obj.index;
-               force_submit |= (first_index != last_index + 1);
-
-repeat:
-               page = pickup_page_for_submission(grp, i, pagepool,
-                                                 MNGD_MAPPING(sbi), gfp);
-               if (!page) {
-                       force_submit = true;
-                       ++bypass;
-                       goto skippage;
-               }
-
-               if (bio && force_submit) {
-submit_bio_retry:
-                       __submit_bio(bio, REQ_OP_READ, 0);
-                       bio = NULL;
-               }
-
-               if (!bio) {
-                       bio = erofs_grab_bio(sb, first_index + i,
-                                            BIO_MAX_PAGES, bi_private,
-                                            z_erofs_vle_read_endio, true);
-                       ++nr_bios;
-               }
-
-               err = bio_add_page(bio, page, PAGE_SIZE, 0);
-               if (err < PAGE_SIZE)
-                       goto submit_bio_retry;
-
-               force_submit = false;
-               last_index = first_index + i;
-skippage:
-               if (++i < clusterpages)
-                       goto repeat;
-
-               if (bypass < clusterpages)
-                       qtail[JQ_SUBMIT] = &grp->next;
-               else
-                       move_to_bypass_jobqueue(grp, qtail, owned_head);
-       } while (owned_head != Z_EROFS_VLE_WORKGRP_TAIL);
-
-       if (bio)
-               __submit_bio(bio, REQ_OP_READ, 0);
-
-       if (postsubmit_is_all_bypassed(q, nr_bios, force_fg))
-               return true;
-
-       z_erofs_vle_unzip_kickoff(bi_private, nr_bios);
-       return true;
-}
-
-static void z_erofs_submit_and_unzip(struct z_erofs_vle_frontend *f,
-                                    struct list_head *pagepool,
-                                    bool force_fg)
-{
-       struct super_block *sb = f->inode->i_sb;
-       struct z_erofs_vle_unzip_io io[NR_JOBQUEUES];
-
-       if (!z_erofs_vle_submit_all(sb, f->owned_head, pagepool, io, force_fg))
-               return;
-
-#ifdef EROFS_FS_HAS_MANAGED_CACHE
-       z_erofs_vle_unzip_all(sb, &io[JQ_BYPASS], pagepool);
-#endif
-       if (!force_fg)
-               return;
-
-       /* wait until all bios are completed */
-       wait_event(io[JQ_SUBMIT].u.wait,
-                  !atomic_read(&io[JQ_SUBMIT].pending_bios));
-
-       /* let's synchronous decompression */
-       z_erofs_vle_unzip_all(sb, &io[JQ_SUBMIT], pagepool);
-}
-
-static int z_erofs_vle_normalaccess_readpage(struct file *file,
-                                            struct page *page)
-{
-       struct inode *const inode = page->mapping->host;
-       struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
-       int err;
-       LIST_HEAD(pagepool);
-
-       trace_erofs_readpage(page, false);
-
-       f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
-
-       err = z_erofs_do_read_page(&f, page, &pagepool);
-       (void)z_erofs_vle_work_iter_end(&f.builder);
-
-       if (err) {
-               errln("%s, failed to read, err [%d]", __func__, err);
-               goto out;
-       }
-
-       z_erofs_submit_and_unzip(&f, &pagepool, true);
-out:
-       if (f.map.mpage)
-               put_page(f.map.mpage);
-
-       /* clean up the remaining free pages */
-       put_pages_list(&pagepool);
-       return 0;
-}
-
-static int z_erofs_vle_normalaccess_readpages(struct file *filp,
-                                             struct address_space *mapping,
-                                             struct list_head *pages,
-                                             unsigned int nr_pages)
-{
-       struct inode *const inode = mapping->host;
-       struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
-
-       bool sync = __should_decompress_synchronously(sbi, nr_pages);
-       struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
-       gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
-       struct page *head = NULL;
-       LIST_HEAD(pagepool);
-
-       trace_erofs_readpages(mapping->host, lru_to_page(pages),
-                             nr_pages, false);
-
-       f.headoffset = (erofs_off_t)lru_to_page(pages)->index << PAGE_SHIFT;
-
-       for (; nr_pages; --nr_pages) {
-               struct page *page = lru_to_page(pages);
-
-               prefetchw(&page->flags);
-               list_del(&page->lru);
-
-               /*
-                * A pure asynchronous readahead is indicated if
-                * a PG_readahead marked page is hitted at first.
-                * Let's also do asynchronous decompression for this case.
-                */
-               sync &= !(PageReadahead(page) && !head);
-
-               if (add_to_page_cache_lru(page, mapping, page->index, gfp)) {
-                       list_add(&page->lru, &pagepool);
-                       continue;
-               }
-
-               set_page_private(page, (unsigned long)head);
-               head = page;
-       }
-
-       while (head) {
-               struct page *page = head;
-               int err;
-
-               /* traversal in reverse order */
-               head = (void *)page_private(page);
-
-               err = z_erofs_do_read_page(&f, page, &pagepool);
-               if (err) {
-                       struct erofs_vnode *vi = EROFS_V(inode);
-
-                       errln("%s, readahead error at page %lu of nid %llu",
-                             __func__, page->index, vi->nid);
-               }
-
-               put_page(page);
-       }
-
-       (void)z_erofs_vle_work_iter_end(&f.builder);
-
-       z_erofs_submit_and_unzip(&f, &pagepool, sync);
-
-       if (f.map.mpage)
-               put_page(f.map.mpage);
-
-       /* clean up the remaining free pages */
-       put_pages_list(&pagepool);
-       return 0;
-}
-
-const struct address_space_operations z_erofs_vle_normalaccess_aops = {
-       .readpage = z_erofs_vle_normalaccess_readpage,
-       .readpages = z_erofs_vle_normalaccess_readpages,
-};
-
diff --git a/drivers/staging/erofs/unzip_vle.h b/drivers/staging/erofs/unzip_vle.h
deleted file mode 100644 (file)
index d92515c..0000000
+++ /dev/null
@@ -1,192 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * linux/drivers/staging/erofs/unzip_vle.h
- *
- * Copyright (C) 2018 HUAWEI, Inc.
- *             http://www.huawei.com/
- * Created by Gao Xiang <gaoxiang25@huawei.com>
- */
-#ifndef __EROFS_FS_UNZIP_VLE_H
-#define __EROFS_FS_UNZIP_VLE_H
-
-#include "internal.h"
-#include "unzip_pagevec.h"
-
-#define Z_EROFS_NR_INLINE_PAGEVECS      3
-
-/*
- * Structure fields follow one of the following exclusion rules.
- *
- * I: Modifiable by initialization/destruction paths and read-only
- *    for everyone else.
- *
- */
-
-struct z_erofs_vle_work {
-       struct mutex lock;
-
-       /* I: decompression offset in page */
-       unsigned short pageofs;
-       unsigned short nr_pages;
-
-       /* L: queued pages in pagevec[] */
-       unsigned int vcnt;
-
-       union {
-               /* L: pagevec */
-               erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS];
-               struct rcu_head rcu;
-       };
-};
-
-#define Z_EROFS_VLE_WORKGRP_FMT_PLAIN        0
-#define Z_EROFS_VLE_WORKGRP_FMT_LZ4          1
-#define Z_EROFS_VLE_WORKGRP_FMT_MASK         1
-#define Z_EROFS_VLE_WORKGRP_FULL_LENGTH      2
-
-typedef void *z_erofs_vle_owned_workgrp_t;
-
-struct z_erofs_vle_workgroup {
-       struct erofs_workgroup obj;
-       struct z_erofs_vle_work work;
-
-       /* point to next owned_workgrp_t */
-       z_erofs_vle_owned_workgrp_t next;
-
-       /* compressed pages (including multi-usage pages) */
-       struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES];
-       unsigned int llen, flags;
-};
-
-/* let's avoid the valid 32-bit kernel addresses */
-
-/* the chained workgroup has't submitted io (still open) */
-#define Z_EROFS_VLE_WORKGRP_TAIL        ((void *)0x5F0ECAFE)
-/* the chained workgroup has already submitted io */
-#define Z_EROFS_VLE_WORKGRP_TAIL_CLOSED ((void *)0x5F0EDEAD)
-
-#define Z_EROFS_VLE_WORKGRP_NIL         (NULL)
-
-#define z_erofs_vle_workgrp_fmt(grp)   \
-       ((grp)->flags & Z_EROFS_VLE_WORKGRP_FMT_MASK)
-
-static inline void z_erofs_vle_set_workgrp_fmt(
-       struct z_erofs_vle_workgroup *grp,
-       unsigned int fmt)
-{
-       grp->flags = fmt | (grp->flags & ~Z_EROFS_VLE_WORKGRP_FMT_MASK);
-}
-
-
-/* definitions if multiref is disabled */
-#define z_erofs_vle_grab_primary_work(grp)     (&(grp)->work)
-#define z_erofs_vle_grab_work(grp, pageofs)    (&(grp)->work)
-#define z_erofs_vle_work_workgroup(wrk, primary)       \
-       ((primary) ? container_of(wrk,  \
-               struct z_erofs_vle_workgroup, work) : \
-               ({ BUG(); (void *)NULL; }))
-
-
-#define Z_EROFS_WORKGROUP_SIZE       sizeof(struct z_erofs_vle_workgroup)
-
-struct z_erofs_vle_unzip_io {
-       atomic_t pending_bios;
-       z_erofs_vle_owned_workgrp_t head;
-
-       union {
-               wait_queue_head_t wait;
-               struct work_struct work;
-       } u;
-};
-
-struct z_erofs_vle_unzip_io_sb {
-       struct z_erofs_vle_unzip_io io;
-       struct super_block *sb;
-};
-
-#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
-#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
-#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT  (Z_EROFS_ONLINEPAGE_COUNT_BITS)
-
-/*
- * waiters (aka. ongoing_packs): # to unlock the page
- * sub-index: 0 - for partial page, >= 1 full page sub-index
- */
-typedef atomic_t z_erofs_onlinepage_t;
-
-/* type punning */
-union z_erofs_onlinepage_converter {
-       z_erofs_onlinepage_t *o;
-       unsigned long *v;
-};
-
-static inline unsigned int z_erofs_onlinepage_index(struct page *page)
-{
-       union z_erofs_onlinepage_converter u;
-
-       DBG_BUGON(!PagePrivate(page));
-       u.v = &page_private(page);
-
-       return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
-}
-
-static inline void z_erofs_onlinepage_init(struct page *page)
-{
-       union {
-               z_erofs_onlinepage_t o;
-               unsigned long v;
-       /* keep from being unlocked in advance */
-       } u = { .o = ATOMIC_INIT(1) };
-
-       set_page_private(page, u.v);
-       smp_wmb();
-       SetPagePrivate(page);
-}
-
-static inline void z_erofs_onlinepage_fixup(struct page *page,
-       uintptr_t index, bool down)
-{
-       unsigned long *p, o, v, id;
-repeat:
-       p = &page_private(page);
-       o = READ_ONCE(*p);
-
-       id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
-       if (id) {
-               if (!index)
-                       return;
-
-               DBG_BUGON(id != index);
-       }
-
-       v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
-               ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
-       if (cmpxchg(p, o, v) != o)
-               goto repeat;
-}
-
-static inline void z_erofs_onlinepage_endio(struct page *page)
-{
-       union z_erofs_onlinepage_converter u;
-       unsigned int v;
-
-       DBG_BUGON(!PagePrivate(page));
-       u.v = &page_private(page);
-
-       v = atomic_dec_return(u.o);
-       if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
-               ClearPagePrivate(page);
-               if (!PageError(page))
-                       SetPageUptodate(page);
-               unlock_page(page);
-       }
-
-       debugln("%s, page %p value %x", __func__, page, atomic_read(u.o));
-}
-
-#define Z_EROFS_VLE_VMAP_ONSTACK_PAGES \
-       min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
-#define Z_EROFS_VLE_VMAP_GLOBAL_PAGES  2048
-
-#endif
-
diff --git a/drivers/staging/erofs/zdata.c b/drivers/staging/erofs/zdata.c
new file mode 100644 (file)
index 0000000..f766762
--- /dev/null
@@ -0,0 +1,1587 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/drivers/staging/erofs/zdata.c
+ *
+ * Copyright (C) 2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#include "zdata.h"
+#include "compress.h"
+#include <linux/prefetch.h>
+
+#include <trace/events/erofs.h>
+
+/*
+ * a compressed_pages[] placeholder used to keep a slot from being
+ * filled with file pages for in-place decompression.
+ */
+#define PAGE_UNALLOCATED     ((void *)0x5F0E4B1D)
+
+/* how to allocate cached pages for a workgroup */
+enum z_erofs_cache_alloctype {
+       DONTALLOC,      /* don't allocate any cached pages */
+       DELAYEDALLOC,   /* delayed allocation (at the time of submitting io) */
+};
+
+/*
+ * tagged pointer with 1-bit tag for all compressed pages
+ * tag 0 - the page is just found with an extra page reference
+ */
+typedef tagptr1_t compressed_page_t;
+
+#define tag_compressed_page_justfound(page) \
+       tagptr_fold(compressed_page_t, page, 1)
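For illustration only (not part of the patch): a minimal user-space sketch of the
low-bit pointer-tagging idea behind compressed_page_t and the `justfound' tag.
All identifiers below are hypothetical stand-ins, not the driver's tagptr API.

        #include <assert.h>
        #include <stdint.h>
        #include <stdio.h>

        typedef struct { uintptr_t v; } tagged_page_t;

        static tagged_page_t tag_fold(void *ptr, unsigned int tag)
        {
                /* a 1-bit tag needs at least 2-byte pointer alignment */
                assert(((uintptr_t)ptr & 1) == 0 && tag <= 1);
                return (tagged_page_t){ .v = (uintptr_t)ptr | tag };
        }

        static void *tag_unfold_ptr(tagged_page_t t)
        {
                return (void *)(t.v & ~(uintptr_t)1);
        }

        static unsigned int tag_unfold_tag(tagged_page_t t)
        {
                return t.v & 1;
        }

        int main(void)
        {
                int page;                               /* stand-in for a struct page */
                tagged_page_t t = tag_fold(&page, 1);   /* mark as "justfound" */

                printf("same ptr: %d, tag: %u\n",
                       tag_unfold_ptr(t) == (void *)&page, tag_unfold_tag(t));
                return 0;
        }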
+
+static struct workqueue_struct *z_erofs_workqueue __read_mostly;
+static struct kmem_cache *z_erofs_workgroup_cachep __read_mostly;
+
+void z_erofs_exit_zip_subsystem(void)
+{
+       destroy_workqueue(z_erofs_workqueue);
+       kmem_cache_destroy(z_erofs_workgroup_cachep);
+}
+
+static inline int init_unzip_workqueue(void)
+{
+       const unsigned int onlinecpus = num_possible_cpus();
+
+       /*
+        * we don't need too many threads, limiting threads
+        * could improve scheduling performance.
+        */
+       z_erofs_workqueue =
+               alloc_workqueue("erofs_unzipd",
+                               WQ_UNBOUND | WQ_HIGHPRI | WQ_CPU_INTENSIVE,
+                               onlinecpus + onlinecpus / 4);
+
+       return z_erofs_workqueue ? 0 : -ENOMEM;
+}
+
+static void init_once(void *ptr)
+{
+       struct z_erofs_vle_workgroup *grp = ptr;
+       struct z_erofs_vle_work *const work =
+               z_erofs_vle_grab_primary_work(grp);
+       unsigned int i;
+
+       mutex_init(&work->lock);
+       work->nr_pages = 0;
+       work->vcnt = 0;
+       for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i)
+               grp->compressed_pages[i] = NULL;
+}
+
+static void init_always(struct z_erofs_vle_workgroup *grp)
+{
+       struct z_erofs_vle_work *const work =
+               z_erofs_vle_grab_primary_work(grp);
+
+       atomic_set(&grp->obj.refcount, 1);
+       grp->flags = 0;
+
+       DBG_BUGON(work->nr_pages);
+       DBG_BUGON(work->vcnt);
+}
+
+int __init z_erofs_init_zip_subsystem(void)
+{
+       z_erofs_workgroup_cachep =
+               kmem_cache_create("erofs_compress",
+                                 Z_EROFS_WORKGROUP_SIZE, 0,
+                                 SLAB_RECLAIM_ACCOUNT, init_once);
+
+       if (z_erofs_workgroup_cachep) {
+               if (!init_unzip_workqueue())
+                       return 0;
+
+               kmem_cache_destroy(z_erofs_workgroup_cachep);
+       }
+       return -ENOMEM;
+}
+
+enum z_erofs_vle_work_role {
+       Z_EROFS_VLE_WORK_SECONDARY,
+       Z_EROFS_VLE_WORK_PRIMARY,
+       /*
+        * The current work was the tail of an existing chain, and the previous
+        * processed chained works are all decided to be hooked up to it.
+        * A new chain should be created for the remaining unprocessed works,
+        * therefore different from Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
+        * the next work cannot reuse the whole page in the following scenario:
+        *  ________________________________________________________________
+        * |      tail (partial) page     |       head (partial) page       |
+        * |  (belongs to the next work)  |  (belongs to the current work)  |
+        * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
+        */
+       Z_EROFS_VLE_WORK_PRIMARY_HOOKED,
+       /*
+        * The current work has been linked with the processed chained works,
+        * and could be also linked with the potential remaining works, which
+        * means if the processing page is the tail partial page of the work,
+        * the current work can safely use the whole page (since the next work
+        * is under control) for in-place decompression, as illustrated below:
+        *  ________________________________________________________________
+        * |  tail (partial) page  |          head (partial) page           |
+        * | (of the current work) |         (of the previous work)         |
+        * |  PRIMARY_FOLLOWED or  |                                        |
+        * |_____PRIMARY_HOOKED____|____________PRIMARY_FOLLOWED____________|
+        *
+        * [  (*) the above page can be used for the current work itself.  ]
+        */
+       Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED,
+       Z_EROFS_VLE_WORK_MAX
+};
+
+struct z_erofs_vle_work_builder {
+       enum z_erofs_vle_work_role role;
+       /*
+        * 'hosted = false' means that the current workgroup doesn't belong to
+        * the owned chained workgroups. In other words, it is none of our
+        * business to submit this workgroup.
+        */
+       bool hosted;
+
+       struct z_erofs_vle_workgroup *grp;
+       struct z_erofs_vle_work *work;
+       struct z_erofs_pagevec_ctor vector;
+
+       /* pages used for reading the compressed data */
+       struct page **compressed_pages;
+       unsigned int compressed_deficit;
+};
+
+#define VLE_WORK_BUILDER_INIT()        \
+       { .work = NULL, .role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED }
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+static void preload_compressed_pages(struct z_erofs_vle_work_builder *bl,
+                                    struct address_space *mc,
+                                    pgoff_t index,
+                                    unsigned int clusterpages,
+                                    enum z_erofs_cache_alloctype type,
+                                    struct list_head *pagepool,
+                                    gfp_t gfp)
+{
+       struct page **const pages = bl->compressed_pages;
+       const unsigned int remaining = bl->compressed_deficit;
+       bool standalone = true;
+       unsigned int i, j = 0;
+
+       if (bl->role < Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)
+               return;
+
+       gfp = mapping_gfp_constraint(mc, gfp) & ~__GFP_RECLAIM;
+
+       index += clusterpages - remaining;
+
+       for (i = 0; i < remaining; ++i) {
+               struct page *page;
+               compressed_page_t t;
+
+               /* the compressed page was loaded before */
+               if (READ_ONCE(pages[i]))
+                       continue;
+
+               page = find_get_page(mc, index + i);
+
+               if (page) {
+                       t = tag_compressed_page_justfound(page);
+               } else if (type == DELAYEDALLOC) {
+                       t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED);
+               } else {        /* DONTALLOC */
+                       if (standalone)
+                               j = i;
+                       standalone = false;
+                       continue;
+               }
+
+               if (!cmpxchg_relaxed(&pages[i], NULL, tagptr_cast_ptr(t)))
+                       continue;
+
+               if (page)
+                       put_page(page);
+       }
+       bl->compressed_pages += j;
+       bl->compressed_deficit = remaining - j;
+
+       if (standalone)
+               bl->role = Z_EROFS_VLE_WORK_PRIMARY;
+}
+
+/* called by erofs_shrinker to get rid of all compressed_pages */
+int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
+                                      struct erofs_workgroup *egrp)
+{
+       struct z_erofs_vle_workgroup *const grp =
+               container_of(egrp, struct z_erofs_vle_workgroup, obj);
+       struct address_space *const mapping = MNGD_MAPPING(sbi);
+       const int clusterpages = erofs_clusterpages(sbi);
+       int i;
+
+       /*
+        * refcount of the workgroup is now frozen to 1,
+        * therefore no need to worry about available decompression users.
+        */
+       for (i = 0; i < clusterpages; ++i) {
+               struct page *page = grp->compressed_pages[i];
+
+               if (!page || page->mapping != mapping)
+                       continue;
+
+               /* block other users from reclaiming or migrating the page */
+               if (!trylock_page(page))
+                       return -EBUSY;
+
+               /* barrier is implied in the following 'unlock_page' */
+               WRITE_ONCE(grp->compressed_pages[i], NULL);
+
+               set_page_private(page, 0);
+               ClearPagePrivate(page);
+
+               unlock_page(page);
+               put_page(page);
+       }
+       return 0;
+}
+
+int erofs_try_to_free_cached_page(struct address_space *mapping,
+                                 struct page *page)
+{
+       struct erofs_sb_info *const sbi = EROFS_SB(mapping->host->i_sb);
+       const unsigned int clusterpages = erofs_clusterpages(sbi);
+       struct z_erofs_vle_workgroup *const grp = (void *)page_private(page);
+       int ret = 0;    /* 0 - busy */
+
+       if (erofs_workgroup_try_to_freeze(&grp->obj, 1)) {
+               unsigned int i;
+
+               for (i = 0; i < clusterpages; ++i) {
+                       if (grp->compressed_pages[i] == page) {
+                               WRITE_ONCE(grp->compressed_pages[i], NULL);
+                               ret = 1;
+                               break;
+                       }
+               }
+               erofs_workgroup_unfreeze(&grp->obj, 1);
+
+               if (ret) {
+                       ClearPagePrivate(page);
+                       put_page(page);
+               }
+       }
+       return ret;
+}
+#else
+static void preload_compressed_pages(struct z_erofs_vle_work_builder *bl,
+                                    struct address_space *mc,
+                                    pgoff_t index,
+                                    unsigned int clusterpages,
+                                    enum z_erofs_cache_alloctype type,
+                                    struct list_head *pagepool,
+                                    gfp_t gfp)
+{
+       /* nowhere to load compressed pages from */
+}
+#endif
+
+/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
+static inline bool try_to_reuse_as_compressed_page(
+       struct z_erofs_vle_work_builder *b,
+       struct page *page)
+{
+       while (b->compressed_deficit) {
+               --b->compressed_deficit;
+               if (!cmpxchg(b->compressed_pages++, NULL, page))
+                       return true;
+       }
+
+       return false;
+}
+
+/* callers must be with work->lock held */
+static int z_erofs_vle_work_add_page(
+       struct z_erofs_vle_work_builder *builder,
+       struct page *page,
+       enum z_erofs_page_type type)
+{
+       int ret;
+       bool occupied;
+
+       /* give priority for the compressed data storage */
+       if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY &&
+           type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
+           try_to_reuse_as_compressed_page(builder, page))
+               return 0;
+
+       ret = z_erofs_pagevec_ctor_enqueue(&builder->vector,
+                                          page, type, &occupied);
+       builder->work->vcnt += (unsigned int)ret;
+
+       return ret ? 0 : -EAGAIN;
+}
+
+static enum z_erofs_vle_work_role
+try_to_claim_workgroup(struct z_erofs_vle_workgroup *grp,
+                      z_erofs_vle_owned_workgrp_t *owned_head,
+                      bool *hosted)
+{
+       DBG_BUGON(*hosted);
+
+       /* let's claim the following types of workgroup */
+retry:
+       if (grp->next == Z_EROFS_VLE_WORKGRP_NIL) {
+               /* type 1, nil workgroup */
+               if (cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_NIL,
+                           *owned_head) != Z_EROFS_VLE_WORKGRP_NIL)
+                       goto retry;
+
+               *owned_head = &grp->next;
+               *hosted = true;
+               /* lucky, I am the followee :) */
+               return Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
+
+       } else if (grp->next == Z_EROFS_VLE_WORKGRP_TAIL) {
+               /*
+                * type 2, link to the end of an existing open chain,
+                * be careful that its submission itself is governed
+                * by the original owned chain.
+                */
+               if (cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
+                           *owned_head) != Z_EROFS_VLE_WORKGRP_TAIL)
+                       goto retry;
+               *owned_head = Z_EROFS_VLE_WORKGRP_TAIL;
+               return Z_EROFS_VLE_WORK_PRIMARY_HOOKED;
+       }
+
+       return Z_EROFS_VLE_WORK_PRIMARY; /* :( better luck next time */
+}
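For illustration only (not part of the patch): a user-space sketch of claiming a
node into an owned chain with a single compare-and-swap on its `next' field, in
the spirit of try_to_claim_workgroup() above (the driver actually stores a pointer
to the `next' field itself rather than to the node). Names below are hypothetical.

        #include <stdio.h>

        struct node {
                struct node *next;      /* NULL plays the role of WORKGRP_NIL */
        };

        /* claim @n into the chain headed by *@owned_head; returns 1 on success */
        static int try_claim(struct node *n, struct node **owned_head)
        {
                struct node *expected = NULL;

                if (__atomic_compare_exchange_n(&n->next, &expected, *owned_head,
                                                0, __ATOMIC_SEQ_CST,
                                                __ATOMIC_SEQ_CST)) {
                        *owned_head = n;
                        return 1;
                }
                return 0;       /* lost the race: someone else owns this node */
        }

        int main(void)
        {
                struct node tail = { 0 }, grp = { 0 };
                struct node *owned_head = &tail;

                printf("claimed: %d, new head is grp: %d\n",
                       try_claim(&grp, &owned_head), owned_head == &grp);
                return 0;
        }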
+
+struct z_erofs_vle_work_finder {
+       struct super_block *sb;
+       pgoff_t idx;
+       unsigned int pageofs;
+
+       struct z_erofs_vle_workgroup **grp_ret;
+       enum z_erofs_vle_work_role *role;
+       z_erofs_vle_owned_workgrp_t *owned_head;
+       bool *hosted;
+};
+
+static struct z_erofs_vle_work *
+z_erofs_vle_work_lookup(const struct z_erofs_vle_work_finder *f)
+{
+       bool tag, primary;
+       struct erofs_workgroup *egrp;
+       struct z_erofs_vle_workgroup *grp;
+       struct z_erofs_vle_work *work;
+
+       egrp = erofs_find_workgroup(f->sb, f->idx, &tag);
+       if (!egrp) {
+               *f->grp_ret = NULL;
+               return NULL;
+       }
+
+       grp = container_of(egrp, struct z_erofs_vle_workgroup, obj);
+       *f->grp_ret = grp;
+
+       work = z_erofs_vle_grab_work(grp, f->pageofs);
+       /* if multiref is disabled, `primary' is always true */
+       primary = true;
+
+       DBG_BUGON(work->pageofs != f->pageofs);
+
+       /*
+        * lock must be taken first to avoid grp->next == NIL between
+        * claiming workgroup and adding pages:
+        *                        grp->next != NIL
+        *   grp->next = NIL
+        *   mutex_unlock_all
+        *                        mutex_lock(&work->lock)
+        *                        add all pages to pagevec
+        *
+        * [correct locking case 1]:
+        *   mutex_lock(grp->work[a])
+        *   ...
+        *   mutex_lock(grp->work[b])     mutex_lock(grp->work[c])
+        *   ...                          *role = SECONDARY
+        *                                add all pages to pagevec
+        *                                ...
+        *                                mutex_unlock(grp->work[c])
+        *   mutex_lock(grp->work[c])
+        *   ...
+        *   grp->next = NIL
+        *   mutex_unlock_all
+        *
+        * [correct locking case 2]:
+        *   mutex_lock(grp->work[b])
+        *   ...
+        *   mutex_lock(grp->work[a])
+        *   ...
+        *   mutex_lock(grp->work[c])
+        *   ...
+        *   grp->next = NIL
+        *   mutex_unlock_all
+        *                                mutex_lock(grp->work[a])
+        *                                *role = PRIMARY_OWNER
+        *                                add all pages to pagevec
+        *                                ...
+        */
+       mutex_lock(&work->lock);
+
+       *f->hosted = false;
+       if (!primary)
+               *f->role = Z_EROFS_VLE_WORK_SECONDARY;
+       else    /* claim the workgroup if possible */
+               *f->role = try_to_claim_workgroup(grp, f->owned_head,
+                                                 f->hosted);
+       return work;
+}
+
+static struct z_erofs_vle_work *
+z_erofs_vle_work_register(const struct z_erofs_vle_work_finder *f,
+                         struct erofs_map_blocks *map)
+{
+       bool gnew = false;
+       struct z_erofs_vle_workgroup *grp = *f->grp_ret;
+       struct z_erofs_vle_work *work;
+
+       /* since multiref is disabled, grp must be NULL here */
+       if (unlikely(grp)) {
+               DBG_BUGON(1);
+               return ERR_PTR(-EINVAL);
+       }
+
+       /* no available workgroup, let's allocate one */
+       grp = kmem_cache_alloc(z_erofs_workgroup_cachep, GFP_NOFS);
+       if (unlikely(!grp))
+               return ERR_PTR(-ENOMEM);
+
+       init_always(grp);
+       grp->obj.index = f->idx;
+       grp->llen = map->m_llen;
+
+       z_erofs_vle_set_workgrp_fmt(grp, (map->m_flags & EROFS_MAP_ZIPPED) ?
+                                   Z_EROFS_VLE_WORKGRP_FMT_LZ4 :
+                                   Z_EROFS_VLE_WORKGRP_FMT_PLAIN);
+
+       if (map->m_flags & EROFS_MAP_FULL_MAPPED)
+               grp->flags |= Z_EROFS_VLE_WORKGRP_FULL_LENGTH;
+
+       /* new workgrps have been claimed as type 1 */
+       WRITE_ONCE(grp->next, *f->owned_head);
+       /* primary and followed work for all new workgrps */
+       *f->role = Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED;
+       /* it should be submitted by ourselves */
+       *f->hosted = true;
+
+       gnew = true;
+       work = z_erofs_vle_grab_primary_work(grp);
+       work->pageofs = f->pageofs;
+
+       /*
+        * lock all primary followed works before they become visible to
+        * others; mutex_trylock *never* fails for a new workgroup.
+        */
+       mutex_trylock(&work->lock);
+
+       if (gnew) {
+               int err = erofs_register_workgroup(f->sb, &grp->obj, 0);
+
+               if (err) {
+                       mutex_unlock(&work->lock);
+                       kmem_cache_free(z_erofs_workgroup_cachep, grp);
+                       return ERR_PTR(-EAGAIN);
+               }
+       }
+
+       *f->owned_head = &grp->next;
+       *f->grp_ret = grp;
+       return work;
+}
+
+#define builder_is_hooked(builder) \
+       ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_HOOKED)
+
+#define builder_is_followed(builder) \
+       ((builder)->role >= Z_EROFS_VLE_WORK_PRIMARY_FOLLOWED)
+
+static int z_erofs_vle_work_iter_begin(struct z_erofs_vle_work_builder *builder,
+                                      struct super_block *sb,
+                                      struct erofs_map_blocks *map,
+                                      z_erofs_vle_owned_workgrp_t *owned_head)
+{
+       const unsigned int clusterpages = erofs_clusterpages(EROFS_SB(sb));
+       struct z_erofs_vle_workgroup *grp;
+       const struct z_erofs_vle_work_finder finder = {
+               .sb = sb,
+               .idx = erofs_blknr(map->m_pa),
+               .pageofs = map->m_la & ~PAGE_MASK,
+               .grp_ret = &grp,
+               .role = &builder->role,
+               .owned_head = owned_head,
+               .hosted = &builder->hosted
+       };
+       struct z_erofs_vle_work *work;
+
+       DBG_BUGON(builder->work);
+
+       /* must be Z_EROFS_WORK_TAIL or the next chained work */
+       DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_NIL);
+       DBG_BUGON(*owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+
+       DBG_BUGON(erofs_blkoff(map->m_pa));
+
+repeat:
+       work = z_erofs_vle_work_lookup(&finder);
+       if (work) {
+               unsigned int orig_llen;
+
+               /* increase workgroup `llen' if needed */
+               while ((orig_llen = READ_ONCE(grp->llen)) < map->m_llen &&
+                      orig_llen != cmpxchg_relaxed(&grp->llen,
+                                                   orig_llen, map->m_llen))
+                       cpu_relax();
+               goto got_it;
+       }
+
+       work = z_erofs_vle_work_register(&finder, map);
+       if (unlikely(work == ERR_PTR(-EAGAIN)))
+               goto repeat;
+
+       if (IS_ERR(work))
+               return PTR_ERR(work);
+got_it:
+       z_erofs_pagevec_ctor_init(&builder->vector, Z_EROFS_NR_INLINE_PAGEVECS,
+                                 work->pagevec, work->vcnt);
+
+       if (builder->role >= Z_EROFS_VLE_WORK_PRIMARY) {
+               /* enable possibly in-place decompression */
+               builder->compressed_pages = grp->compressed_pages;
+               builder->compressed_deficit = clusterpages;
+       } else {
+               builder->compressed_pages = NULL;
+               builder->compressed_deficit = 0;
+       }
+
+       builder->grp = grp;
+       builder->work = work;
+       return 0;
+}
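For illustration only (not part of the patch): the `llen' update in
z_erofs_vle_work_iter_begin() is a lock-free "raise to at least" loop. A minimal
user-space sketch with hypothetical names:

        #include <stdio.h>

        static unsigned int llen;

        /* raise llen to at least @new_len without taking a lock */
        static void raise_llen(unsigned int new_len)
        {
                unsigned int old = __atomic_load_n(&llen, __ATOMIC_RELAXED);

                while (old < new_len &&
                       !__atomic_compare_exchange_n(&llen, &old, new_len, 0,
                                                    __ATOMIC_RELAXED,
                                                    __ATOMIC_RELAXED))
                        ;       /* @old was refreshed by the failed CAS, retry */
        }

        int main(void)
        {
                raise_llen(100);
                raise_llen(50);                 /* no effect, 100 is already larger */
                printf("llen=%u\n", llen);      /* prints 100 */
                return 0;
        }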
+
+/*
+ * keep in mind that referenced workgroups are freed only after an RCU
+ * grace period, so rcu_read_lock() can prevent a workgroup from being
+ * freed.
+ */
+static void z_erofs_rcu_callback(struct rcu_head *head)
+{
+       struct z_erofs_vle_work *work = container_of(head,
+               struct z_erofs_vle_work, rcu);
+       struct z_erofs_vle_workgroup *grp =
+               z_erofs_vle_work_workgroup(work, true);
+
+       kmem_cache_free(z_erofs_workgroup_cachep, grp);
+}
+
+void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
+{
+       struct z_erofs_vle_workgroup *const vgrp = container_of(grp,
+               struct z_erofs_vle_workgroup, obj);
+       struct z_erofs_vle_work *const work = &vgrp->work;
+
+       call_rcu(&work->rcu, z_erofs_rcu_callback);
+}
+
+static void
+__z_erofs_vle_work_release(struct z_erofs_vle_workgroup *grp,
+                          struct z_erofs_vle_work *work __maybe_unused)
+{
+       erofs_workgroup_put(&grp->obj);
+}
+
+static void z_erofs_vle_work_release(struct z_erofs_vle_work *work)
+{
+       struct z_erofs_vle_workgroup *grp =
+               z_erofs_vle_work_workgroup(work, true);
+
+       __z_erofs_vle_work_release(grp, work);
+}
+
+static inline bool
+z_erofs_vle_work_iter_end(struct z_erofs_vle_work_builder *builder)
+{
+       struct z_erofs_vle_work *work = builder->work;
+
+       if (!work)
+               return false;
+
+       z_erofs_pagevec_ctor_exit(&builder->vector, false);
+       mutex_unlock(&work->lock);
+
+       /*
+        * now that all pending pages have been added, don't hold the work
+        * reference any longer if the current work isn't hosted by ourselves.
+        */
+       if (!builder->hosted)
+               __z_erofs_vle_work_release(builder->grp, work);
+
+       builder->work = NULL;
+       builder->grp = NULL;
+       return true;
+}
+
+static inline struct page *__stagingpage_alloc(struct list_head *pagepool,
+                                              gfp_t gfp)
+{
+       struct page *page = erofs_allocpage(pagepool, gfp);
+
+       if (unlikely(!page))
+               return NULL;
+
+       page->mapping = Z_EROFS_MAPPING_STAGING;
+       return page;
+}
+
+struct z_erofs_vle_frontend {
+       struct inode *const inode;
+
+       struct z_erofs_vle_work_builder builder;
+       struct erofs_map_blocks map;
+
+       z_erofs_vle_owned_workgrp_t owned_head;
+
+       /* used for applying cache strategy on the fly */
+       bool backmost;
+       erofs_off_t headoffset;
+};
+
+#define VLE_FRONTEND_INIT(__i) { \
+       .inode = __i, \
+       .map = { \
+               .m_llen = 0, \
+               .m_plen = 0, \
+               .mpage = NULL \
+       }, \
+       .builder = VLE_WORK_BUILDER_INIT(), \
+       .owned_head = Z_EROFS_VLE_WORKGRP_TAIL, \
+       .backmost = true, }
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+static inline bool
+should_alloc_managed_pages(struct z_erofs_vle_frontend *fe, erofs_off_t la)
+{
+       if (fe->backmost)
+               return true;
+
+       if (EROFS_FS_ZIP_CACHE_LVL >= 2)
+               return la < fe->headoffset;
+
+       return false;
+}
+#else
+static inline bool
+should_alloc_managed_pages(struct z_erofs_vle_frontend *fe, erofs_off_t la)
+{
+       return false;
+}
+#endif
+
+static int z_erofs_do_read_page(struct z_erofs_vle_frontend *fe,
+                               struct page *page,
+                               struct list_head *page_pool)
+{
+       struct super_block *const sb = fe->inode->i_sb;
+       struct erofs_sb_info *const sbi __maybe_unused = EROFS_SB(sb);
+       struct erofs_map_blocks *const map = &fe->map;
+       struct z_erofs_vle_work_builder *const builder = &fe->builder;
+       const loff_t offset = page_offset(page);
+
+       bool tight = builder_is_hooked(builder);
+       struct z_erofs_vle_work *work = builder->work;
+
+       enum z_erofs_cache_alloctype cache_strategy;
+       enum z_erofs_page_type page_type;
+       unsigned int cur, end, spiltted, index;
+       int err = 0;
+
+       /* register locked file pages as online pages in pack */
+       z_erofs_onlinepage_init(page);
+
+       spiltted = 0;
+       end = PAGE_SIZE;
+repeat:
+       cur = end - 1;
+
+       /* lucky, within the range of the current map_blocks */
+       if (offset + cur >= map->m_la &&
+           offset + cur < map->m_la + map->m_llen) {
+               /* didn't get a valid unzip work previously (very rare) */
+               if (!builder->work)
+                       goto restart_now;
+               goto hitted;
+       }
+
+       /* go ahead to the next map_blocks */
+       debugln("%s: [out-of-range] pos %llu", __func__, offset + cur);
+
+       if (z_erofs_vle_work_iter_end(builder))
+               fe->backmost = false;
+
+       map->m_la = offset + cur;
+       map->m_llen = 0;
+       err = z_erofs_map_blocks_iter(fe->inode, map, 0);
+       if (unlikely(err))
+               goto err_out;
+
+restart_now:
+       if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED)))
+               goto hitted;
+
+       DBG_BUGON(map->m_plen != 1 << sbi->clusterbits);
+       DBG_BUGON(erofs_blkoff(map->m_pa));
+
+       err = z_erofs_vle_work_iter_begin(builder, sb, map, &fe->owned_head);
+       if (unlikely(err))
+               goto err_out;
+
+       /* preload all compressed pages (maybe downgrade role if necessary) */
+       if (should_alloc_managed_pages(fe, map->m_la))
+               cache_strategy = DELAYEDALLOC;
+       else
+               cache_strategy = DONTALLOC;
+
+       preload_compressed_pages(builder, MNGD_MAPPING(sbi),
+                                map->m_pa / PAGE_SIZE,
+                                map->m_plen / PAGE_SIZE,
+                                cache_strategy, page_pool, GFP_KERNEL);
+
+       tight &= builder_is_hooked(builder);
+       work = builder->work;
+hitted:
+       cur = end - min_t(unsigned int, offset + end - map->m_la, end);
+       if (unlikely(!(map->m_flags & EROFS_MAP_MAPPED))) {
+               zero_user_segment(page, cur, end);
+               goto next_part;
+       }
+
+       /* let's derive page type */
+       page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
+               (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
+                       (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
+                               Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
+
+       if (cur)
+               tight &= builder_is_followed(builder);
+
+retry:
+       err = z_erofs_vle_work_add_page(builder, page, page_type);
+       /* should allocate an additional staging page for pagevec */
+       if (err == -EAGAIN) {
+               struct page *const newpage =
+                       __stagingpage_alloc(page_pool, GFP_NOFS);
+
+               err = z_erofs_vle_work_add_page(builder, newpage,
+                                               Z_EROFS_PAGE_TYPE_EXCLUSIVE);
+               if (likely(!err))
+                       goto retry;
+       }
+
+       if (unlikely(err))
+               goto err_out;
+
+       index = page->index - map->m_la / PAGE_SIZE;
+
+       /* FIXME! avoid the last redundant fixup & endio */
+       z_erofs_onlinepage_fixup(page, index, true);
+
+       /* bump up the number of spiltted parts of a page */
+       ++spiltted;
+       /* also update nr_pages */
+       work->nr_pages = max_t(pgoff_t, work->nr_pages, index + 1);
+next_part:
+       /* can be used for verification */
+       map->m_llen = offset + cur - map->m_la;
+
+       end = cur;
+       if (end > 0)
+               goto repeat;
+
+out:
+       /* FIXME! avoid the last redundant fixup & endio */
+       z_erofs_onlinepage_endio(page);
+
+       debugln("%s, finish page: %pK spiltted: %u map->m_llen %llu",
+               __func__, page, spiltted, map->m_llen);
+       return err;
+
+       /* if some error occurred while processing this page */
+err_out:
+       SetPageError(page);
+       goto out;
+}
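For illustration only (not part of the patch): z_erofs_do_read_page() tracks each
file page with the z_erofs_onlinepage_* helpers, which pack a page sub-index and a
small pending count into one word. A user-space sketch of that packing, with
hypothetical names mirroring the Z_EROFS_ONLINEPAGE_* layout:

        #include <stdio.h>

        #define ONLINEPAGE_COUNT_BITS   2
        #define ONLINEPAGE_COUNT_MASK   ((1UL << ONLINEPAGE_COUNT_BITS) - 1)
        #define ONLINEPAGE_INDEX_SHIFT  ONLINEPAGE_COUNT_BITS

        int main(void)
        {
                /* sub-index 5 with one pending I/O */
                unsigned long v = (5UL << ONLINEPAGE_INDEX_SHIFT) | 1;

                printf("index=%lu pending=%lu\n",
                       v >> ONLINEPAGE_INDEX_SHIFT, v & ONLINEPAGE_COUNT_MASK);
                return 0;
        }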
+
+static void z_erofs_vle_unzip_kickoff(void *ptr, int bios)
+{
+       tagptr1_t t = tagptr_init(tagptr1_t, ptr);
+       struct z_erofs_vle_unzip_io *io = tagptr_unfold_ptr(t);
+       bool background = tagptr_unfold_tags(t);
+
+       if (!background) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&io->u.wait.lock, flags);
+               if (!atomic_add_return(bios, &io->pending_bios))
+                       wake_up_locked(&io->u.wait);
+               spin_unlock_irqrestore(&io->u.wait.lock, flags);
+               return;
+       }
+
+       if (!atomic_add_return(bios, &io->pending_bios))
+               queue_work(z_erofs_workqueue, &io->u.work);
+}
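For illustration only (not part of the patch): the pending_bios protocol lets bio
completions run before the submitter's final add; whichever call brings the counter
to zero wakes the foreground waiter or queues the background work. A minimal
single-threaded sketch with hypothetical names (the driver uses atomic_add_return()):

        #include <stdio.h>

        static int pending;     /* atomic_t pending_bios in the driver */

        /* returns 1 when the caller brought the count to zero and must kick off */
        static int kickoff(int delta)
        {
                pending += delta;       /* atomic_add_return() in the driver */
                return pending == 0;
        }

        int main(void)
        {
                /* two bios; one completes before the submitter adds the total */
                printf("%d", kickoff(-1));      /* early completion: pending = -1 */
                printf("%d", kickoff(2));       /* submitter adds nr_bios: pending = 1 */
                printf("%d\n", kickoff(-1));    /* last completion: pending = 0 */
                return 0;
        }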
+
+static inline void z_erofs_vle_read_endio(struct bio *bio)
+{
+       struct erofs_sb_info *sbi = NULL;
+       blk_status_t err = bio->bi_status;
+       struct bio_vec *bvec;
+       struct bvec_iter_all iter_all;
+
+       bio_for_each_segment_all(bvec, bio, iter_all) {
+               struct page *page = bvec->bv_page;
+               bool cachemngd = false;
+
+               DBG_BUGON(PageUptodate(page));
+               DBG_BUGON(!page->mapping);
+
+               if (unlikely(!sbi && !z_erofs_page_is_staging(page))) {
+                       sbi = EROFS_SB(page->mapping->host->i_sb);
+
+                       if (time_to_inject(sbi, FAULT_READ_IO)) {
+                               erofs_show_injection_info(FAULT_READ_IO);
+                               err = BLK_STS_IOERR;
+                       }
+               }
+
+               /* sbi should already be gotten if the page is managed */
+               if (sbi)
+                       cachemngd = erofs_page_is_managed(sbi, page);
+
+               if (unlikely(err))
+                       SetPageError(page);
+               else if (cachemngd)
+                       SetPageUptodate(page);
+
+               if (cachemngd)
+                       unlock_page(page);
+       }
+
+       z_erofs_vle_unzip_kickoff(bio->bi_private, -1);
+       bio_put(bio);
+}
+
+static struct page *z_pagemap_global[Z_EROFS_VLE_VMAP_GLOBAL_PAGES];
+static DEFINE_MUTEX(z_pagemap_global_lock);
+
+static int z_erofs_vle_unzip(struct super_block *sb,
+                            struct z_erofs_vle_workgroup *grp,
+                            struct list_head *page_pool)
+{
+       struct erofs_sb_info *const sbi = EROFS_SB(sb);
+       const unsigned int clusterpages = erofs_clusterpages(sbi);
+
+       struct z_erofs_pagevec_ctor ctor;
+       unsigned int nr_pages;
+       unsigned int sparsemem_pages = 0;
+       struct page *pages_onstack[Z_EROFS_VLE_VMAP_ONSTACK_PAGES];
+       struct page **pages, **compressed_pages, *page;
+       unsigned int algorithm;
+       unsigned int i, outputsize;
+
+       enum z_erofs_page_type page_type;
+       bool overlapped, partial;
+       struct z_erofs_vle_work *work;
+       int err;
+
+       might_sleep();
+       work = z_erofs_vle_grab_primary_work(grp);
+       DBG_BUGON(!READ_ONCE(work->nr_pages));
+
+       mutex_lock(&work->lock);
+       nr_pages = work->nr_pages;
+
+       if (likely(nr_pages <= Z_EROFS_VLE_VMAP_ONSTACK_PAGES))
+               pages = pages_onstack;
+       else if (nr_pages <= Z_EROFS_VLE_VMAP_GLOBAL_PAGES &&
+                mutex_trylock(&z_pagemap_global_lock))
+               pages = z_pagemap_global;
+       else {
+               gfp_t gfp_flags = GFP_KERNEL;
+
+               if (nr_pages > Z_EROFS_VLE_VMAP_GLOBAL_PAGES)
+                       gfp_flags |= __GFP_NOFAIL;
+
+               pages = kvmalloc_array(nr_pages, sizeof(struct page *),
+                                      gfp_flags);
+
+               /* fallback to global pagemap for the lowmem scenario */
+               if (unlikely(!pages)) {
+                       mutex_lock(&z_pagemap_global_lock);
+                       pages = z_pagemap_global;
+               }
+       }
+
+       for (i = 0; i < nr_pages; ++i)
+               pages[i] = NULL;
+
+       z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
+                                 work->pagevec, 0);
+
+       for (i = 0; i < work->vcnt; ++i) {
+               unsigned int pagenr;
+
+               page = z_erofs_pagevec_ctor_dequeue(&ctor, &page_type);
+
+               /* all pages in pagevec ought to be valid */
+               DBG_BUGON(!page);
+               DBG_BUGON(!page->mapping);
+
+               if (z_erofs_put_stagingpage(page_pool, page))
+                       continue;
+
+               if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
+                       pagenr = 0;
+               else
+                       pagenr = z_erofs_onlinepage_index(page);
+
+               DBG_BUGON(pagenr >= nr_pages);
+               DBG_BUGON(pages[pagenr]);
+
+               pages[pagenr] = page;
+       }
+       sparsemem_pages = i;
+
+       z_erofs_pagevec_ctor_exit(&ctor, true);
+
+       overlapped = false;
+       compressed_pages = grp->compressed_pages;
+
+       err = 0;
+       for (i = 0; i < clusterpages; ++i) {
+               unsigned int pagenr;
+
+               page = compressed_pages[i];
+
+               /* all compressed pages ought to be valid */
+               DBG_BUGON(!page);
+               DBG_BUGON(!page->mapping);
+
+               if (!z_erofs_page_is_staging(page)) {
+                       if (erofs_page_is_managed(sbi, page)) {
+                               if (unlikely(!PageUptodate(page)))
+                                       err = -EIO;
+                               continue;
+                       }
+
+                       /*
+                        * only non-head pages can be selected for
+                        * in-place decompression
+                        */
+                       pagenr = z_erofs_onlinepage_index(page);
+
+                       DBG_BUGON(pagenr >= nr_pages);
+                       DBG_BUGON(pages[pagenr]);
+                       ++sparsemem_pages;
+                       pages[pagenr] = page;
+
+                       overlapped = true;
+               }
+
+               /* PG_error needs checking for in-place and staging pages */
+               if (unlikely(PageError(page))) {
+                       DBG_BUGON(PageUptodate(page));
+                       err = -EIO;
+               }
+       }
+
+       if (unlikely(err))
+               goto out;
+
+       if (nr_pages << PAGE_SHIFT >= work->pageofs + grp->llen) {
+               outputsize = grp->llen;
+               partial = !(grp->flags & Z_EROFS_VLE_WORKGRP_FULL_LENGTH);
+       } else {
+               outputsize = (nr_pages << PAGE_SHIFT) - work->pageofs;
+               partial = true;
+       }
+
+       if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN)
+               algorithm = Z_EROFS_COMPRESSION_SHIFTED;
+       else
+               algorithm = Z_EROFS_COMPRESSION_LZ4;
+
+       err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
+                                       .sb = sb,
+                                       .in = compressed_pages,
+                                       .out = pages,
+                                       .pageofs_out = work->pageofs,
+                                       .inputsize = PAGE_SIZE,
+                                       .outputsize = outputsize,
+                                       .alg = algorithm,
+                                       .inplace_io = overlapped,
+                                       .partial_decoding = partial
+                                }, page_pool);
+
+out:
+       /* must handle all compressed pages before ending pages */
+       for (i = 0; i < clusterpages; ++i) {
+               page = compressed_pages[i];
+
+               if (erofs_page_is_managed(sbi, page))
+                       continue;
+
+               /* recycle all individual staging pages */
+               (void)z_erofs_put_stagingpage(page_pool, page);
+
+               WRITE_ONCE(compressed_pages[i], NULL);
+       }
+
+       for (i = 0; i < nr_pages; ++i) {
+               page = pages[i];
+               if (!page)
+                       continue;
+
+               DBG_BUGON(!page->mapping);
+
+               /* recycle all individual staging pages */
+               if (z_erofs_put_stagingpage(page_pool, page))
+                       continue;
+
+               if (unlikely(err < 0))
+                       SetPageError(page);
+
+               z_erofs_onlinepage_endio(page);
+       }
+
+       if (pages == z_pagemap_global)
+               mutex_unlock(&z_pagemap_global_lock);
+       else if (unlikely(pages != pages_onstack))
+               kvfree(pages);
+
+       work->nr_pages = 0;
+       work->vcnt = 0;
+
+       /* all work locks MUST be taken before the following line */
+
+       WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_NIL);
+
+       /* all work locks SHOULD be released right now */
+       mutex_unlock(&work->lock);
+
+       z_erofs_vle_work_release(work);
+       return err;
+}
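For illustration only (not part of the patch): z_erofs_vle_unzip() picks its page
array from three tiers (on-stack array, a shared global buffer taken with a
trylock, and heap allocation as the fallback). A user-space sketch of that choice
with hypothetical names:

        #include <pthread.h>
        #include <stdio.h>
        #include <stdlib.h>

        #define ONSTACK_SLOTS   8
        #define GLOBAL_SLOTS    64

        static void *global_buf[GLOBAL_SLOTS];
        static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;

        /* pick the cheapest buffer that can hold @n slots */
        static void **pick_buffer(unsigned int n, void **onstack, int *used_global)
        {
                *used_global = 0;
                if (n <= ONSTACK_SLOTS)
                        return onstack;
                if (n <= GLOBAL_SLOTS && !pthread_mutex_trylock(&global_lock)) {
                        *used_global = 1;
                        return global_buf;
                }
                return malloc(n * sizeof(void *));      /* heap as the last resort */
        }

        int main(void)
        {
                void *onstack[ONSTACK_SLOTS];
                int used_global;
                void **buf = pick_buffer(4, onstack, &used_global);

                printf("on-stack: %d\n", buf == onstack);
                if (used_global)
                        pthread_mutex_unlock(&global_lock);
                else if (buf != onstack)
                        free(buf);
                return 0;
        }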
+
+static void z_erofs_vle_unzip_all(struct super_block *sb,
+                                 struct z_erofs_vle_unzip_io *io,
+                                 struct list_head *page_pool)
+{
+       z_erofs_vle_owned_workgrp_t owned = io->head;
+
+       while (owned != Z_EROFS_VLE_WORKGRP_TAIL_CLOSED) {
+               struct z_erofs_vle_workgroup *grp;
+
+               /* impossible that 'owned' equals Z_EROFS_VLE_WORKGRP_TAIL */
+               DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_TAIL);
+
+               /* impossible that 'owned' equals NULL */
+               DBG_BUGON(owned == Z_EROFS_VLE_WORKGRP_NIL);
+
+               grp = container_of(owned, struct z_erofs_vle_workgroup, next);
+               owned = READ_ONCE(grp->next);
+
+               z_erofs_vle_unzip(sb, grp, page_pool);
+       }
+}
+
+static void z_erofs_vle_unzip_wq(struct work_struct *work)
+{
+       struct z_erofs_vle_unzip_io_sb *iosb = container_of(work,
+               struct z_erofs_vle_unzip_io_sb, io.u.work);
+       LIST_HEAD(page_pool);
+
+       DBG_BUGON(iosb->io.head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+       z_erofs_vle_unzip_all(iosb->sb, &iosb->io, &page_pool);
+
+       put_pages_list(&page_pool);
+       kvfree(iosb);
+}
+
+static struct page *
+pickup_page_for_submission(struct z_erofs_vle_workgroup *grp,
+                          unsigned int nr,
+                          struct list_head *pagepool,
+                          struct address_space *mc,
+                          gfp_t gfp)
+{
+       /* determined at compile time to avoid too many #ifdefs */
+       const bool nocache = __builtin_constant_p(mc) ? !mc : false;
+       const pgoff_t index = grp->obj.index;
+       bool tocache = false;
+
+       struct address_space *mapping;
+       struct page *oldpage, *page;
+
+       compressed_page_t t;
+       int justfound;
+
+repeat:
+       page = READ_ONCE(grp->compressed_pages[nr]);
+       oldpage = page;
+
+       if (!page)
+               goto out_allocpage;
+
+       /*
+        * the cached page has not been allocated and
+        * a placeholder is out there, prepare it now.
+        */
+       if (!nocache && page == PAGE_UNALLOCATED) {
+               tocache = true;
+               goto out_allocpage;
+       }
+
+       /* process the target tagged pointer */
+       t = tagptr_init(compressed_page_t, page);
+       justfound = tagptr_unfold_tags(t);
+       page = tagptr_unfold_ptr(t);
+
+       mapping = READ_ONCE(page->mapping);
+
+       /*
+        * if managed cache is disabled, there is no way to
+        * get such a cached page.
+        */
+       if (nocache) {
+               /* `justfound' is impossible if managed cache is disabled */
+               DBG_BUGON(justfound);
+
+               /* and it should be locked, not uptodate, and not truncated */
+               DBG_BUGON(!PageLocked(page));
+               DBG_BUGON(PageUptodate(page));
+               DBG_BUGON(!mapping);
+               goto out;
+       }
+
+       /*
+        * all unmanaged (file) pages are locked, therefore it's
+        * impossible for `mapping' to become NULL here.
+        */
+       if (mapping && mapping != mc)
+               /* ought to be unmanaged pages */
+               goto out;
+
+       lock_page(page);
+
+       /* only true if page reclaim goes wrong, should never happen */
+       DBG_BUGON(justfound && PagePrivate(page));
+
+       /* the page is still in the managed cache */
+       if (page->mapping == mc) {
+               WRITE_ONCE(grp->compressed_pages[nr], page);
+
+               ClearPageError(page);
+               if (!PagePrivate(page)) {
+                       /*
+                        * under the current restriction, a page already
+                        * in compressed_pages[] can only be !PagePrivate
+                        * if it has just been found in the page cache.
+                        */
+                       DBG_BUGON(!justfound);
+
+                       justfound = 0;
+                       set_page_private(page, (unsigned long)grp);
+                       SetPagePrivate(page);
+               }
+
+               /* no need to submit io if it is already up-to-date */
+               if (PageUptodate(page)) {
+                       unlock_page(page);
+                       page = NULL;
+               }
+               goto out;
+       }
+
+       /*
+        * the managed page has been truncated; it's unsafe to
+        * reuse it, so let's allocate a new cache-managed page.
+        */
+       DBG_BUGON(page->mapping);
+       DBG_BUGON(!justfound);
+
+       tocache = true;
+       unlock_page(page);
+       put_page(page);
+out_allocpage:
+       page = __stagingpage_alloc(pagepool, gfp);
+       if (oldpage != cmpxchg(&grp->compressed_pages[nr], oldpage, page)) {
+               list_add(&page->lru, pagepool);
+               cpu_relax();
+               goto repeat;
+       }
+       if (nocache || !tocache)
+               goto out;
+       if (add_to_page_cache_lru(page, mc, index + nr, gfp)) {
+               page->mapping = Z_EROFS_MAPPING_STAGING;
+               goto out;
+       }
+
+       set_page_private(page, (unsigned long)grp);
+       SetPagePrivate(page);
+out:   /* the only exit (for tracing and debugging) */
+       return page;
+}
+
+static struct z_erofs_vle_unzip_io *
+jobqueue_init(struct super_block *sb,
+             struct z_erofs_vle_unzip_io *io,
+             bool foreground)
+{
+       struct z_erofs_vle_unzip_io_sb *iosb;
+
+       if (foreground) {
+               /* waitqueue available for foreground io */
+               DBG_BUGON(!io);
+
+               init_waitqueue_head(&io->u.wait);
+               atomic_set(&io->pending_bios, 0);
+               goto out;
+       }
+
+       iosb = kvzalloc(sizeof(*iosb), GFP_KERNEL | __GFP_NOFAIL);
+       DBG_BUGON(!iosb);
+
+       /* initialize fields in the allocated descriptor */
+       io = &iosb->io;
+       iosb->sb = sb;
+       INIT_WORK(&io->u.work, z_erofs_vle_unzip_wq);
+out:
+       io->head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED;
+       return io;
+}
+
+/* define workgroup jobqueue types */
+enum {
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+       /* workgroups whose compressed pages need no io at all */
+       JQ_BYPASS,
+#endif
+       /* workgroups whose compressed pages need read bios submitted */
+       JQ_SUBMIT,
+       NR_JOBQUEUES,
+};
+
+static void *jobqueueset_init(struct super_block *sb,
+                             z_erofs_vle_owned_workgrp_t qtail[],
+                             struct z_erofs_vle_unzip_io *q[],
+                             struct z_erofs_vle_unzip_io *fgq,
+                             bool forcefg)
+{
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+       /*
+        * if managed cache is enabled, a bypass jobqueue is needed:
+        * no read from the device is needed for workgroups in this queue.
+        */
+       q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, true);
+       qtail[JQ_BYPASS] = &q[JQ_BYPASS]->head;
+#endif
+
+       q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, forcefg);
+       qtail[JQ_SUBMIT] = &q[JQ_SUBMIT]->head;
+
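+       /*
+        * fold the submission jobqueue pointer with a tag marking whether
+        * this is a background (!forcefg) request so that the completion
+        * path can tell them apart.
+        */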
+       return tagptr_cast_ptr(tagptr_fold(tagptr1_t, q[JQ_SUBMIT], !forcefg));
+}
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+static void move_to_bypass_jobqueue(struct z_erofs_vle_workgroup *grp,
+                                   z_erofs_vle_owned_workgrp_t qtail[],
+                                   z_erofs_vle_owned_workgrp_t owned_head)
+{
+       z_erofs_vle_owned_workgrp_t *const submit_qtail = qtail[JQ_SUBMIT];
+       z_erofs_vle_owned_workgrp_t *const bypass_qtail = qtail[JQ_BYPASS];
+
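+       /*
+        * splice this workgroup out of its submission chain and
+        * append it to the bypass chain instead.
+        */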
+       DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+       if (owned_head == Z_EROFS_VLE_WORKGRP_TAIL)
+               owned_head = Z_EROFS_VLE_WORKGRP_TAIL_CLOSED;
+
+       WRITE_ONCE(grp->next, Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+
+       WRITE_ONCE(*submit_qtail, owned_head);
+       WRITE_ONCE(*bypass_qtail, &grp->next);
+
+       qtail[JQ_BYPASS] = &grp->next;
+}
+
+static bool postsubmit_is_all_bypassed(struct z_erofs_vle_unzip_io *q[],
+                                      unsigned int nr_bios,
+                                      bool force_fg)
+{
+       /*
+        * although background decompression is preferred, nothing is
+        * pending for submission; don't kick off the decompression
+        * workqueue, just drop the jobqueue directly instead.
+        */
+       if (force_fg || nr_bios)
+               return false;
+
+       kvfree(container_of(q[JQ_SUBMIT],
+                           struct z_erofs_vle_unzip_io_sb,
+                           io));
+       return true;
+}
+#else
+static void move_to_bypass_jobqueue(struct z_erofs_vle_workgroup *grp,
+                                   z_erofs_vle_owned_workgrp_t qtail[],
+                                   z_erofs_vle_owned_workgrp_t owned_head)
+{
+       /* bypassing submission is impossible when managed cache is disabled */
+       DBG_BUGON(1);
+}
+
+static bool postsubmit_is_all_bypassed(struct z_erofs_vle_unzip_io *q[],
+                                      unsigned int nr_bios,
+                                      bool force_fg)
+{
+       /* nr_bios should be > 0 if managed cache is disabled */
+       DBG_BUGON(!nr_bios);
+       return false;
+}
+#endif
+
+static bool z_erofs_vle_submit_all(struct super_block *sb,
+                                  z_erofs_vle_owned_workgrp_t owned_head,
+                                  struct list_head *pagepool,
+                                  struct z_erofs_vle_unzip_io *fgq,
+                                  bool force_fg)
+{
+       struct erofs_sb_info *const sbi = EROFS_SB(sb);
+       const unsigned int clusterpages = erofs_clusterpages(sbi);
+       const gfp_t gfp = GFP_NOFS;
+
+       z_erofs_vle_owned_workgrp_t qtail[NR_JOBQUEUES];
+       struct z_erofs_vle_unzip_io *q[NR_JOBQUEUES];
+       struct bio *bio;
+       void *bi_private;
+       /* since bio will be NULL, no need to initialize last_index */
+       pgoff_t uninitialized_var(last_index);
+       bool force_submit = false;
+       unsigned int nr_bios;
+
+       if (unlikely(owned_head == Z_EROFS_VLE_WORKGRP_TAIL))
+               return false;
+
+       force_submit = false;
+       bio = NULL;
+       nr_bios = 0;
+       bi_private = jobqueueset_init(sb, qtail, q, fgq, force_fg);
+
+       /* by default, all need io submission */
+       q[JQ_SUBMIT]->head = owned_head;
+
+       do {
+               struct z_erofs_vle_workgroup *grp;
+               pgoff_t first_index;
+               struct page *page;
+               unsigned int i = 0, bypass = 0;
+               int err;
+
+               /* it's impossible that 'owned_head' equals any of the following */
+               DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+               DBG_BUGON(owned_head == Z_EROFS_VLE_WORKGRP_NIL);
+
+               grp = container_of(owned_head,
+                                  struct z_erofs_vle_workgroup, next);
+
+               /* close the main owned chain first */
+               owned_head = cmpxchg(&grp->next, Z_EROFS_VLE_WORKGRP_TAIL,
+                                    Z_EROFS_VLE_WORKGRP_TAIL_CLOSED);
+
+               first_index = grp->obj.index;
+               force_submit |= (first_index != last_index + 1);
+
+repeat:
+               page = pickup_page_for_submission(grp, i, pagepool,
+                                                 MNGD_MAPPING(sbi), gfp);
+               if (!page) {
+                       force_submit = true;
+                       ++bypass;
+                       goto skippage;
+               }
+
+               if (bio && force_submit) {
+submit_bio_retry:
+                       __submit_bio(bio, REQ_OP_READ, 0);
+                       bio = NULL;
+               }
+
+               if (!bio) {
+                       bio = erofs_grab_bio(sb, first_index + i,
+                                            BIO_MAX_PAGES, bi_private,
+                                            z_erofs_vle_read_endio, true);
+                       ++nr_bios;
+               }
+
+               err = bio_add_page(bio, page, PAGE_SIZE, 0);
+               if (err < PAGE_SIZE)
+                       goto submit_bio_retry;
+
+               force_submit = false;
+               last_index = first_index + i;
+skippage:
+               if (++i < clusterpages)
+                       goto repeat;
+
+               if (bypass < clusterpages)
+                       qtail[JQ_SUBMIT] = &grp->next;
+               else
+                       move_to_bypass_jobqueue(grp, qtail, owned_head);
+       } while (owned_head != Z_EROFS_VLE_WORKGRP_TAIL);
+
+       if (bio)
+               __submit_bio(bio, REQ_OP_READ, 0);
+
+       if (postsubmit_is_all_bypassed(q, nr_bios, force_fg))
+               return true;
+
+       z_erofs_vle_unzip_kickoff(bi_private, nr_bios);
+       return true;
+}
+
+static void z_erofs_submit_and_unzip(struct z_erofs_vle_frontend *f,
+                                    struct list_head *pagepool,
+                                    bool force_fg)
+{
+       struct super_block *sb = f->inode->i_sb;
+       struct z_erofs_vle_unzip_io io[NR_JOBQUEUES];
+
+       if (!z_erofs_vle_submit_all(sb, f->owned_head, pagepool, io, force_fg))
+               return;
+
+#ifdef EROFS_FS_HAS_MANAGED_CACHE
+       z_erofs_vle_unzip_all(sb, &io[JQ_BYPASS], pagepool);
+#endif
+       if (!force_fg)
+               return;
+
+       /* wait until all bios are completed */
+       wait_event(io[JQ_SUBMIT].u.wait,
+                  !atomic_read(&io[JQ_SUBMIT].pending_bios));
+
+       /* let's do synchronous decompression */
+       z_erofs_vle_unzip_all(sb, &io[JQ_SUBMIT], pagepool);
+}
+
+static int z_erofs_vle_normalaccess_readpage(struct file *file,
+                                            struct page *page)
+{
+       struct inode *const inode = page->mapping->host;
+       struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
+       int err;
+       LIST_HEAD(pagepool);
+
+       trace_erofs_readpage(page, false);
+
+       f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
+
+       err = z_erofs_do_read_page(&f, page, &pagepool);
+       (void)z_erofs_vle_work_iter_end(&f.builder);
+
+       if (err) {
+               errln("%s, failed to read, err [%d]", __func__, err);
+               goto out;
+       }
+
+       z_erofs_submit_and_unzip(&f, &pagepool, true);
+out:
+       if (f.map.mpage)
+               put_page(f.map.mpage);
+
+       /* clean up the remaining free pages */
+       put_pages_list(&pagepool);
+       return 0;
+}
+
+static int z_erofs_vle_normalaccess_readpages(struct file *filp,
+                                             struct address_space *mapping,
+                                             struct list_head *pages,
+                                             unsigned int nr_pages)
+{
+       struct inode *const inode = mapping->host;
+       struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
+
+       bool sync = __should_decompress_synchronously(sbi, nr_pages);
+       struct z_erofs_vle_frontend f = VLE_FRONTEND_INIT(inode);
+       gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
+       struct page *head = NULL;
+       LIST_HEAD(pagepool);
+
+       trace_erofs_readpages(mapping->host, lru_to_page(pages),
+                             nr_pages, false);
+
+       f.headoffset = (erofs_off_t)lru_to_page(pages)->index << PAGE_SHIFT;
+
+       for (; nr_pages; --nr_pages) {
+               struct page *page = lru_to_page(pages);
+
+               prefetchw(&page->flags);
+               list_del(&page->lru);
+
+               /*
+                * A pure asynchronous readahead is indicated if
+                * a PG_readahead marked page is hit first.
+                * Let's also do asynchronous decompression in this case.
+                */
+               sync &= !(PageReadahead(page) && !head);
+
+               if (add_to_page_cache_lru(page, mapping, page->index, gfp)) {
+                       list_add(&page->lru, &pagepool);
+                       continue;
+               }
+
+               set_page_private(page, (unsigned long)head);
+               head = page;
+       }
+
+       while (head) {
+               struct page *page = head;
+               int err;
+
+               /* traversal in reverse order */
+               head = (void *)page_private(page);
+
+               err = z_erofs_do_read_page(&f, page, &pagepool);
+               if (err) {
+                       struct erofs_vnode *vi = EROFS_V(inode);
+
+                       errln("%s, readahead error at page %lu of nid %llu",
+                             __func__, page->index, vi->nid);
+               }
+
+               put_page(page);
+       }
+
+       (void)z_erofs_vle_work_iter_end(&f.builder);
+
+       z_erofs_submit_and_unzip(&f, &pagepool, sync);
+
+       if (f.map.mpage)
+               put_page(f.map.mpage);
+
+       /* clean up the remaining free pages */
+       put_pages_list(&pagepool);
+       return 0;
+}
+
+const struct address_space_operations z_erofs_vle_normalaccess_aops = {
+       .readpage = z_erofs_vle_normalaccess_readpage,
+       .readpages = z_erofs_vle_normalaccess_readpages,
+};
+
diff --git a/drivers/staging/erofs/zdata.h b/drivers/staging/erofs/zdata.h
new file mode 100644 (file)
index 0000000..8d0119d
--- /dev/null
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/drivers/staging/erofs/zdata.h
+ *
+ * Copyright (C) 2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_FS_ZDATA_H
+#define __EROFS_FS_ZDATA_H
+
+#include "internal.h"
+#include "zpvec.h"
+
+#define Z_EROFS_NR_INLINE_PAGEVECS      3
+
+/*
+ * Structure fields follow one of the following exclusion rules.
+ *
+ * I: Modifiable by initialization/destruction paths and read-only
+ *    for everyone else.
+ *
+ * L: Protected by the work lock (work->lock).
+ *
+ */
+
+struct z_erofs_vle_work {
+       struct mutex lock;
+
+       /* I: decompression offset in page */
+       unsigned short pageofs;
+       unsigned short nr_pages;
+
+       /* L: queued pages in pagevec[] */
+       unsigned int vcnt;
+
+       union {
+               /* L: pagevec */
+               erofs_vtptr_t pagevec[Z_EROFS_NR_INLINE_PAGEVECS];
+               struct rcu_head rcu;
+       };
+};
+
+#define Z_EROFS_VLE_WORKGRP_FMT_PLAIN        0
+#define Z_EROFS_VLE_WORKGRP_FMT_LZ4          1
+#define Z_EROFS_VLE_WORKGRP_FMT_MASK         1
+#define Z_EROFS_VLE_WORKGRP_FULL_LENGTH      2
+
+typedef void *z_erofs_vle_owned_workgrp_t;
+
+struct z_erofs_vle_workgroup {
+       struct erofs_workgroup obj;
+       struct z_erofs_vle_work work;
+
+       /* point to next owned_workgrp_t */
+       z_erofs_vle_owned_workgrp_t next;
+
+       /* compressed pages (including multi-usage pages) */
+       struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES];
+       unsigned int llen, flags;
+};
+
+/* the following magic values avoid valid 32-bit kernel addresses */
+
+/* the chained workgroup hasn't submitted io (still open) */
+#define Z_EROFS_VLE_WORKGRP_TAIL        ((void *)0x5F0ECAFE)
+/* the chained workgroup has already submitted io */
+#define Z_EROFS_VLE_WORKGRP_TAIL_CLOSED ((void *)0x5F0EDEAD)
+
+#define Z_EROFS_VLE_WORKGRP_NIL         (NULL)
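+
+/*
+ * `next' links workgroups into a singly-linked decompression chain:
+ * NIL means the workgroup isn't chained at all, TAIL marks the open end
+ * of a chain whose io hasn't been submitted yet, and TAIL_CLOSED marks
+ * a chain whose io has already been submitted.
+ */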
+
+#define z_erofs_vle_workgrp_fmt(grp)   \
+       ((grp)->flags & Z_EROFS_VLE_WORKGRP_FMT_MASK)
+
+static inline void z_erofs_vle_set_workgrp_fmt(
+       struct z_erofs_vle_workgroup *grp,
+       unsigned int fmt)
+{
+       grp->flags = fmt | (grp->flags & ~Z_EROFS_VLE_WORKGRP_FMT_MASK);
+}
+
+
+/* definitions if multiref is disabled */
+#define z_erofs_vle_grab_primary_work(grp)     (&(grp)->work)
+#define z_erofs_vle_grab_work(grp, pageofs)    (&(grp)->work)
+#define z_erofs_vle_work_workgroup(wrk, primary)       \
+       ((primary) ? container_of(wrk,  \
+               struct z_erofs_vle_workgroup, work) : \
+               ({ BUG(); (void *)NULL; }))
+
+
+#define Z_EROFS_WORKGROUP_SIZE       sizeof(struct z_erofs_vle_workgroup)
+
+struct z_erofs_vle_unzip_io {
+       atomic_t pending_bios;
+       z_erofs_vle_owned_workgrp_t head;
+
+       union {
+               wait_queue_head_t wait;
+               struct work_struct work;
+       } u;
+};
+
+struct z_erofs_vle_unzip_io_sb {
+       struct z_erofs_vle_unzip_io io;
+       struct super_block *sb;
+};
+
+#define Z_EROFS_ONLINEPAGE_COUNT_BITS 2
+#define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1)
+#define Z_EROFS_ONLINEPAGE_INDEX_SHIFT  (Z_EROFS_ONLINEPAGE_COUNT_BITS)
+
+/*
+ * waiters (aka. ongoing_packs): # of parts still needed to unlock the page
+ * sub-index: 0 - for the partial page, >= 1 - full page sub-index
+ */
+typedef atomic_t z_erofs_onlinepage_t;
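+
+/*
+ * an online page packs both fields into a single atomic_t:
+ *   bits 0-1: the pending count (the page is unlocked once it reaches 0)
+ *   bits 2+ : the sub-index described above
+ */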
+
+/* type punning */
+union z_erofs_onlinepage_converter {
+       z_erofs_onlinepage_t *o;
+       unsigned long *v;
+};
+
+static inline unsigned int z_erofs_onlinepage_index(struct page *page)
+{
+       union z_erofs_onlinepage_converter u;
+
+       DBG_BUGON(!PagePrivate(page));
+       u.v = &page_private(page);
+
+       return atomic_read(u.o) >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
+}
+
+static inline void z_erofs_onlinepage_init(struct page *page)
+{
+       union {
+               z_erofs_onlinepage_t o;
+               unsigned long v;
+       /* keep the page from being unlocked in advance */
+       } u = { .o = ATOMIC_INIT(1) };
+
+       set_page_private(page, u.v);
+       smp_wmb();
+       SetPagePrivate(page);
+}
+
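+/*
+ * record the sub-index of an online page (if not recorded yet) and,
+ * when `down' is true, bump its pending count by one.
+ */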
+static inline void z_erofs_onlinepage_fixup(struct page *page,
+       uintptr_t index, bool down)
+{
+       unsigned long *p, o, v, id;
+repeat:
+       p = &page_private(page);
+       o = READ_ONCE(*p);
+
+       id = o >> Z_EROFS_ONLINEPAGE_INDEX_SHIFT;
+       if (id) {
+               if (!index)
+                       return;
+
+               DBG_BUGON(id != index);
+       }
+
+       v = (index << Z_EROFS_ONLINEPAGE_INDEX_SHIFT) |
+               ((o & Z_EROFS_ONLINEPAGE_COUNT_MASK) + (unsigned int)down);
+       if (cmpxchg(p, o, v) != o)
+               goto repeat;
+}
+
+static inline void z_erofs_onlinepage_endio(struct page *page)
+{
+       union z_erofs_onlinepage_converter u;
+       unsigned int v;
+
+       DBG_BUGON(!PagePrivate(page));
+       u.v = &page_private(page);
+
+       v = atomic_dec_return(u.o);
+       if (!(v & Z_EROFS_ONLINEPAGE_COUNT_MASK)) {
+               ClearPagePrivate(page);
+               if (!PageError(page))
+                       SetPageUptodate(page);
+               unlock_page(page);
+       }
+
+       debugln("%s, page %p value %x", __func__, page, atomic_read(u.o));
+}
+
+/* # of decompressed page pointers that can be kept on the kernel stack */
+#define Z_EROFS_VLE_VMAP_ONSTACK_PAGES \
+       min_t(unsigned int, THREAD_SIZE / 8 / sizeof(struct page *), 96U)
+/* # of entries in the shared global fallback array of page pointers */
+#define Z_EROFS_VLE_VMAP_GLOBAL_PAGES  2048
+
+#endif
+
diff --git a/drivers/staging/erofs/zpvec.h b/drivers/staging/erofs/zpvec.h
new file mode 100644 (file)
index 0000000..77bf687
--- /dev/null
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * linux/drivers/staging/erofs/zpvec.h
+ *
+ * Copyright (C) 2018 HUAWEI, Inc.
+ *             http://www.huawei.com/
+ * Created by Gao Xiang <gaoxiang25@huawei.com>
+ */
+#ifndef __EROFS_FS_ZPVEC_H
+#define __EROFS_FS_ZPVEC_H
+
+#include "tagptr.h"
+
+/* page type in pagevec for unzip subsystem */
+enum z_erofs_page_type {
+       /* including Z_EROFS_VLE_PAGE_TAIL_EXCLUSIVE */
+       Z_EROFS_PAGE_TYPE_EXCLUSIVE,
+
+       Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED,
+
+       Z_EROFS_VLE_PAGE_TYPE_HEAD,
+       Z_EROFS_VLE_PAGE_TYPE_MAX
+};
+
+extern void __compiletime_error("Z_EROFS_PAGE_TYPE_EXCLUSIVE != 0")
+       __bad_page_type_exclusive(void);
+
+/* pagevec tagged pointer */
+typedef tagptr2_t      erofs_vtptr_t;
+
+/* pagevec collector */
+struct z_erofs_pagevec_ctor {
+       /* curr: pagevec page in use; next: reserved for the next pagevec */
+       struct page *curr, *next;
+       /* the (inline or kmapped) array of tagged page pointers in use */
+       erofs_vtptr_t *pages;
+
+       /* nr: # of slots in the current array; index: next slot to use */
+       unsigned int nr, index;
+};
+
+static inline void z_erofs_pagevec_ctor_exit(struct z_erofs_pagevec_ctor *ctor,
+                                            bool atomic)
+{
+       if (!ctor->curr)
+               return;
+
+       if (atomic)
+               kunmap_atomic(ctor->pages);
+       else
+               kunmap(ctor->curr);
+}
+
+static inline struct page *
+z_erofs_pagevec_ctor_next_page(struct z_erofs_pagevec_ctor *ctor,
+                              unsigned int nr)
+{
+       unsigned int index;
+
+       /* keep away from occupied pages */
+       if (ctor->next)
+               return ctor->next;
+
+       for (index = 0; index < nr; ++index) {
+               const erofs_vtptr_t t = ctor->pages[index];
+               const unsigned int tags = tagptr_unfold_tags(t);
+
+               if (tags == Z_EROFS_PAGE_TYPE_EXCLUSIVE)
+                       return tagptr_unfold_ptr(t);
+       }
+       DBG_BUGON(nr >= ctor->nr);
+       return NULL;
+}
+
+static inline void
+z_erofs_pagevec_ctor_pagedown(struct z_erofs_pagevec_ctor *ctor,
+                             bool atomic)
+{
+       struct page *next = z_erofs_pagevec_ctor_next_page(ctor, ctor->nr);
+
+       z_erofs_pagevec_ctor_exit(ctor, atomic);
+
+       ctor->curr = next;
+       ctor->next = NULL;
+       ctor->pages = atomic ?
+               kmap_atomic(ctor->curr) : kmap(ctor->curr);
+
+       ctor->nr = PAGE_SIZE / sizeof(struct page *);
+       ctor->index = 0;
+}
+
+static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
+                                            unsigned int nr,
+                                            erofs_vtptr_t *pages,
+                                            unsigned int i)
+{
+       ctor->nr = nr;
+       ctor->curr = ctor->next = NULL;
+       ctor->pages = pages;
+
+       if (i >= nr) {
+               i -= nr;
+               z_erofs_pagevec_ctor_pagedown(ctor, false);
+               while (i > ctor->nr) {
+                       i -= ctor->nr;
+                       z_erofs_pagevec_ctor_pagedown(ctor, false);
+               }
+       }
+
+       ctor->next = z_erofs_pagevec_ctor_next_page(ctor, i);
+       ctor->index = i;
+}
+
+static inline bool
+z_erofs_pagevec_ctor_enqueue(struct z_erofs_pagevec_ctor *ctor,
+                            struct page *page,
+                            enum z_erofs_page_type type,
+                            bool *occupied)
+{
+       *occupied = false;
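+       /*
+        * if no `next' pagevec page has been recorded yet, reserve the last
+        * slot: only an exclusive page (type 0) may take it, since it can
+        * also serve as the next pagevec page.
+        */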
+       if (unlikely(!ctor->next && type))
+               if (ctor->index + 1 == ctor->nr)
+                       return false;
+
+       if (unlikely(ctor->index >= ctor->nr))
+               z_erofs_pagevec_ctor_pagedown(ctor, false);
+
+       /* exclusive page type must be 0 */
+       if (Z_EROFS_PAGE_TYPE_EXCLUSIVE != (uintptr_t)NULL)
+               __bad_page_type_exclusive();
+
+       /* note that `ctor->next' can never be equal to 1 or 2 */
+       if (type == (uintptr_t)ctor->next) {
+               ctor->next = page;
+               *occupied = true;
+       }
+
+       ctor->pages[ctor->index++] =
+               tagptr_fold(erofs_vtptr_t, page, type);
+       return true;
+}
+
+static inline struct page *
+z_erofs_pagevec_ctor_dequeue(struct z_erofs_pagevec_ctor *ctor,
+                            enum z_erofs_page_type *type)
+{
+       erofs_vtptr_t t;
+
+       if (unlikely(ctor->index >= ctor->nr)) {
+               DBG_BUGON(!ctor->next);
+               z_erofs_pagevec_ctor_pagedown(ctor, true);
+       }
+
+       t = ctor->pages[ctor->index];
+
+       *type = tagptr_unfold_tags(t);
+
+       /* note that `ctor->next' can never be equal to 1 or 2 */
+       if (*type == (uintptr_t)ctor->next)
+               ctor->next = tagptr_unfold_ptr(t);
+
+       ctor->pages[ctor->index++] =
+               tagptr_fold(erofs_vtptr_t, NULL, 0);
+
+       return tagptr_unfold_ptr(t);
+}
+
+#endif
+