diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 724bb57075f61e3f7a7524dee80a4e8a61f083e2..5792ca9e0d5efaf59a4b9abb65a1e1dd1c503078 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) 2018 HUAWEI, Inc.
  *             https://www.huawei.com/
+ * Copyright (C) 2022 Alibaba Cloud
  */
 #include "zdata.h"
 #include "compress.h"
@@ -26,6 +27,82 @@ static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
        _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
 };
 
+struct z_erofs_bvec_iter {
+       struct page *bvpage;
+       struct z_erofs_bvset *bvset;
+       unsigned int nr, cur;
+};
+
+static struct page *z_erofs_bvec_iter_end(struct z_erofs_bvec_iter *iter)
+{
+       if (iter->bvpage)
+               kunmap_local(iter->bvset);
+       return iter->bvpage;
+}
+
+static struct page *z_erofs_bvset_flip(struct z_erofs_bvec_iter *iter)
+{
+       unsigned long base = (unsigned long)((struct z_erofs_bvset *)0)->bvec;
+       /* have to access nextpage in advance, otherwise it will be unmapped */
+       struct page *nextpage = iter->bvset->nextpage;
+       struct page *oldpage;
+
+       DBG_BUGON(!nextpage);
+       oldpage = z_erofs_bvec_iter_end(iter);
+       iter->bvpage = nextpage;
+       iter->bvset = kmap_local_page(nextpage);
+       iter->nr = (PAGE_SIZE - base) / sizeof(struct z_erofs_bvec);
+       iter->cur = 0;
+       return oldpage;
+}
+
+static void z_erofs_bvec_iter_begin(struct z_erofs_bvec_iter *iter,
+                                   struct z_erofs_bvset_inline *bvset,
+                                   unsigned int bootstrap_nr,
+                                   unsigned int cur)
+{
+       *iter = (struct z_erofs_bvec_iter) {
+               .nr = bootstrap_nr,
+               .bvset = (struct z_erofs_bvset *)bvset,
+       };
+
+       while (cur > iter->nr) {
+               cur -= iter->nr;
+               z_erofs_bvset_flip(iter);
+       }
+       iter->cur = cur;
+}
+
+static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter,
+                               struct z_erofs_bvec *bvec,
+                               struct page **candidate_bvpage)
+{
+       if (iter->cur == iter->nr) {
+               if (!*candidate_bvpage)
+                       return -EAGAIN;
+
+               DBG_BUGON(iter->bvset->nextpage);
+               iter->bvset->nextpage = *candidate_bvpage;
+               z_erofs_bvset_flip(iter);
+
+               iter->bvset->nextpage = NULL;
+               *candidate_bvpage = NULL;
+       }
+       iter->bvset->bvec[iter->cur++] = *bvec;
+       return 0;
+}
+
+static void z_erofs_bvec_dequeue(struct z_erofs_bvec_iter *iter,
+                                struct z_erofs_bvec *bvec,
+                                struct page **old_bvpage)
+{
+       if (iter->cur == iter->nr)
+               *old_bvpage = z_erofs_bvset_flip(iter);
+       else
+               *old_bvpage = NULL;
+       *bvec = iter->bvset->bvec[iter->cur++];
+}
+
 static void z_erofs_destroy_pcluster_pool(void)
 {
        int i;
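
The bvec iterator above replaces the old fixed-size pagevec: bvecs live in per-page sets chained through each set's nextpage pointer, starting from a small inline set embedded in the pcluster (Z_EROFS_INLINE_BVECS entries), and z_erofs_bvset_flip() hops to the next chained page, recomputing how many bvecs fit after the header. Below is a minimal userspace sketch of the same chaining protocol, assuming 4 KiB pages and illustrative types; the kernel version additionally kmaps/kunmaps each chained page and recycles old ones as short-lived pages, which is elided here (the sketch simply leaks its buffers):

    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_SIZE 4096u

    struct bvec { int serial; };                  /* stand-in payload */
    struct bvset { void *nextpage; struct bvec bvec[]; };

    struct bvec_iter { struct bvset *bvset; unsigned int nr, cur; };

    /* capacity of one chained page; the cast-from-NULL expression in
     * z_erofs_bvset_flip() is a hand-rolled offsetof(bvset, bvec) */
    static unsigned int per_page(void)
    {
        unsigned long base = (unsigned long)((struct bvset *)0)->bvec;

        return (PAGE_SIZE - base) / sizeof(struct bvec);
    }

    static void flip(struct bvec_iter *it)
    {
        it->bvset = it->bvset->nextpage;          /* follow the chain */
        it->nr = per_page();
        it->cur = 0;
    }

    static void enqueue(struct bvec_iter *it, struct bvec bv)
    {
        if (it->cur == it->nr) {                  /* current set is full */
            it->bvset->nextpage = calloc(1, PAGE_SIZE);
            flip(it);
        }
        it->bvset->bvec[it->cur++] = bv;
    }

    int main(void)
    {
        struct bvset *head = calloc(1, PAGE_SIZE);
        struct bvec_iter it = { head, 2, 0 };     /* tiny inline set: nr = 2 */
        int i;

        for (i = 0; i < 5; i++)                   /* overflows into chained pages */
            enqueue(&it, (struct bvec){ .serial = i });

        it = (struct bvec_iter){ head, 2, 0 };    /* replay from the head */
        for (i = 0; i < 5; i++) {
            if (it.cur == it.nr)                  /* dequeue-side flip */
                flip(&it);
            printf("%d ", it.bvset->bvec[it.cur++].serial);
        }
        printf("\n");                             /* prints: 0 1 2 3 4 */
        return 0;
    }
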
@@ -46,7 +123,7 @@ static int z_erofs_create_pcluster_pool(void)
 
        for (pcs = pcluster_pool;
             pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) {
-               size = struct_size(a, compressed_pages, pcs->maxpages);
+               size = struct_size(a, compressed_bvecs, pcs->maxpages);
 
                sprintf(pcs->name, "erofs_pcluster-%u", pcs->maxpages);
                pcs->slab = kmem_cache_create(pcs->name, size, 0,
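
struct_size(a, compressed_bvecs, pcs->maxpages) sizes each slab object as the pcluster header plus a flexible array of maxpages bvecs, with the kernel helper saturating on overflow; each trailing slot grew from a bare struct page pointer to a full bvec. A hedged userspace equivalent (pcluster_hdr is illustrative, not the real struct z_erofs_pcluster):

    #include <stddef.h>
    #include <stdio.h>

    struct bvec { void *page; int offset; unsigned int end; };
    struct pcluster_hdr { unsigned int vcnt; struct bvec compressed_bvecs[]; };

    int main(void)
    {
        unsigned int maxpages = 16;
        /* what struct_size(a, compressed_bvecs, maxpages) computes,
         * minus the kernel's saturate-on-overflow protection */
        size_t size = sizeof(struct pcluster_hdr)
                    + (size_t)maxpages * sizeof(struct bvec);

        printf("slab object size: %zu bytes\n", size);
        return 0;
    }
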
@@ -150,30 +227,29 @@ int __init z_erofs_init_zip_subsystem(void)
        return err;
 }
 
-enum z_erofs_collectmode {
-       COLLECT_SECONDARY,
-       COLLECT_PRIMARY,
+enum z_erofs_pclustermode {
+       Z_EROFS_PCLUSTER_INFLIGHT,
        /*
-        * The current collection was the tail of an exist chain, in addition
-        * that the previous processed chained collections are all decided to
+        * The current pcluster was the tail of an existing chain, and the
+        * previously processed chained pclusters have all been decided to
         * be hooked up to it.
-        * A new chain will be created for the remaining collections which are
-        * not processed yet, therefore different from COLLECT_PRIMARY_FOLLOWED,
-        * the next collection cannot reuse the whole page safely in
-        * the following scenario:
+        * A new chain will be created for the remaining pclusters which are
+        * not processed yet, so unlike Z_EROFS_PCLUSTER_FOLLOWED, the next
+        * pcluster cannot safely reuse the whole page for inplace I/O in
+        * the following scenario:
         *  ________________________________________________________________
         * |      tail (partial) page     |       head (partial) page       |
-        * |   (belongs to the next cl)   |   (belongs to the current cl)   |
-        * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________|
+        * |   (belongs to the next pcl)  |   (belongs to the current pcl)  |
+        * |_______PCLUSTER_FOLLOWED______|________PCLUSTER_HOOKED__________|
         */
-       COLLECT_PRIMARY_HOOKED,
+       Z_EROFS_PCLUSTER_HOOKED,
        /*
-        * a weak form of COLLECT_PRIMARY_FOLLOWED, the difference is that it
+        * a weak form of Z_EROFS_PCLUSTER_FOLLOWED; the difference is that it
         * could be dispatched into the bypass queue later due to up-to-date managed
         * pages. All related online pages cannot be reused for inplace I/O (or
-        * pagevec) since it can be directly decoded without I/O submission.
+        * bvpage) since it can be directly decoded without I/O submission.
         */
-       COLLECT_PRIMARY_FOLLOWED_NOINPLACE,
+       Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE,
        /*
         * The current collection has been linked with the owned chain, and
         * could also be linked with the remaining collections, which means
@@ -184,39 +260,36 @@ enum z_erofs_collectmode {
         *  ________________________________________________________________
         * |  tail (partial) page |          head (partial) page           |
         * |  (of the current cl) |      (of the previous collection)      |
-        * |  PRIMARY_FOLLOWED or |                                        |
-        * |_____PRIMARY_HOOKED___|____________PRIMARY_FOLLOWED____________|
+        * | PCLUSTER_FOLLOWED or |                                        |
+        * |_____PCLUSTER_HOOKED__|___________PCLUSTER_FOLLOWED____________|
         *
         * [  (*) the above page can be used as inplace I/O.               ]
         */
-       COLLECT_PRIMARY_FOLLOWED,
+       Z_EROFS_PCLUSTER_FOLLOWED,
 };
 
 struct z_erofs_decompress_frontend {
        struct inode *const inode;
        struct erofs_map_blocks map;
+       struct z_erofs_bvec_iter biter;
 
-       struct z_erofs_pagevec_ctor vector;
-
+       struct page *candidate_bvpage;
        struct z_erofs_pcluster *pcl, *tailpcl;
-       /* a pointer used to pick up inplace I/O pages */
-       struct page **icpage_ptr;
        z_erofs_next_pcluster_t owned_head;
-
-       enum z_erofs_collectmode mode;
+       enum z_erofs_pclustermode mode;
 
        bool readahead;
        /* used for applying cache strategy on the fly */
        bool backmost;
        erofs_off_t headoffset;
+
+       /* an index used to pick up inplace I/O pages in reverse order */
+       unsigned int icur;
 };
 
 #define DECOMPRESS_FRONTEND_INIT(__i) { \
        .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
-       .mode = COLLECT_PRIMARY_FOLLOWED, .backmost = true }
-
-static struct page *z_pagemap_global[Z_EROFS_VMAP_GLOBAL_PAGES];
-static DEFINE_MUTEX(z_pagemap_global_lock);
+       .mode = Z_EROFS_PCLUSTER_FOLLOWED, .backmost = true }
 
 static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
                               enum z_erofs_cache_alloctype type,
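
The numeric ordering of enum z_erofs_pclustermode is load-bearing: several sites in this patch compare modes with < or >= rather than testing individual values, so INFLIGHT must stay the weakest and FOLLOWED the strongest. A small sketch of what each comparison grants (values and names abbreviated from the enum above):

    #include <stdio.h>

    enum pclustermode {              /* declaration order is load-bearing */
        PCLUSTER_INFLIGHT,           /* mid-chain, owned by another chain */
        PCLUSTER_HOOKED,             /* hooked onto an existing chain's tail */
        PCLUSTER_FOLLOWED_NOINPLACE, /* followed, but no inplace I/O allowed */
        PCLUSTER_FOLLOWED,           /* fully owned: the strongest mode */
    };

    static const char * const name[] = {
        "INFLIGHT", "HOOKED", "FOLLOWED_NOINPLACE", "FOLLOWED",
    };

    int main(void)
    {
        enum pclustermode m;

        for (m = PCLUSTER_INFLIGHT; m <= PCLUSTER_FOLLOWED; m++)
            printf("%-19s bind-cache:%d candidate-bvpage:%d keep-ref:%d\n",
                   name[m],
                   m >= PCLUSTER_FOLLOWED,            /* z_erofs_bind_cache() */
                   m >= PCLUSTER_FOLLOWED,            /* z_erofs_attach_page() */
                   m >= PCLUSTER_FOLLOWED_NOINPLACE); /* z_erofs_collector_end() */
        return 0;
    }
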
@@ -231,24 +304,21 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
         */
        gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
                        __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
-       struct page **pages;
-       pgoff_t index;
+       unsigned int i;
 
-       if (fe->mode < COLLECT_PRIMARY_FOLLOWED)
+       if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
                return;
 
-       pages = pcl->compressed_pages;
-       index = pcl->obj.index;
-       for (; index < pcl->obj.index + pcl->pclusterpages; ++index, ++pages) {
+       for (i = 0; i < pcl->pclusterpages; ++i) {
                struct page *page;
                compressed_page_t t;
                struct page *newpage = NULL;
 
                /* the compressed page was loaded before */
-               if (READ_ONCE(*pages))
+               if (READ_ONCE(pcl->compressed_bvecs[i].page))
                        continue;
 
-               page = find_get_page(mc, index);
+               page = find_get_page(mc, pcl->obj.index + i);
 
                if (page) {
                        t = tag_compressed_page_justfound(page);
@@ -269,7 +339,8 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
                        }
                }
 
-               if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t)))
+               if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL,
+                                    tagptr_cast_ptr(t)))
                        continue;
 
                if (page)
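
The slot claim above relies on the cmpxchg return-value contract: cmpxchg_relaxed(&slot, NULL, p) returns the old value, so the !cmpxchg_relaxed(...) test is true exactly when this caller swapped p into an empty slot, and racing writers lose without taking any lock. The C11 analogue, as a hedged sketch:

    #include <stdatomic.h>
    #include <stdio.h>

    static _Atomic(void *) slot;    /* one compressed_bvecs[i].page slot */

    /* returns 1 iff we installed p; the same contract as the kernel's
     * !cmpxchg_relaxed(&slot, NULL, p) */
    static int claim(void *p)
    {
        void *expected = NULL;

        return atomic_compare_exchange_strong_explicit(&slot, &expected, p,
                                                       memory_order_relaxed,
                                                       memory_order_relaxed);
    }

    int main(void)
    {
        int one, two;

        one = claim((void *)0x1000);
        two = claim((void *)0x2000);    /* loses: the slot is taken */
        printf("first=%d second=%d slot=%p\n", one, two,
               atomic_load(&slot));
        return 0;
    }
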
@@ -283,7 +354,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe,
         * managed cache since it can be moved to the bypass queue instead.
         */
        if (standalone)
-               fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+               fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
 }
 
 /* called by erofs_shrinker to get rid of all compressed_pages */
@@ -300,7 +371,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
         * therefore no need to worry about available decompression users.
         */
        for (i = 0; i < pcl->pclusterpages; ++i) {
-               struct page *page = pcl->compressed_pages[i];
+               struct page *page = pcl->compressed_bvecs[i].page;
 
                if (!page)
                        continue;
@@ -313,7 +384,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
                        continue;
 
                /* barrier is implied in the following 'unlock_page' */
-               WRITE_ONCE(pcl->compressed_pages[i], NULL);
+               WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
                detach_page_private(page);
                unlock_page(page);
        }
@@ -323,56 +394,59 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
 int erofs_try_to_free_cached_page(struct page *page)
 {
        struct z_erofs_pcluster *const pcl = (void *)page_private(page);
-       int ret = 0;    /* 0 - busy */
+       int ret, i;
 
-       if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) {
-               unsigned int i;
+       if (!erofs_workgroup_try_to_freeze(&pcl->obj, 1))
+               return 0;
 
-               DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
-               for (i = 0; i < pcl->pclusterpages; ++i) {
-                       if (pcl->compressed_pages[i] == page) {
-                               WRITE_ONCE(pcl->compressed_pages[i], NULL);
-                               ret = 1;
-                               break;
-                       }
+       ret = 0;
+       DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+       for (i = 0; i < pcl->pclusterpages; ++i) {
+               if (pcl->compressed_bvecs[i].page == page) {
+                       WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
+                       ret = 1;
+                       break;
                }
-               erofs_workgroup_unfreeze(&pcl->obj, 1);
-
-               if (ret)
-                       detach_page_private(page);
        }
+       erofs_workgroup_unfreeze(&pcl->obj, 1);
+       if (ret)
+               detach_page_private(page);
        return ret;
 }
 
-/* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */
 static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
-                                  struct page *page)
+                                  struct z_erofs_bvec *bvec)
 {
        struct z_erofs_pcluster *const pcl = fe->pcl;
 
-       while (fe->icpage_ptr > pcl->compressed_pages)
-               if (!cmpxchg(--fe->icpage_ptr, NULL, page))
+       while (fe->icur > 0) {
+               if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page,
+                            NULL, bvec->page)) {
+                       pcl->compressed_bvecs[fe->icur] = *bvec;
                        return true;
+               }
+       }
        return false;
 }
 
 /* callers must hold the pcluster lock */
 static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
-                              struct page *page, enum z_erofs_page_type type,
-                              bool pvec_safereuse)
+                              struct z_erofs_bvec *bvec, bool exclusive)
 {
        int ret;
 
-       /* give priority for inplaceio */
-       if (fe->mode >= COLLECT_PRIMARY &&
-           type == Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
-           z_erofs_try_inplace_io(fe, page))
-               return 0;
-
-       ret = z_erofs_pagevec_enqueue(&fe->vector, page, type,
-                                     pvec_safereuse);
-       fe->pcl->vcnt += (unsigned int)ret;
-       return ret ? 0 : -EAGAIN;
+       if (exclusive) {
+               /* give priority to inplace I/O so file pages are used first */
+               if (z_erofs_try_inplace_io(fe, bvec))
+                       return 0;
+               /* otherwise, check if it can be used as a bvpage */
+               if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
+                   !fe->candidate_bvpage)
+                       fe->candidate_bvpage = bvec->page;
+       }
+       ret = z_erofs_bvec_enqueue(&fe->biter, bvec, &fe->candidate_bvpage);
+       fe->pcl->vcnt += (ret >= 0);
+       return ret;
 }
 
 static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
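
z_erofs_try_inplace_io() now walks the compressed slots downward from fe->icur, because file-backed online pages arrive in reverse order (see the comment in z_erofs_collector_begin() below); once a NULL slot is claimed via cmpxchg, the whole bvec, page plus offset, is recorded so the decompressor can locate the data later. A hedged userspace sketch of that reverse scan:

    #include <stdatomic.h>
    #include <stdio.h>

    #define NSLOTS 4

    struct bvec { void *page; int offset; };

    static _Atomic(void *) pages[NSLOTS];   /* compressed_bvecs[i].page */
    static struct bvec bvecs[NSLOTS];       /* full records, filled on claim */

    /* sketch of z_erofs_try_inplace_io(): scan downward from *icur */
    static int try_inplace(unsigned int *icur, struct bvec bv)
    {
        while (*icur > 0) {
            void *expected = NULL;

            --*icur;
            if (atomic_compare_exchange_strong(&pages[*icur], &expected,
                                               bv.page)) {
                bvecs[*icur] = bv;          /* keep the offset alongside */
                return 1;
            }
        }
        return 0;                           /* every slot already taken */
    }

    int main(void)
    {
        unsigned int icur = NSLOTS;
        struct bvec bv = { (void *)0x1000, 0 };
        int ok = try_inplace(&icur, bv);

        printf("claimed=%d at slot %u\n", ok, icur);
        return 0;
    }
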
@@ -385,7 +459,7 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
                    *owned_head) == Z_EROFS_PCLUSTER_NIL) {
                *owned_head = &pcl->next;
                /* so we can attach this pcluster to our submission chain. */
-               f->mode = COLLECT_PRIMARY_FOLLOWED;
+               f->mode = Z_EROFS_PCLUSTER_FOLLOWED;
                return;
        }
 
@@ -393,66 +467,21 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
         * type 2, link to the end of an existing open chain, be careful
         * that its submission is controlled by the original attached chain.
         */
-       if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
+       if (*owned_head != &pcl->next && pcl != f->tailpcl &&
+           cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL,
                    *owned_head) == Z_EROFS_PCLUSTER_TAIL) {
                *owned_head = Z_EROFS_PCLUSTER_TAIL;
-               f->mode = COLLECT_PRIMARY_HOOKED;
+               f->mode = Z_EROFS_PCLUSTER_HOOKED;
                f->tailpcl = NULL;
                return;
        }
        /* type 3, it belongs to a chain, but it isn't the end of the chain */
-       f->mode = COLLECT_PRIMARY;
+       f->mode = Z_EROFS_PCLUSTER_INFLIGHT;
 }
 
-static int z_erofs_lookup_pcluster(struct z_erofs_decompress_frontend *fe,
-                                  struct inode *inode,
-                                  struct erofs_map_blocks *map)
-{
-       struct z_erofs_pcluster *pcl = fe->pcl;
-       unsigned int length;
-
-       /* to avoid unexpected loop formed by corrupted images */
-       if (fe->owned_head == &pcl->next || pcl == fe->tailpcl) {
-               DBG_BUGON(1);
-               return -EFSCORRUPTED;
-       }
-
-       if (pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) {
-               DBG_BUGON(1);
-               return -EFSCORRUPTED;
-       }
-
-       length = READ_ONCE(pcl->length);
-       if (length & Z_EROFS_PCLUSTER_FULL_LENGTH) {
-               if ((map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) > length) {
-                       DBG_BUGON(1);
-                       return -EFSCORRUPTED;
-               }
-       } else {
-               unsigned int llen = map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT;
-
-               if (map->m_flags & EROFS_MAP_FULL_MAPPED)
-                       llen |= Z_EROFS_PCLUSTER_FULL_LENGTH;
-
-               while (llen > length &&
-                      length != cmpxchg_relaxed(&pcl->length, length, llen)) {
-                       cpu_relax();
-                       length = READ_ONCE(pcl->length);
-               }
-       }
-       mutex_lock(&pcl->lock);
-       /* used to check tail merging loop due to corrupted images */
-       if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
-               fe->tailpcl = pcl;
-
-       z_erofs_try_to_claim_pcluster(fe);
-       return 0;
-}
-
-static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
-                                    struct inode *inode,
-                                    struct erofs_map_blocks *map)
+static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
 {
+       struct erofs_map_blocks *map = &fe->map;
        bool ztailpacking = map->m_flags & EROFS_MAP_META;
        struct z_erofs_pcluster *pcl;
        struct erofs_workgroup *grp;
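
Chain claiming in z_erofs_try_to_claim_pcluster() is lock-free: pcl->next is NIL when the pcluster belongs to no chain, TAIL when it terminates an open chain, or a pointer when it sits mid-chain, and a cmpxchg decides which of the three modes the caller ends up in. The extra "*owned_head != &pcl->next && pcl != f->tailpcl" guard, replacing the check in the removed z_erofs_lookup_pcluster(), avoids loops formed by corrupted images. A hedged C11 sketch of the three outcomes, with illustrative sentinel values:

    #include <stdatomic.h>
    #include <stdio.h>

    #define PCL_NIL  ((void *)0)    /* not on any chain */
    #define PCL_TAIL ((void *)0x1)  /* terminates an open chain */

    struct pcl { _Atomic(void *) next; };

    static const char *try_claim(struct pcl *pcl, void **owned_head)
    {
        void *exp;

        exp = PCL_NIL;              /* type 1: adopt it into our chain */
        if (atomic_compare_exchange_strong(&pcl->next, &exp, *owned_head)) {
            *owned_head = &pcl->next;
            return "FOLLOWED";
        }
        exp = PCL_TAIL;             /* type 2: hook onto an open chain */
        if (atomic_compare_exchange_strong(&pcl->next, &exp, *owned_head)) {
            *owned_head = PCL_TAIL;
            return "HOOKED";
        }
        return "INFLIGHT";          /* type 3: mid-chain, someone else's */
    }

    int main(void)
    {
        struct pcl a, b, c;
        void *head = PCL_TAIL;
        const char *r1, *r2, *r3;

        atomic_init(&a.next, PCL_NIL);
        atomic_init(&b.next, PCL_TAIL);
        atomic_init(&c.next, &a.next);      /* already linked mid-chain */
        r1 = try_claim(&a, &head);
        r2 = try_claim(&b, &head);
        r3 = try_claim(&c, &head);
        printf("%s %s %s\n", r1, r2, r3);   /* FOLLOWED HOOKED INFLIGHT */
        return 0;
    }
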
@@ -471,14 +500,13 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
 
        atomic_set(&pcl->obj.refcount, 1);
        pcl->algorithmformat = map->m_algorithmformat;
-       pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) |
-               (map->m_flags & EROFS_MAP_FULL_MAPPED ?
-                       Z_EROFS_PCLUSTER_FULL_LENGTH : 0);
+       pcl->length = 0;
+       pcl->partial = true;
 
        /* new pclusters should be claimed as type 1, primary and followed */
        pcl->next = fe->owned_head;
        pcl->pageofs_out = map->m_la & ~PAGE_MASK;
-       fe->mode = COLLECT_PRIMARY_FOLLOWED;
+       fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
 
        /*
         * lock all primary followed works before visible to others
@@ -494,7 +522,7 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe,
        } else {
                pcl->obj.index = map->m_pa >> PAGE_SHIFT;
 
-               grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj);
+               grp = erofs_insert_workgroup(fe->inode->i_sb, &pcl->obj);
                if (IS_ERR(grp)) {
                        err = PTR_ERR(grp);
                        goto err_out;
@@ -520,11 +548,10 @@ err_out:
        return err;
 }
 
-static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
-                                  struct inode *inode,
-                                  struct erofs_map_blocks *map)
+static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
 {
-       struct erofs_workgroup *grp;
+       struct erofs_map_blocks *map = &fe->map;
+       struct erofs_workgroup *grp = NULL;
        int ret;
 
        DBG_BUGON(fe->pcl);
@@ -533,38 +560,35 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe,
        DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
        DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_TAIL_CLOSED);
 
-       if (map->m_flags & EROFS_MAP_META) {
-               if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
-                       DBG_BUGON(1);
-                       return -EFSCORRUPTED;
-               }
-               goto tailpacking;
+       if (!(map->m_flags & EROFS_MAP_META)) {
+               grp = erofs_find_workgroup(fe->inode->i_sb,
+                                          map->m_pa >> PAGE_SHIFT);
+       } else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
+               DBG_BUGON(1);
+               return -EFSCORRUPTED;
        }
 
-       grp = erofs_find_workgroup(inode->i_sb, map->m_pa >> PAGE_SHIFT);
        if (grp) {
                fe->pcl = container_of(grp, struct z_erofs_pcluster, obj);
+               ret = -EEXIST;
        } else {
-tailpacking:
-               ret = z_erofs_register_pcluster(fe, inode, map);
-               if (!ret)
-                       goto out;
-               if (ret != -EEXIST)
-                       return ret;
+               ret = z_erofs_register_pcluster(fe);
        }
 
-       ret = z_erofs_lookup_pcluster(fe, inode, map);
-       if (ret) {
-               erofs_workgroup_put(&fe->pcl->obj);
+       if (ret == -EEXIST) {
+               mutex_lock(&fe->pcl->lock);
+               /* used to check tail merging loop due to corrupted images */
+               if (fe->owned_head == Z_EROFS_PCLUSTER_TAIL)
+                       fe->tailpcl = fe->pcl;
+
+               z_erofs_try_to_claim_pcluster(fe);
+       } else if (ret) {
                return ret;
        }
-
-out:
-       z_erofs_pagevec_ctor_init(&fe->vector, Z_EROFS_NR_INLINE_PAGEVECS,
-                                 fe->pcl->pagevec, fe->pcl->vcnt);
+       z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
+                               Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
        /* since file-backed online pages are traversed in reverse order */
-       fe->icpage_ptr = fe->pcl->compressed_pages +
-                       z_erofs_pclusterpages(fe->pcl);
+       fe->icur = z_erofs_pclusterpages(fe->pcl);
        return 0;
 }
 
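With z_erofs_lookup_pcluster() folded away, z_erofs_collector_begin() funnels both outcomes through a single -EEXIST branch: a successful lookup sets -EEXIST directly, and a lost insertion race inside z_erofs_register_pcluster() reports the same code, so one branch locks and claims the existing pcluster either way. A hedged sketch of that find-or-register shape (the toy find()/reg() registry is hypothetical, standing in for erofs_find_workgroup()/erofs_insert_workgroup()):

    #include <errno.h>
    #include <stdio.h>

    static int registry[8];         /* toy keyed table, zero means empty */

    static int find(int key) { return registry[key & 7] == key; }

    static int reg(int key)
    {
        if (registry[key & 7] == key)
            return -EEXIST;         /* lost the insertion race */
        registry[key & 7] = key;
        return 0;
    }

    /* sketch of the z_erofs_collector_begin() control flow */
    static int begin(int key)
    {
        int ret = find(key) ? -EEXIST : reg(key);

        if (ret == -EEXIST) {       /* existing pcluster: lock and claim */
            printf("key %d: reuse existing pcluster\n", key);
            return 0;
        }
        if (ret)
            return ret;             /* genuine error */
        printf("key %d: registered a new pcluster\n", key);
        return 0;
    }

    int main(void)
    {
        begin(5);                   /* registered a new pcluster */
        begin(5);                   /* reuse existing pcluster */
        return 0;
    }
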
@@ -593,14 +617,19 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
        if (!pcl)
                return false;
 
-       z_erofs_pagevec_ctor_exit(&fe->vector, false);
+       z_erofs_bvec_iter_end(&fe->biter);
        mutex_unlock(&pcl->lock);
 
+       if (fe->candidate_bvpage) {
+               DBG_BUGON(z_erofs_is_shortlived_page(fe->candidate_bvpage));
+               fe->candidate_bvpage = NULL;
+       }
+
        /*
         * if all pending pages are added, don't hold its reference
         * any longer if the pcluster isn't hosted by ourselves.
         */
-       if (fe->mode < COLLECT_PRIMARY_FOLLOWED_NOINPLACE)
+       if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
                erofs_workgroup_put(&pcl->obj);
 
        fe->pcl = NULL;
@@ -628,11 +657,10 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
        struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
        struct erofs_map_blocks *const map = &fe->map;
        const loff_t offset = page_offset(page);
-       bool tight = true;
+       bool tight = true, exclusive;
 
        enum z_erofs_cache_alloctype cache_strategy;
-       enum z_erofs_page_type page_type;
-       unsigned int cur, end, spiltted, index;
+       unsigned int cur, end, spiltted;
        int err = 0;
 
        /* register locked file pages as online pages in pack */
@@ -653,7 +681,7 @@ repeat:
                map->m_llen = 0;
                err = z_erofs_map_blocks_iter(inode, map, 0);
                if (err)
-                       goto err_out;
+                       goto out;
        } else {
                if (fe->pcl)
                        goto hitted;
@@ -663,9 +691,9 @@ repeat:
        if (!(map->m_flags & EROFS_MAP_MAPPED))
                goto hitted;
 
-       err = z_erofs_collector_begin(fe, inode, map);
+       err = z_erofs_collector_begin(fe);
        if (err)
-               goto err_out;
+               goto out;
 
        if (z_erofs_is_inline_pcluster(fe->pcl)) {
                void *mp;
@@ -676,11 +704,12 @@ repeat:
                        err = PTR_ERR(mp);
                        erofs_err(inode->i_sb,
                                  "failed to get inline page, err %d", err);
-                       goto err_out;
+                       goto out;
                }
                get_page(fe->map.buf.page);
-               WRITE_ONCE(fe->pcl->compressed_pages[0], fe->map.buf.page);
-               fe->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE;
+               WRITE_ONCE(fe->pcl->compressed_bvecs[0].page,
+                          fe->map.buf.page);
+               fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
        } else {
                /* bind cache first when cached decompression is preferred */
                if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy,
@@ -696,10 +725,10 @@ hitted:
         * Ensure the current partial page belongs to this submit chain rather
         * than other concurrent submit chains or the noio(bypass) chain since
         * those chains are handled asynchronously thus the page cannot be used
-        * for inplace I/O or pagevec (should be processed in strict order.)
+        * for inplace I/O or bvpage (should be processed in strict order).
         */
-       tight &= (fe->mode >= COLLECT_PRIMARY_HOOKED &&
-                 fe->mode != COLLECT_PRIMARY_FOLLOWED_NOINPLACE);
+       tight &= (fe->mode >= Z_EROFS_PCLUSTER_HOOKED &&
+                 fe->mode != Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
 
        cur = end - min_t(unsigned int, offset + end - map->m_la, end);
        if (!(map->m_flags & EROFS_MAP_MAPPED)) {
@@ -707,60 +736,59 @@ hitted:
                goto next_part;
        }
 
-       /* let's derive page type */
-       page_type = cur ? Z_EROFS_VLE_PAGE_TYPE_HEAD :
-               (!spiltted ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
-                       (tight ? Z_EROFS_PAGE_TYPE_EXCLUSIVE :
-                               Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED));
-
+       exclusive = (!cur && (!spiltted || tight));
        if (cur)
-               tight &= (fe->mode >= COLLECT_PRIMARY_FOLLOWED);
+               tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
 
 retry:
-       err = z_erofs_attach_page(fe, page, page_type,
-                                 fe->mode >= COLLECT_PRIMARY_FOLLOWED);
-       /* should allocate an additional short-lived page for pagevec */
-       if (err == -EAGAIN) {
-               struct page *const newpage =
-                               alloc_page(GFP_NOFS | __GFP_NOFAIL);
-
-               set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
-               err = z_erofs_attach_page(fe, newpage,
-                                         Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
-               if (!err)
-                       goto retry;
+       err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) {
+                                       .page = page,
+                                       .offset = offset - map->m_la,
+                                       .end = end,
+                                 }), exclusive);
+       /* should allocate an additional short-lived page for bvset */
+       if (err == -EAGAIN && !fe->candidate_bvpage) {
+               fe->candidate_bvpage = alloc_page(GFP_NOFS | __GFP_NOFAIL);
+               set_page_private(fe->candidate_bvpage,
+                                Z_EROFS_SHORTLIVED_PAGE);
+               goto retry;
        }
 
-       if (err)
-               goto err_out;
-
-       index = page->index - (map->m_la >> PAGE_SHIFT);
-
-       z_erofs_onlinepage_fixup(page, index, true);
+       if (err) {
+               DBG_BUGON(err == -EAGAIN && fe->candidate_bvpage);
+               goto out;
+       }
 
+       z_erofs_onlinepage_split(page);
        /* bump up the number of spiltted parts of a page */
        ++spiltted;
-       /* also update nr_pages */
-       fe->pcl->nr_pages = max_t(pgoff_t, fe->pcl->nr_pages, index + 1);
+       if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
+               fe->pcl->multibases = true;
+
+       if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
+           fe->pcl->length == map->m_llen)
+               fe->pcl->partial = false;
+       if (fe->pcl->length < offset + end - map->m_la) {
+               fe->pcl->length = offset + end - map->m_la;
+               fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK;
+       }
 next_part:
-       /* can be used for verification */
+       /* shorten the remaining extent to update progress */
        map->m_llen = offset + cur - map->m_la;
+       map->m_flags &= ~EROFS_MAP_FULL_MAPPED;
 
        end = cur;
        if (end > 0)
                goto repeat;
 
 out:
+       if (err)
+               z_erofs_page_mark_eio(page);
        z_erofs_onlinepage_endio(page);
 
        erofs_dbg("%s, finish page: %pK spiltted: %u map->m_llen %llu",
                  __func__, page, spiltted, map->m_llen);
        return err;
-
-       /* if some error occurred while processing this page */
-err_out:
-       SetPageError(page);
-       goto out;
 }
 
 static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi,
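
z_erofs_do_read_page() walks one page's extents back to front: each pass attaches the byte range [cur, end) to the current pcluster as a bvec whose signed offset is the page's position relative to the extent start (it can be negative when the extent begins mid-page, which z_erofs_fill_other_copies() handles later), shrinks map->m_llen to record progress, and repeats until end reaches zero. A hedged userspace sketch of that arithmetic for one 4 KiB page, with an illustrative two-extent layout instead of real z_erofs_map_blocks_iter() calls:

    #include <stdio.h>

    #define PAGE_SIZE 4096u

    struct extent { unsigned long long la, llen; };

    int main(void)
    {
        /* a page at file offset 8192 covered by two logical extents */
        unsigned long long offset = 8192;
        struct extent ext[] = { { 4096, 6144 }, { 10240, 4096 } };
        unsigned int end = PAGE_SIZE, cur, spiltted = 0;
        int i;

        for (i = 1; i >= 0 && end > 0; i--) {
            unsigned long long la = ext[i].la;
            unsigned long long tail = offset + end - la;

            /* first byte of [0, end) that lies inside this extent */
            cur = end - (tail < end ? (unsigned int)tail : end);
            printf("extent la=%llu: page bytes [%u, %u), bvec offset %lld\n",
                   la, cur, end, (long long)(offset - la));
            spiltted++;                 /* one more split part */
            end = cur;                  /* shorten, step to prior extent */
        }
        printf("page split into %u parts\n", spiltted);
        return 0;
    }
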
@@ -783,97 +811,137 @@ static bool z_erofs_page_is_invalidated(struct page *page)
        return !page->mapping && !z_erofs_is_shortlived_page(page);
 }
 
-static int z_erofs_decompress_pcluster(struct super_block *sb,
-                                      struct z_erofs_pcluster *pcl,
-                                      struct page **pagepool)
-{
-       struct erofs_sb_info *const sbi = EROFS_SB(sb);
-       unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
-       struct z_erofs_pagevec_ctor ctor;
-       unsigned int i, inputsize, outputsize, llen, nr_pages;
-       struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES];
-       struct page **pages, **compressed_pages, *page;
+struct z_erofs_decompress_backend {
+       struct page *onstack_pages[Z_EROFS_ONSTACK_PAGES];
+       struct super_block *sb;
+       struct z_erofs_pcluster *pcl;
 
-       enum z_erofs_page_type page_type;
-       bool overlapped, partial;
-       int err;
+       /* pages with the longest decompressed length for deduplication */
+       struct page **decompressed_pages;
+       /* pages to keep the compressed data */
+       struct page **compressed_pages;
 
-       might_sleep();
-       DBG_BUGON(!READ_ONCE(pcl->nr_pages));
+       struct list_head decompressed_secondary_bvecs;
+       struct page **pagepool;
+       unsigned int onstack_used, nr_pages;
+};
 
-       mutex_lock(&pcl->lock);
-       nr_pages = pcl->nr_pages;
+struct z_erofs_bvec_item {
+       struct z_erofs_bvec bvec;
+       struct list_head list;
+};
 
-       if (nr_pages <= Z_EROFS_VMAP_ONSTACK_PAGES) {
-               pages = pages_onstack;
-       } else if (nr_pages <= Z_EROFS_VMAP_GLOBAL_PAGES &&
-                  mutex_trylock(&z_pagemap_global_lock)) {
-               pages = z_pagemap_global;
-       } else {
-               gfp_t gfp_flags = GFP_KERNEL;
+static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
+                                        struct z_erofs_bvec *bvec)
+{
+       struct z_erofs_bvec_item *item;
 
-               if (nr_pages > Z_EROFS_VMAP_GLOBAL_PAGES)
-                       gfp_flags |= __GFP_NOFAIL;
+       if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
+               unsigned int pgnr;
+               struct page *oldpage;
 
-               pages = kvmalloc_array(nr_pages, sizeof(struct page *),
-                                      gfp_flags);
+               pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
+               DBG_BUGON(pgnr >= be->nr_pages);
+               oldpage = be->decompressed_pages[pgnr];
+               be->decompressed_pages[pgnr] = bvec->page;
 
-               /* fallback to global pagemap for the lowmem scenario */
-               if (!pages) {
-                       mutex_lock(&z_pagemap_global_lock);
-                       pages = z_pagemap_global;
-               }
+               if (!oldpage)
+                       return;
        }
 
-       for (i = 0; i < nr_pages; ++i)
-               pages[i] = NULL;
-
-       err = 0;
-       z_erofs_pagevec_ctor_init(&ctor, Z_EROFS_NR_INLINE_PAGEVECS,
-                                 pcl->pagevec, 0);
-
-       for (i = 0; i < pcl->vcnt; ++i) {
-               unsigned int pagenr;
+       /* (cold path) one pcluster is requested multiple times */
+       item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_NOFAIL);
+       item->bvec = *bvec;
+       list_add(&item->list, &be->decompressed_secondary_bvecs);
+}
 
-               page = z_erofs_pagevec_dequeue(&ctor, &page_type);
+static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
+                                     int err)
+{
+       unsigned int off0 = be->pcl->pageofs_out;
+       struct list_head *p, *n;
+
+       list_for_each_safe(p, n, &be->decompressed_secondary_bvecs) {
+               struct z_erofs_bvec_item *bvi;
+               unsigned int end, cur;
+               void *dst, *src;
+
+               bvi = container_of(p, struct z_erofs_bvec_item, list);
+               cur = bvi->bvec.offset < 0 ? -bvi->bvec.offset : 0;
+               end = min_t(unsigned int, be->pcl->length - bvi->bvec.offset,
+                           bvi->bvec.end);
+               dst = kmap_local_page(bvi->bvec.page);
+               while (cur < end) {
+                       unsigned int pgnr, scur, len;
+
+                       pgnr = (bvi->bvec.offset + cur + off0) >> PAGE_SHIFT;
+                       DBG_BUGON(pgnr >= be->nr_pages);
+
+                       scur = bvi->bvec.offset + cur -
+                                       ((pgnr << PAGE_SHIFT) - off0);
+                       len = min_t(unsigned int, end - cur, PAGE_SIZE - scur);
+                       if (!be->decompressed_pages[pgnr]) {
+                               err = -EFSCORRUPTED;
+                               cur += len;
+                               continue;
+                       }
+                       src = kmap_local_page(be->decompressed_pages[pgnr]);
+                       memcpy(dst + cur, src + scur, len);
+                       kunmap_local(src);
+                       cur += len;
+               }
+               kunmap_local(dst);
+               if (err)
+                       z_erofs_page_mark_eio(bvi->bvec.page);
+               z_erofs_onlinepage_endio(bvi->bvec.page);
+               list_del(p);
+               kfree(bvi);
+       }
+}
 
-               /* all pages in pagevec ought to be valid */
-               DBG_BUGON(!page);
-               DBG_BUGON(z_erofs_page_is_invalidated(page));
+static void z_erofs_parse_out_bvecs(struct z_erofs_decompress_backend *be)
+{
+       struct z_erofs_pcluster *pcl = be->pcl;
+       struct z_erofs_bvec_iter biter;
+       struct page *old_bvpage;
+       int i;
 
-               if (z_erofs_put_shortlivedpage(pagepool, page))
-                       continue;
+       z_erofs_bvec_iter_begin(&biter, &pcl->bvset, Z_EROFS_INLINE_BVECS, 0);
+       for (i = 0; i < pcl->vcnt; ++i) {
+               struct z_erofs_bvec bvec;
 
-               if (page_type == Z_EROFS_VLE_PAGE_TYPE_HEAD)
-                       pagenr = 0;
-               else
-                       pagenr = z_erofs_onlinepage_index(page);
+               z_erofs_bvec_dequeue(&biter, &bvec, &old_bvpage);
 
-               DBG_BUGON(pagenr >= nr_pages);
+               if (old_bvpage)
+                       z_erofs_put_shortlivedpage(be->pagepool, old_bvpage);
 
-               /*
-                * currently EROFS doesn't support multiref(dedup),
-                * so here erroring out one multiref page.
-                */
-               if (pages[pagenr]) {
-                       DBG_BUGON(1);
-                       SetPageError(pages[pagenr]);
-                       z_erofs_onlinepage_endio(pages[pagenr]);
-                       err = -EFSCORRUPTED;
-               }
-               pages[pagenr] = page;
+               DBG_BUGON(z_erofs_page_is_invalidated(bvec.page));
+               z_erofs_do_decompressed_bvec(be, &bvec);
        }
-       z_erofs_pagevec_ctor_exit(&ctor, true);
 
-       overlapped = false;
-       compressed_pages = pcl->compressed_pages;
+       old_bvpage = z_erofs_bvec_iter_end(&biter);
+       if (old_bvpage)
+               z_erofs_put_shortlivedpage(be->pagepool, old_bvpage);
+}
 
+static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
+                                 bool *overlapped)
+{
+       struct z_erofs_pcluster *pcl = be->pcl;
+       unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+       int i, err = 0;
+
+       *overlapped = false;
        for (i = 0; i < pclusterpages; ++i) {
-               unsigned int pagenr;
+               struct z_erofs_bvec *bvec = &pcl->compressed_bvecs[i];
+               struct page *page = bvec->page;
 
-               page = compressed_pages[i];
-               /* all compressed pages ought to be valid */
-               DBG_BUGON(!page);
+               /* compressed pages ought to be present before decompressing */
+               if (!page) {
+                       DBG_BUGON(1);
+                       continue;
+               }
+               be->compressed_pages[i] = page;
 
                if (z_erofs_is_inline_pcluster(pcl)) {
                        if (!PageUptodate(page))
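
The copy loop in z_erofs_fill_other_copies() above is pure page arithmetic: for a duplicated bvec it locates, fragment by fragment, which primary decompressed page (pgnr) holds each byte range and at what in-page offset (scur), clamping every copy to the end of the source page. A standalone sketch of exactly that span computation, with illustrative numbers:

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1u << PAGE_SHIFT)

    static unsigned int min_u(unsigned int a, unsigned int b)
    {
        return a < b ? a : b;
    }

    int main(void)
    {
        int offset = -512;          /* bvec may start before pageofs_out */
        unsigned int off0 = 1024;   /* pcl->pageofs_out */
        unsigned int end = 6000;    /* valid bytes in this bvec */
        unsigned int cur = offset < 0 ? -offset : 0;

        while (cur < end) {
            unsigned int pgnr = (offset + cur + off0) >> PAGE_SHIFT;
            unsigned int scur = offset + cur - ((pgnr << PAGE_SHIFT) - off0);
            unsigned int len = min_u(end - cur, PAGE_SIZE - scur);

            printf("dst [%u, %u) <- page %u at offset %u (%u bytes)\n",
                   cur, cur + len, pgnr, scur, len);
            cur += len;             /* memcpy(dst+cur, src+scur, len) here */
        }
        return 0;
    }
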
@@ -883,109 +951,129 @@ static int z_erofs_decompress_pcluster(struct super_block *sb,
 
                DBG_BUGON(z_erofs_page_is_invalidated(page));
                if (!z_erofs_is_shortlived_page(page)) {
-                       if (erofs_page_is_managed(sbi, page)) {
+                       if (erofs_page_is_managed(EROFS_SB(be->sb), page)) {
                                if (!PageUptodate(page))
                                        err = -EIO;
                                continue;
                        }
+                       z_erofs_do_decompressed_bvec(be, bvec);
+                       *overlapped = true;
+               }
+       }
 
-                       /*
-                        * only if non-head page can be selected
-                        * for inplace decompression
-                        */
-                       pagenr = z_erofs_onlinepage_index(page);
-
-                       DBG_BUGON(pagenr >= nr_pages);
-                       if (pages[pagenr]) {
-                               DBG_BUGON(1);
-                               SetPageError(pages[pagenr]);
-                               z_erofs_onlinepage_endio(pages[pagenr]);
-                               err = -EFSCORRUPTED;
-                       }
-                       pages[pagenr] = page;
+       if (err)
+               return err;
+       return 0;
+}
 
-                       overlapped = true;
-               }
+static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
+                                      int err)
+{
+       struct erofs_sb_info *const sbi = EROFS_SB(be->sb);
+       struct z_erofs_pcluster *pcl = be->pcl;
+       unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+       unsigned int i, inputsize;
+       int err2;
+       struct page *page;
+       bool overlapped;
 
-               /* PG_error needs checking for all non-managed pages */
-               if (PageError(page)) {
-                       DBG_BUGON(PageUptodate(page));
-                       err = -EIO;
-               }
+       mutex_lock(&pcl->lock);
+       be->nr_pages = PAGE_ALIGN(pcl->length + pcl->pageofs_out) >> PAGE_SHIFT;
+
+       /* allocate (de)compressed page arrays if cannot be kept on stack */
+       be->decompressed_pages = NULL;
+       be->compressed_pages = NULL;
+       be->onstack_used = 0;
+       if (be->nr_pages <= Z_EROFS_ONSTACK_PAGES) {
+               be->decompressed_pages = be->onstack_pages;
+               be->onstack_used = be->nr_pages;
+               memset(be->decompressed_pages, 0,
+                      sizeof(struct page *) * be->nr_pages);
        }
 
+       if (pclusterpages + be->onstack_used <= Z_EROFS_ONSTACK_PAGES)
+               be->compressed_pages = be->onstack_pages + be->onstack_used;
+
+       if (!be->decompressed_pages)
+               be->decompressed_pages =
+                       kvcalloc(be->nr_pages, sizeof(struct page *),
+                                GFP_KERNEL | __GFP_NOFAIL);
+       if (!be->compressed_pages)
+               be->compressed_pages =
+                       kvcalloc(pclusterpages, sizeof(struct page *),
+                                GFP_KERNEL | __GFP_NOFAIL);
+
+       z_erofs_parse_out_bvecs(be);
+       err2 = z_erofs_parse_in_bvecs(be, &overlapped);
+       if (err2)
+               err = err2;
        if (err)
                goto out;
 
-       llen = pcl->length >> Z_EROFS_PCLUSTER_LENGTH_BIT;
-       if (nr_pages << PAGE_SHIFT >= pcl->pageofs_out + llen) {
-               outputsize = llen;
-               partial = !(pcl->length & Z_EROFS_PCLUSTER_FULL_LENGTH);
-       } else {
-               outputsize = (nr_pages << PAGE_SHIFT) - pcl->pageofs_out;
-               partial = true;
-       }
-
        if (z_erofs_is_inline_pcluster(pcl))
                inputsize = pcl->tailpacking_size;
        else
                inputsize = pclusterpages * PAGE_SIZE;
 
        err = z_erofs_decompress(&(struct z_erofs_decompress_req) {
-                                       .sb = sb,
-                                       .in = compressed_pages,
-                                       .out = pages,
+                                       .sb = be->sb,
+                                       .in = be->compressed_pages,
+                                       .out = be->decompressed_pages,
                                        .pageofs_in = pcl->pageofs_in,
                                        .pageofs_out = pcl->pageofs_out,
                                        .inputsize = inputsize,
-                                       .outputsize = outputsize,
+                                       .outputsize = pcl->length,
                                        .alg = pcl->algorithmformat,
                                        .inplace_io = overlapped,
-                                       .partial_decoding = partial
-                                }, pagepool);
+                                       .partial_decoding = pcl->partial,
+                                       .fillgaps = pcl->multibases,
+                                }, be->pagepool);
 
 out:
        /* must handle all compressed pages before actual file pages */
        if (z_erofs_is_inline_pcluster(pcl)) {
-               page = compressed_pages[0];
-               WRITE_ONCE(compressed_pages[0], NULL);
+               page = pcl->compressed_bvecs[0].page;
+               WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
                put_page(page);
        } else {
                for (i = 0; i < pclusterpages; ++i) {
-                       page = compressed_pages[i];
+                       page = pcl->compressed_bvecs[i].page;
 
                        if (erofs_page_is_managed(sbi, page))
                                continue;
 
                        /* recycle all individual short-lived pages */
-                       (void)z_erofs_put_shortlivedpage(pagepool, page);
-                       WRITE_ONCE(compressed_pages[i], NULL);
+                       (void)z_erofs_put_shortlivedpage(be->pagepool, page);
+                       WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
                }
        }
+       if (be->compressed_pages < be->onstack_pages ||
+           be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES)
+               kvfree(be->compressed_pages);
+       z_erofs_fill_other_copies(be, err);
 
-       for (i = 0; i < nr_pages; ++i) {
-               page = pages[i];
+       for (i = 0; i < be->nr_pages; ++i) {
+               page = be->decompressed_pages[i];
                if (!page)
                        continue;
 
                DBG_BUGON(z_erofs_page_is_invalidated(page));
 
                /* recycle all individual short-lived pages */
-               if (z_erofs_put_shortlivedpage(pagepool, page))
+               if (z_erofs_put_shortlivedpage(be->pagepool, page))
                        continue;
-
-               if (err < 0)
-                       SetPageError(page);
-
+               if (err)
+                       z_erofs_page_mark_eio(page);
                z_erofs_onlinepage_endio(page);
        }
 
-       if (pages == z_pagemap_global)
-               mutex_unlock(&z_pagemap_global_lock);
-       else if (pages != pages_onstack)
-               kvfree(pages);
+       if (be->decompressed_pages != be->onstack_pages)
+               kvfree(be->decompressed_pages);
 
-       pcl->nr_pages = 0;
+       pcl->length = 0;
+       pcl->partial = true;
+       pcl->multibases = false;
+       pcl->bvset.nextpage = NULL;
        pcl->vcnt = 0;
 
        /* pcluster lock MUST be taken before the following line */
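
The replacement for the old on-stack/global/vmalloc three-way fallback is simpler: one fixed onstack_pages[Z_EROFS_ONSTACK_PAGES] buffer is carved front to back, decompressed slots first and compressed slots packed right behind them when both fit, with kvcalloc() as the fallback, and a pointer-range test at free time tells heap from stack. A hedged sketch of the carving and the matching free check:

    #include <stdio.h>
    #include <stdlib.h>

    #define ONSTACK_PAGES 32u

    int main(void)
    {
        void *onstack[ONSTACK_PAGES] = { 0 };
        void **out = NULL, **in = NULL;
        unsigned int used = 0;
        unsigned int nr_pages = 12;     /* decompressed page slots needed */
        unsigned int pclusterpages = 8; /* compressed page slots needed */

        if (nr_pages <= ONSTACK_PAGES) {
            out = onstack;              /* front of the stack buffer */
            used = nr_pages;
        }
        if (pclusterpages + used <= ONSTACK_PAGES)
            in = onstack + used;        /* packed right behind 'out' */

        if (!out)                       /* fallbacks mirror kvcalloc() */
            out = calloc(nr_pages, sizeof(*out));
        if (!in)
            in = calloc(pclusterpages, sizeof(*in));

        printf("out: %s, in: %s\n",
               out == onstack ? "on stack" : "heap",
               in == onstack + used ? "on stack" : "heap");

        /* the free-side range check, as in z_erofs_decompress_pcluster() */
        if (in < onstack || in >= onstack + ONSTACK_PAGES)
            free(in);
        if (out != onstack)
            free(out);
        return 0;
    }
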
@@ -997,22 +1085,25 @@ out:
 static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
                                     struct page **pagepool)
 {
+       struct z_erofs_decompress_backend be = {
+               .sb = io->sb,
+               .pagepool = pagepool,
+               .decompressed_secondary_bvecs =
+                       LIST_HEAD_INIT(be.decompressed_secondary_bvecs),
+       };
        z_erofs_next_pcluster_t owned = io->head;
 
        while (owned != Z_EROFS_PCLUSTER_TAIL_CLOSED) {
-               struct z_erofs_pcluster *pcl;
-
-               /* no possible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
+               /* impossible that 'owned' equals Z_EROFS_WORK_TPTR_TAIL */
                DBG_BUGON(owned == Z_EROFS_PCLUSTER_TAIL);
-
-               /* no possible that 'owned' equals NULL */
+               /* impossible that 'owned' equals Z_EROFS_PCLUSTER_NIL */
                DBG_BUGON(owned == Z_EROFS_PCLUSTER_NIL);
 
-               pcl = container_of(owned, struct z_erofs_pcluster, next);
-               owned = READ_ONCE(pcl->next);
+               be.pcl = container_of(owned, struct z_erofs_pcluster, next);
+               owned = READ_ONCE(be.pcl->next);
 
-               z_erofs_decompress_pcluster(io->sb, pcl, pagepool);
-               erofs_workgroup_put(&pcl->obj);
+               z_erofs_decompress_pcluster(&be, io->eio ? -EIO : 0);
+               erofs_workgroup_put(&be.pcl->obj);
        }
 }
 
@@ -1038,7 +1129,6 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
        if (sync) {
                if (!atomic_add_return(bios, &io->pending_bios))
                        complete(&io->u.done);
-
                return;
        }
 
@@ -1071,7 +1161,7 @@ static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
        int justfound;
 
 repeat:
-       page = READ_ONCE(pcl->compressed_pages[nr]);
+       page = READ_ONCE(pcl->compressed_bvecs[nr].page);
        oldpage = page;
 
        if (!page)
@@ -1087,7 +1177,7 @@ repeat:
         * otherwise, it will take the inplace I/O path instead.
         */
        if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
-               WRITE_ONCE(pcl->compressed_pages[nr], page);
+               WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
                set_page_private(page, 0);
                tocache = true;
                goto out_tocache;
@@ -1113,14 +1203,13 @@ repeat:
 
        /* the page is still in the managed cache */
        if (page->mapping == mc) {
-               WRITE_ONCE(pcl->compressed_pages[nr], page);
+               WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
 
-               ClearPageError(page);
                if (!PagePrivate(page)) {
                        /*
                         * impossible to be !PagePrivate(page) for
                         * the current restriction as well if
-                        * the page is already in compressed_pages[].
+                        * the page is already in compressed_bvecs[].
                         */
                        DBG_BUGON(!justfound);
 
@@ -1149,7 +1238,8 @@ repeat:
        put_page(page);
 out_allocpage:
        page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
-       if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) {
+       if (oldpage != cmpxchg(&pcl->compressed_bvecs[nr].page,
+                              oldpage, page)) {
                erofs_pagepool_add(pagepool, page);
                cond_resched();
                goto repeat;
@@ -1186,6 +1276,7 @@ fg_out:
                q = fgq;
                init_completion(&fgq->u.done);
                atomic_set(&fgq->pending_bios, 0);
+               q->eio = false;
        }
        q->sb = sb;
        q->head = Z_EROFS_PCLUSTER_TAIL_CLOSED;
@@ -1246,26 +1337,25 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
                DBG_BUGON(PageUptodate(page));
                DBG_BUGON(z_erofs_page_is_invalidated(page));
 
-               if (err)
-                       SetPageError(page);
-
                if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
                        if (!err)
                                SetPageUptodate(page);
                        unlock_page(page);
                }
        }
+       if (err)
+               q->eio = true;
        z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
        bio_put(bio);
 }
 
-static void z_erofs_submit_queue(struct super_block *sb,
-                                struct z_erofs_decompress_frontend *f,
+static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
                                 struct page **pagepool,
                                 struct z_erofs_decompressqueue *fgq,
                                 bool *force_fg)
 {
-       struct erofs_sb_info *const sbi = EROFS_SB(sb);
+       struct super_block *sb = f->inode->i_sb;
+       struct address_space *mc = MNGD_MAPPING(EROFS_SB(sb));
        z_erofs_next_pcluster_t qtail[NR_JOBQUEUES];
        struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
        void *bi_private;
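
Two endio details worth noting above: per-page SetPageError() is gone in favor of a single q->eio flag consumed later by z_erofs_decompress_queue(), and bio->bi_private still packs the queue pointer together with a one-bit background tag via the tagptr helpers (tagptr_unfold_tags()). The tagging trick relies on pointer alignment freeing the low bits; a hedged standalone sketch:

    #include <stdint.h>
    #include <stdio.h>

    /* pointers to types aligned > 1 byte have zero low bits; a small
     * tag can ride along in the same word (cf. the kernel's tagptr.h) */
    typedef uintptr_t tagptr1_t;    /* one tag bit */

    static tagptr1_t fold(void *ptr, unsigned int tag)
    {
        return (uintptr_t)ptr | (tag & 1);
    }

    static void *unfold_ptr(tagptr1_t t)
    {
        return (void *)(t & ~(uintptr_t)1);
    }

    static unsigned int unfold_tags(tagptr1_t t)
    {
        return t & 1;
    }

    int main(void)
    {
        int queue;                          /* stands in for the I/O queue */
        tagptr1_t t = fold(&queue, 1);      /* tag 1: background (async) */

        printf("ptr restored: %d, tag: %u\n",
               unfold_ptr(t) == (void *)&queue, unfold_tags(t));
        return 0;
    }
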
@@ -1317,7 +1407,7 @@ static void z_erofs_submit_queue(struct super_block *sb,
                        struct page *page;
 
                        page = pickup_page_for_submission(pcl, i++, pagepool,
-                                                         MNGD_MAPPING(sbi));
+                                                         mc);
                        if (!page)
                                continue;
 
@@ -1369,15 +1459,14 @@ submit_bio_retry:
        z_erofs_decompress_kickoff(q[JQ_SUBMIT], *force_fg, nr_bios);
 }
 
-static void z_erofs_runqueue(struct super_block *sb,
-                            struct z_erofs_decompress_frontend *f,
+static void z_erofs_runqueue(struct z_erofs_decompress_frontend *f,
                             struct page **pagepool, bool force_fg)
 {
        struct z_erofs_decompressqueue io[NR_JOBQUEUES];
 
        if (f->owned_head == Z_EROFS_PCLUSTER_TAIL)
                return;
-       z_erofs_submit_queue(sb, f, pagepool, io, &force_fg);
+       z_erofs_submit_queue(f, pagepool, io, &force_fg);
 
        /* handle bypass queue (no i/o pclusters) immediately */
        z_erofs_decompress_queue(&io[JQ_BYPASS], pagepool);
@@ -1475,7 +1564,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
        (void)z_erofs_collector_end(&f);
 
        /* if some compressed cluster ready, need submit them anyway */
-       z_erofs_runqueue(inode->i_sb, &f, &pagepool,
+       z_erofs_runqueue(&f, &pagepool,
                         z_erofs_get_sync_decompress_policy(sbi, 0));
 
        if (err)
@@ -1524,7 +1613,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
        z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
        (void)z_erofs_collector_end(&f);
 
-       z_erofs_runqueue(inode->i_sb, &f, &pagepool,
+       z_erofs_runqueue(&f, &pagepool,
                         z_erofs_get_sync_decompress_policy(sbi, nr_pages));
        erofs_put_metabuf(&f.map.buf);
        erofs_release_pages(&pagepool);