From e33f42b20bcb2f55cb1eeeab9956a503dcf36107 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Thu, 13 Jan 2022 13:18:45 +0800
Subject: erofs: fix fsdax partition offset handling

After spending some time testing today's upstream fsdax, I found that
it doesn't actually work well, as shown below:

[  186.492983] ------------[ cut here ]------------
[  186.493629] WARNING: CPU: 1 PID: 205 at fs/iomap/iter.c:33 iomap_iter+0x2f6/0x310

The problem is that m_dax_part_off should be applied to physical
addresses, and I'm very sorry that I didn't catch this earlier.

Anyway, let's fix it up now. Also, I need to find a way to set up
a standalone testcase to look after this later.

Link: https://lore.kernel.org/r/20220113051845.244461-1-hsiangkao@linux.alibaba.com
Fixes: de2051147771 ("fsdax: shift partition offset handling into the file systems")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/data.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index fa7ddb7ad980..226a57c57ee6 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -252,12 +252,10 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 		return ret;
 
 	iomap->offset = map.m_la;
-	if (flags & IOMAP_DAX) {
+	if (flags & IOMAP_DAX)
 		iomap->dax_dev = mdev.m_daxdev;
-		iomap->offset += mdev.m_dax_part_off;
-	} else {
+	else
 		iomap->bdev = mdev.m_bdev;
-	}
 	iomap->length = map.m_llen;
 	iomap->flags = 0;
 	iomap->private = NULL;
@@ -284,6 +282,8 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 	} else {
 		iomap->type = IOMAP_MAPPED;
 		iomap->addr = mdev.m_pa;
+		if (flags & IOMAP_DAX)
+			iomap->addr += mdev.m_dax_part_off;
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From 7865827c432bf9885ee26e5767697c3d9e21a82c Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Fri, 21 Jan 2022 17:14:12 +0800
Subject: erofs: avoid unnecessary z_erofs_decompressqueue_work() declaration

Just a code rearrangement. No logic changes.

Link: https://lore.kernel.org/r/20220121091412.86086-1-hsiangkao@linux.alibaba.com
Reviewed-by: Yue Hu <huyue2@yulong.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/zdata.c | 113 +++++++++++++++++++++++++++----------------------------
 1 file changed, 56 insertions(+), 57 deletions(-)

(limited to 'fs')

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 498b7666efe8..423bc1a61da5 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -810,68 +810,11 @@ static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi,
 	return false;
 }
 
-static void z_erofs_decompressqueue_work(struct work_struct *work);
-static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
-				       bool sync, int bios)
-{
-	struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
-
-	/* wake up the caller thread for sync decompression */
-	if (sync) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&io->u.wait.lock, flags);
-		if (!atomic_add_return(bios, &io->pending_bios))
-			wake_up_locked(&io->u.wait);
-		spin_unlock_irqrestore(&io->u.wait.lock, flags);
-		return;
-	}
-
-	if (atomic_add_return(bios, &io->pending_bios))
-		return;
-	/* Use workqueue and sync decompression for atomic contexts only */
-	if (in_atomic() || irqs_disabled()) {
-		queue_work(z_erofs_workqueue, &io->u.work);
-		/* enable sync decompression for readahead */
-		if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
-			sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
-		return;
-	}
-	z_erofs_decompressqueue_work(&io->u.work);
-}
-
 static bool z_erofs_page_is_invalidated(struct page *page)
 {
 	return !page->mapping && !z_erofs_is_shortlived_page(page);
 }
 
-static void z_erofs_decompressqueue_endio(struct bio *bio)
-{
-	tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
-	struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
-	blk_status_t err = bio->bi_status;
-	struct bio_vec *bvec;
-	struct bvec_iter_all iter_all;
-
-	bio_for_each_segment_all(bvec, bio, iter_all) {
-		struct page *page = bvec->bv_page;
-
-		DBG_BUGON(PageUptodate(page));
-		DBG_BUGON(z_erofs_page_is_invalidated(page));
-
-		if (err)
-			SetPageError(page);
-
-		if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
-			if (!err)
-				SetPageUptodate(page);
-			unlock_page(page);
-		}
-	}
-	z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
-	bio_put(bio);
-}
-
 static int z_erofs_decompress_pcluster(struct super_block *sb,
 				       struct z_erofs_pcluster *pcl,
 				       struct page **pagepool)
@@ -1123,6 +1066,35 @@ static void z_erofs_decompressqueue_work(struct work_struct *work)
 	kvfree(bgq);
 }
 
+static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
+				       bool sync, int bios)
+{
+	struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+
+	/* wake up the caller thread for sync decompression */
+	if (sync) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&io->u.wait.lock, flags);
+		if (!atomic_add_return(bios, &io->pending_bios))
+			wake_up_locked(&io->u.wait);
+		spin_unlock_irqrestore(&io->u.wait.lock, flags);
+		return;
+	}
+
+	if (atomic_add_return(bios, &io->pending_bios))
+		return;
+	/* Use workqueue and sync decompression for atomic contexts only */
+	if (in_atomic() || irqs_disabled()) {
+		queue_work(z_erofs_workqueue, &io->u.work);
+		/* enable sync decompression for readahead */
+		if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO)
+			sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON;
+		return;
+	}
+	z_erofs_decompressqueue_work(&io->u.work);
+}
+
 static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
 					       unsigned int nr,
 					       struct page **pagepool,
@@ -1300,6 +1272,33 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
 	qtail[JQ_BYPASS] = &pcl->next;
 }
 
+static void z_erofs_decompressqueue_endio(struct bio *bio)
+{
+	tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
+	struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
+	blk_status_t err = bio->bi_status;
+	struct bio_vec *bvec;
+	struct bvec_iter_all iter_all;
+
+	bio_for_each_segment_all(bvec, bio, iter_all) {
+		struct page *page = bvec->bv_page;
+
+		DBG_BUGON(PageUptodate(page));
+		DBG_BUGON(z_erofs_page_is_invalidated(page));
+
+		if (err)
+			SetPageError(page);
+
+		if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
+			if (!err)
+				SetPageUptodate(page);
+			unlock_page(page);
+		}
+	}
+	z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
+	bio_put(bio);
+}
+
 static void z_erofs_submit_queue(struct super_block *sb,
 				 struct z_erofs_decompress_frontend *f,
 				 struct page **pagepool,
-- 
cgit v1.2.3


From 24331050a3e6afcd4451409831dd9ae8085a42f6 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@linux.alibaba.com>
Date: Fri, 4 Feb 2022 03:02:03 +0800
Subject: erofs: fix small compressed files inlining

Prior to the ztailpacking feature, it was enough for each lcluster to
have two pclusters at most, and the last pcluster should be turned into
an uncompressed pcluster when necessary. For example,
  _________________________________________________
 |_ pcluster n-2 _|_ pcluster n-1 _|____ EOFed ____|

which should be converted into:
  _________________________________________________
 |_ pcluster n-2 _|_ pcluster n-1 (uncompressed)' _|

That is fine since either pcluster n-1 or (uncompressed)' takes one
physical block.

However, now that ztailpacking is supported, the game has changed since
the last pcluster can be inlined. And the case above is quite
common when inlining small files. Therefore, in order to inline more
effectively, special EOF lclusters are now supported which can have
three parts at most, as illustrated below:
  _________________________________________________
 |_ pcluster n-2 _|_ pcluster n-1 _|____ EOFed ____|
                                   ^ i_size

Actually similar code exists in Yue Hu's original patchset [1], but I
removed this part on purpose. After evaluating more real cases with
small files, I've changed my mind.

[1] https://lore.kernel.org/r/20211215094449.15162-1-huyue2@yulong.com

Link: https://lore.kernel.org/r/20220203190203.30794-1-xiang@kernel.org
Fixes: ab92184ff8f1 ("erofs: add on-disk compressed tail-packing inline support")
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 fs/erofs/zmap.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 18d7fd1a5064..361b1d6e4bf9 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -630,6 +630,13 @@ static int z_erofs_do_map_blocks(struct inode *inode,
 		if (endoff >= m.clusterofs) {
 			m.headtype = m.type;
 			map->m_la = (m.lcn << lclusterbits) | m.clusterofs;
+			/*
+			 * For ztailpacking files, in order to inline data more
+			 * effectively, special EOF lclusters are now supported
+			 * which can have three parts at most.
+			 */
+			if (ztailpacking && end > inode->i_size)
+				end = inode->i_size;
 			break;
 		}
 		/* m.lcn should be >= 1 if endoff < m.clusterofs */
-- 
cgit v1.2.3