android_kernel_xiaomi_sm8350/fs/erofs/decompressor.c

413 lines
10 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2019 HUAWEI, Inc.
* https://www.huawei.com/
*/
#include "compress.h"
#include <linux/module.h>
#include <linux/lz4.h>
#ifndef LZ4_DISTANCE_MAX /* history window size */
#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
#endif
#define LZ4_MAX_DISTANCE_PAGES (DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1)
#ifndef LZ4_DECOMPRESS_INPLACE_MARGIN
#define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32)
#endif
struct z_erofs_decompressor {
/*
* if destpages have sparsed pages, fill them with bounce pages.
* it also check whether destpages indicate continuous physical memory.
*/
int (*prepare_destpages)(struct z_erofs_decompress_req *rq,
struct list_head *pagepool);
BACKPORT: erofs: fix lz4 inplace decompression commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de upstream. Currently EROFS can map another compressed buffer for inplace decompression, that was used to handle the cases that some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data is arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, it was hardly observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as an useful improvement, we could try to tie up these two buffers together in the correct order. Reported-and-tested-by: Juhyung Park <qkrwngud825@gmail.com> Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@mail.gmail.com Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable <stable@vger.kernel.org> # 5.4+ Tested-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn> Change-Id: Ib7a578283e33f0329ae2133223878ddf0738aba4 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> [mkbestas: Adapt for android <=5.4 kernel which contains backports that caused various conflicts] Signed-off-by: Michael Bestas <mkbestas@lineageos.org>
2023-12-05 23:55:34 -05:00
int (*decompress)(struct z_erofs_decompress_req *rq, u8 *dst);
char *name;
};
int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb,
struct z_erofs_lz4_cfgs *lz4, int size)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
u16 distance;
if (lz4) {
if (size < sizeof(struct z_erofs_lz4_cfgs)) {
erofs_err(sb, "invalid lz4 cfgs, size=%u", size);
return -EINVAL;
}
distance = le16_to_cpu(lz4->max_distance);
sbi->lz4.max_pclusterblks = le16_to_cpu(lz4->max_pclusterblks);
if (!sbi->lz4.max_pclusterblks) {
sbi->lz4.max_pclusterblks = 1; /* reserved case */
} else if (sbi->lz4.max_pclusterblks >
Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) {
erofs_err(sb, "too large lz4 pclusterblks %u",
sbi->lz4.max_pclusterblks);
return -EINVAL;
} else if (sbi->lz4.max_pclusterblks >= 2) {
erofs_info(sb, "EXPERIMENTAL big pcluster feature in use. Use at your own risk!");
}
} else {
distance = le16_to_cpu(dsb->u1.lz4_max_distance);
sbi->lz4.max_pclusterblks = 1;
}
sbi->lz4.max_distance_pages = distance ?
DIV_ROUND_UP(distance, PAGE_SIZE) + 1 :
LZ4_MAX_DISTANCE_PAGES;
return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks);
}
static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
{
const unsigned int nr =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
BITS_PER_LONG)] = { 0 };
unsigned int lz4_max_distance_pages =
EROFS_SB(rq->sb)->lz4.max_distance_pages;
void *kaddr = NULL;
unsigned int i, j, top;
top = 0;
for (i = j = 0; i < nr; ++i, ++j) {
struct page *const page = rq->out[i];
struct page *victim;
if (j >= lz4_max_distance_pages)
j = 0;
/* 'valid' bounced can only be tested after a complete round */
if (test_bit(j, bounced)) {
DBG_BUGON(i < lz4_max_distance_pages);
DBG_BUGON(top >= lz4_max_distance_pages);
availables[top++] = rq->out[i - lz4_max_distance_pages];
}
if (page) {
__clear_bit(j, bounced);
if (!PageHighMem(page)) {
if (!i) {
kaddr = page_address(page);
continue;
}
if (kaddr &&
kaddr + PAGE_SIZE == page_address(page)) {
kaddr += PAGE_SIZE;
continue;
}
}
kaddr = NULL;
continue;
}
kaddr = NULL;
__set_bit(j, bounced);
if (top) {
victim = availables[--top];
get_page(victim);
} else {
victim = erofs_allocpage(pagepool,
GFP_KERNEL | __GFP_NOFAIL);
set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
}
rq->out[i] = victim;
}
return kaddr ? 1 : 0;
}
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
static void *z_erofs_handle_inplace_io(struct z_erofs_decompress_req *rq,
BACKPORT: erofs: fix lz4 inplace decompression commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de upstream. Currently EROFS can map another compressed buffer for inplace decompression, that was used to handle the cases that some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data is arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, it was hardly observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as an useful improvement, we could try to tie up these two buffers together in the correct order. Reported-and-tested-by: Juhyung Park <qkrwngud825@gmail.com> Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@mail.gmail.com Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable <stable@vger.kernel.org> # 5.4+ Tested-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn> Change-Id: Ib7a578283e33f0329ae2133223878ddf0738aba4 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> [mkbestas: Adapt for android <=5.4 kernel which contains backports that caused various conflicts] Signed-off-by: Michael Bestas <mkbestas@lineageos.org>
2023-12-05 23:55:34 -05:00
void *inpage, void *out, unsigned int *inputmargin,
int *maptype, bool support_0padding)
{
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
unsigned int nrpages_in, nrpages_out;
BACKPORT: erofs: fix lz4 inplace decompression commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de upstream. Currently EROFS can map another compressed buffer for inplace decompression, that was used to handle the cases that some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data is arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, it was hardly observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as an useful improvement, we could try to tie up these two buffers together in the correct order. Reported-and-tested-by: Juhyung Park <qkrwngud825@gmail.com> Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@mail.gmail.com Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable <stable@vger.kernel.org> # 5.4+ Tested-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn> Change-Id: Ib7a578283e33f0329ae2133223878ddf0738aba4 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> [mkbestas: Adapt for android <=5.4 kernel which contains backports that caused various conflicts] Signed-off-by: Michael Bestas <mkbestas@lineageos.org>
2023-12-05 23:55:34 -05:00
unsigned int ofull, oend, inputsize, total, i;
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
struct page **in;
void *src, *tmp;
inputsize = rq->inputsize;
nrpages_in = PAGE_ALIGN(inputsize) >> PAGE_SHIFT;
oend = rq->pageofs_out + rq->outputsize;
ofull = PAGE_ALIGN(oend);
nrpages_out = ofull >> PAGE_SHIFT;
if (rq->inplace_io) {
if (rq->partial_decoding || !support_0padding ||
ofull - oend < LZ4_DECOMPRESS_INPLACE_MARGIN(inputsize))
goto docopy;
BACKPORT: erofs: fix lz4 inplace decompression commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de upstream. Currently EROFS can map another compressed buffer for inplace decompression, that was used to handle the cases that some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data is arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, it was hardly observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as an useful improvement, we could try to tie up these two buffers together in the correct order. Reported-and-tested-by: Juhyung Park <qkrwngud825@gmail.com> Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@mail.gmail.com Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable <stable@vger.kernel.org> # 5.4+ Tested-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn> Change-Id: Ib7a578283e33f0329ae2133223878ddf0738aba4 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> [mkbestas: Adapt for android <=5.4 kernel which contains backports that caused various conflicts] Signed-off-by: Michael Bestas <mkbestas@lineageos.org>
2023-12-05 23:55:34 -05:00
for (i = 0; i < nrpages_in; ++i)
if (rq->out[nrpages_out - nrpages_in + i] !=
rq->in[i])
goto docopy;
kunmap_atomic(inpage);
*maptype = 3;
return out + ((nrpages_out - nrpages_in) << PAGE_SHIFT);
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
}
if (nrpages_in <= 1) {
*maptype = 0;
return inpage;
}
kunmap_atomic(inpage);
src = erofs_vm_map_ram(rq->in, nrpages_in);
if (!src)
return ERR_PTR(-ENOMEM);
*maptype = 1;
return src;
docopy:
/* Or copy compressed data which can be overlapped to per-CPU buffer */
in = rq->in;
src = erofs_get_pcpubuf(nrpages_in);
if (!src) {
DBG_BUGON(1);
kunmap_atomic(inpage);
return ERR_PTR(-EFAULT);
}
tmp = src;
total = rq->inputsize;
while (total) {
unsigned int page_copycnt =
min_t(unsigned int, total, PAGE_SIZE - *inputmargin);
if (!inpage)
inpage = kmap_atomic(*in);
memcpy(tmp, inpage + *inputmargin, page_copycnt);
kunmap_atomic(inpage);
inpage = NULL;
tmp += page_copycnt;
total -= page_copycnt;
++in;
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
*inputmargin = 0;
}
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
*maptype = 2;
return src;
}
BACKPORT: erofs: fix lz4 inplace decompression commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de upstream. Currently EROFS can map another compressed buffer for inplace decompression, that was used to handle the cases that some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data is arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, it was hardly observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as an useful improvement, we could try to tie up these two buffers together in the correct order. Reported-and-tested-by: Juhyung Park <qkrwngud825@gmail.com> Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@mail.gmail.com Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable <stable@vger.kernel.org> # 5.4+ Tested-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn> Change-Id: Ib7a578283e33f0329ae2133223878ddf0738aba4 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> [mkbestas: Adapt for android <=5.4 kernel which contains backports that caused various conflicts] Signed-off-by: Michael Bestas <mkbestas@lineageos.org>
2023-12-05 23:55:34 -05:00
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *dst)
{
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
unsigned int inputmargin;
BACKPORT: erofs: fix lz4 inplace decompression commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de upstream. Currently EROFS can map another compressed buffer for inplace decompression, that was used to handle the cases that some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data is arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, it was hardly observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as an useful improvement, we could try to tie up these two buffers together in the correct order. Reported-and-tested-by: Juhyung Park <qkrwngud825@gmail.com> Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@mail.gmail.com Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable <stable@vger.kernel.org> # 5.4+ Tested-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn> Change-Id: Ib7a578283e33f0329ae2133223878ddf0738aba4 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> [mkbestas: Adapt for android <=5.4 kernel which contains backports that caused various conflicts] Signed-off-by: Michael Bestas <mkbestas@lineageos.org>
2023-12-05 23:55:34 -05:00
u8 *out, *headpage, *src;
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
bool support_0padding;
int ret, maptype;
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
DBG_BUGON(*rq->in == NULL);
headpage = kmap_atomic(*rq->in);
inputmargin = 0;
support_0padding = false;
/* decompression inplace is only safe when 0padding is enabled */
if (erofs_sb_has_lz4_0padding(EROFS_SB(rq->sb))) {
support_0padding = true;
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
while (!headpage[inputmargin & ~PAGE_MASK])
if (!(++inputmargin & ~PAGE_MASK))
break;
if (inputmargin >= rq->inputsize) {
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
kunmap_atomic(headpage);
return -EIO;
}
}
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
rq->inputsize -= inputmargin;
BACKPORT: erofs: fix lz4 inplace decompression commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de upstream. Currently EROFS can map another compressed buffer for inplace decompression, that was used to handle the cases that some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data is arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, it was hardly observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as an useful improvement, we could try to tie up these two buffers together in the correct order. Reported-and-tested-by: Juhyung Park <qkrwngud825@gmail.com> Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@mail.gmail.com Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable <stable@vger.kernel.org> # 5.4+ Tested-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn> Change-Id: Ib7a578283e33f0329ae2133223878ddf0738aba4 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> [mkbestas: Adapt for android <=5.4 kernel which contains backports that caused various conflicts] Signed-off-by: Michael Bestas <mkbestas@lineageos.org>
2023-12-05 23:55:34 -05:00
src = z_erofs_handle_inplace_io(rq, headpage, dst, &inputmargin,
&maptype, support_0padding);
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
if (IS_ERR(src))
return PTR_ERR(src);
BACKPORT: erofs: fix lz4 inplace decompression commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de upstream. Currently EROFS can map another compressed buffer for inplace decompression, that was used to handle the cases that some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data is arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, it was hardly observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as an useful improvement, we could try to tie up these two buffers together in the correct order. Reported-and-tested-by: Juhyung Park <qkrwngud825@gmail.com> Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@mail.gmail.com Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable <stable@vger.kernel.org> # 5.4+ Tested-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn> Change-Id: Ib7a578283e33f0329ae2133223878ddf0738aba4 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> [mkbestas: Adapt for android <=5.4 kernel which contains backports that caused various conflicts] Signed-off-by: Michael Bestas <mkbestas@lineageos.org>
2023-12-05 23:55:34 -05:00
out = dst + rq->pageofs_out;
/* legacy format could compress extra data in a pcluster. */
if (rq->partial_decoding || !support_0padding)
ret = LZ4_decompress_safe_partial(src + inputmargin, out,
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
rq->inputsize, rq->outputsize, rq->outputsize);
else
ret = LZ4_decompress_safe(src + inputmargin, out,
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
rq->inputsize, rq->outputsize);
if (ret != rq->outputsize) {
erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
ret, rq->inputsize, inputmargin, rq->outputsize);
print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
16, 1, src + inputmargin, rq->inputsize, true);
print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
16, 1, out, rq->outputsize, true);
if (ret >= 0)
memset(out + ret, 0, rq->outputsize - ret);
ret = -EIO;
}
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
if (maptype == 0) {
kunmap_atomic(src);
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
} else if (maptype == 1) {
vm_unmap_ram(src, PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT);
} else if (maptype == 2) {
erofs_put_pcpubuf(src);
BACKPORT: erofs: fix lz4 inplace decompression commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de upstream. Currently EROFS can map another compressed buffer for inplace decompression, that was used to handle the cases that some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data is arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, it was hardly observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as an useful improvement, we could try to tie up these two buffers together in the correct order. Reported-and-tested-by: Juhyung Park <qkrwngud825@gmail.com> Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@mail.gmail.com Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable <stable@vger.kernel.org> # 5.4+ Tested-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn> Change-Id: Ib7a578283e33f0329ae2133223878ddf0738aba4 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> [mkbestas: Adapt for android <=5.4 kernel which contains backports that caused various conflicts] Signed-off-by: Michael Bestas <mkbestas@lineageos.org>
2023-12-05 23:55:34 -05:00
} else if (maptype != 3) {
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
DBG_BUGON(1);
return -EFAULT;
}
return ret;
}
static struct z_erofs_decompressor decompressors[] = {
[Z_EROFS_COMPRESSION_SHIFTED] = {
.name = "shifted"
},
[Z_EROFS_COMPRESSION_LZ4] = {
.prepare_destpages = z_erofs_lz4_prepare_destpages,
.decompress = z_erofs_lz4_decompress,
.name = "lz4"
},
};
static void copy_from_pcpubuf(struct page **out, const char *dst,
unsigned short pageofs_out,
unsigned int outputsize)
{
const char *end = dst + outputsize;
const unsigned int righthalf = PAGE_SIZE - pageofs_out;
const char *cur = dst - pageofs_out;
while (cur < end) {
struct page *const page = *out++;
if (page) {
char *buf = kmap_atomic(page);
if (cur >= dst) {
memcpy(buf, cur, min_t(uint, PAGE_SIZE,
end - cur));
} else {
memcpy(buf + pageofs_out, cur + pageofs_out,
min_t(uint, righthalf, end - cur));
}
kunmap_atomic(buf);
}
cur += PAGE_SIZE;
}
}
static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const struct z_erofs_decompressor *alg = decompressors + rq->alg;
unsigned int dst_maptype;
void *dst;
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
int ret;
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
/* two optimized fast paths only for non bigpcluster cases yet */
if (rq->inputsize <= PAGE_SIZE) {
if (nrpages_out == 1 && !rq->inplace_io) {
DBG_BUGON(!*rq->out);
dst = kmap_atomic(*rq->out);
dst_maptype = 0;
goto dstmap_out;
}
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
/*
* For the case of small output size (especially much less
* than PAGE_SIZE), memcpy the decompressed data rather than
* compressed data is preferred.
*/
if (rq->outputsize <= PAGE_SIZE * 7 / 8) {
dst = erofs_get_pcpubuf(1);
if (IS_ERR(dst))
return PTR_ERR(dst);
rq->inplace_io = false;
ret = alg->decompress(rq, dst);
if (!ret)
copy_from_pcpubuf(rq->out, dst, rq->pageofs_out,
rq->outputsize);
erofs_put_pcpubuf(dst);
return ret;
}
}
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
/* general decoding path which can be used for all cases */
ret = alg->prepare_destpages(rq, pagepool);
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
if (ret < 0)
return ret;
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
if (ret) {
dst = page_address(*rq->out);
dst_maptype = 1;
goto dstmap_out;
}
BACKPORT: erofs: support decompress big pcluster for lz4 backend Prior to big pcluster, there was only one compressed page so it'd easy to map this. However, when big pcluster is enabled, more work needs to be done to handle multiple compressed pages. In detail, - (maptype 0) if there is only one compressed page + no need to copy inplace I/O, just map it directly what we did before; - (maptype 1) if there are more compressed pages + no need to copy inplace I/O, vmap such compressed pages instead; - (maptype 2) if inplace I/O needs to be copied, use per-CPU buffers for decompression then. Another thing is how to detect inplace decompression is feasable or not (it's still quite easy for non big pclusters), apart from the inplace margin calculation, inplace I/O page reusing order is also needed to be considered for each compressed page. Currently, if the compressed page is the xth page, it shouldn't be reused as [0 ... nrpages_out - nrpages_in + x], otherwise a full copy will be triggered. Although there are some extra optimization ideas for this, I'd like to make big pcluster work correctly first and obviously it can be further optimized later since it has nothing with the on-disk format at all. Link: https://lore.kernel.org/r/20210407043927.10623-10-xiang@kernel.org Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Gao Xiang <hsiangkao@redhat.com> Bug: 201372112 Change-Id: I8e8b90bd0a401850ea81d895dc55e5d8a2772ed7 (cherry picked from commit 598162d050801e556750defff4ddab499e5d76ed) Signed-off-by: Huang Jianan <huangjianan@oppo.com>
2021-04-07 00:39:26 -04:00
dst = erofs_vm_map_ram(rq->out, nrpages_out);
if (!dst)
return -ENOMEM;
dst_maptype = 2;
dstmap_out:
BACKPORT: erofs: fix lz4 inplace decompression commit 3c12466b6b7bf1e56f9b32c366a3d83d87afb4de upstream. Currently EROFS can map another compressed buffer for inplace decompression, that was used to handle the cases that some pages of compressed data are actually not in-place I/O. However, like most simple LZ77 algorithms, LZ4 expects the compressed data is arranged at the end of the decompressed buffer and it explicitly uses memmove() to handle overlapping: __________________________________________________________ |_ direction of decompression --> ____ |_ compressed data _| Although EROFS arranges compressed data like this, it typically maps two individual virtual buffers so the relative order is uncertain. Previously, it was hardly observed since LZ4 only uses memmove() for short overlapped literals and x86/arm64 memmove implementations seem to completely cover it up and they don't have this issue. Juhyung reported that EROFS data corruption can be found on a new Intel x86 processor. After some analysis, it seems that recent x86 processors with the new FSRM feature expose this issue with "rep movsb". Let's strictly use the decompressed buffer for lz4 inplace decompression for now. Later, as an useful improvement, we could try to tie up these two buffers together in the correct order. Reported-and-tested-by: Juhyung Park <qkrwngud825@gmail.com> Closes: https://lore.kernel.org/r/CAD14+f2AVKf8Fa2OO1aAUdDNTDsVzzR6ctU_oJSmTyd6zSYR2Q@mail.gmail.com Fixes: 0ffd71bcc3a0 ("staging: erofs: introduce LZ4 decompression inplace") Fixes: 598162d05080 ("erofs: support decompress big pcluster for lz4 backend") Cc: stable <stable@vger.kernel.org> # 5.4+ Tested-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn> Change-Id: Ib7a578283e33f0329ae2133223878ddf0738aba4 Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com> Link: https://lore.kernel.org/r/20231206045534.3920847-1-hsiangkao@linux.alibaba.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> [mkbestas: Adapt for android <=5.4 kernel which contains backports that caused various conflicts] Signed-off-by: Michael Bestas <mkbestas@lineageos.org>
2023-12-05 23:55:34 -05:00
ret = alg->decompress(rq, dst);
if (!dst_maptype)
kunmap_atomic(dst);
else if (dst_maptype == 2)
vm_unmap_ram(dst, nrpages_out);
return ret;
}
static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
const unsigned int righthalf = PAGE_SIZE - rq->pageofs_out;
unsigned char *src, *dst;
if (nrpages_out > 2) {
DBG_BUGON(1);
return -EIO;
}
if (rq->out[0] == *rq->in) {
DBG_BUGON(nrpages_out != 1);
return 0;
}
src = kmap_atomic(*rq->in);
if (rq->out[0]) {
dst = kmap_atomic(rq->out[0]);
memcpy(dst + rq->pageofs_out, src, righthalf);
kunmap_atomic(dst);
}
if (nrpages_out == 2) {
DBG_BUGON(!rq->out[1]);
if (rq->out[1] == *rq->in) {
memmove(src, src + righthalf, rq->pageofs_out);
} else {
dst = kmap_atomic(rq->out[1]);
memcpy(dst, src + righthalf, rq->pageofs_out);
kunmap_atomic(dst);
}
}
kunmap_atomic(src);
return 0;
}
int z_erofs_decompress(struct z_erofs_decompress_req *rq,
struct list_head *pagepool)
{
if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED)
return z_erofs_shifted_transform(rq, pagepool);
return z_erofs_decompress_generic(rq, pagepool);
}