ext4: make sure allocate pending entry not fail
[ Upstream commit 8e387c89e96b9543a339f84043cf9df15fed2632 ]

__insert_pending() allocates memory in atomic context, so the allocation can fail, but that failure is currently not handled. It can cause ext4_es_remove_extent() to compute a wrong number of reserved clusters, leaving the global data block reservation count incorrect. Fix this the same way the extents_status entries are preallocated: preallocate the pending entry outside of i_es_lock with __GFP_NOFAIL, so that __insert_pending() and __revise_pending() always succeed.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Cc: stable@kernel.org
Link: https://lore.kernel.org/r/20230824092619.1327976-3-yi.zhang@huaweicloud.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in: parent 70edeedd79, commit 32cfd5c3b8
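The fix reuses the retry pattern ext4 already applies to extent_status entries: first try a GFP_ATOMIC allocation while holding i_es_lock; if that (or anything else in the critical section) fails, drop the lock, preallocate an entry with GFP_KERNEL | __GFP_NOFAIL (which may sleep but cannot fail), and retry so the second pass is guaranteed to succeed. The condensed sketch below illustrates just that pattern; pending_entry, pending_cachep, demo_lock and the demo_* helpers are hypothetical stand-ins, while the real counterparts (__alloc_pending(), __insert_pending(), __revise_pending()) appear in the diff that follows.

/*
 * Condensed, stand-alone sketch of the preallocation pattern this patch
 * applies.  pending_entry, pending_cachep, demo_lock and the demo_*
 * helpers are illustrative stand-ins, not ext4 symbols.
 */
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/errno.h>

struct pending_entry {
	unsigned int lclu;			/* pending cluster number */
};

/* assumed to be set up elsewhere with kmem_cache_create() */
static struct kmem_cache *pending_cachep;
static DEFINE_RWLOCK(demo_lock);		/* plays the role of i_es_lock */

/*
 * GFP_ATOMIC may fail under memory pressure; GFP_KERNEL | __GFP_NOFAIL
 * cannot fail but may sleep, so it is only legal outside the lock.
 */
static struct pending_entry *demo_alloc_pending(bool nofail)
{
	if (!nofail)
		return kmem_cache_alloc(pending_cachep, GFP_ATOMIC);
	return kmem_cache_zalloc(pending_cachep, GFP_KERNEL | __GFP_NOFAIL);
}

/*
 * Called under demo_lock: consume the preallocated entry if the caller
 * provided one, otherwise fall back to an atomic allocation that may fail.
 */
static int demo_insert_pending(unsigned int lclu, struct pending_entry **prealloc)
{
	struct pending_entry *pe;

	if (*prealloc) {
		pe = *prealloc;
		*prealloc = NULL;
	} else {
		pe = demo_alloc_pending(false);
		if (!pe)
			return -ENOMEM;
	}
	pe->lclu = lclu;
	/* ... link pe into an rbtree of pending reservations (omitted) ... */
	return 0;
}

static void demo_insert(unsigned int lclu)
{
	struct pending_entry *prealloc = NULL;
	int err = 0;

retry:
	/* After a failure, preallocate outside the lock so the retry cannot fail. */
	if (err && !prealloc)
		prealloc = demo_alloc_pending(true);

	write_lock(&demo_lock);
	err = demo_insert_pending(lclu, &prealloc);
	write_unlock(&demo_lock);
	if (err)
		goto retry;

	/* Free a preallocated entry that ended up unused. */
	if (prealloc)
		kmem_cache_free(pending_cachep, prealloc);
}

Note that the real __revise_pending() may call __insert_pending() up to twice per invocation, which is why the preallocated entry is passed down as a struct pending_reservation ** and why the callers free it only if it was never consumed.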
fs/ext4/extents_status.c
@@ -152,8 +152,9 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);
 static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
 		       struct ext4_inode_info *locked_ei);
-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
-			     ext4_lblk_t len);
+static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
+			    ext4_lblk_t len,
+			    struct pending_reservation **prealloc);
 
 int __init ext4_init_es(void)
 {
@@ -441,6 +442,19 @@ static void ext4_es_list_del(struct inode *inode)
 	spin_unlock(&sbi->s_es_lock);
 }
 
+static inline struct pending_reservation *__alloc_pending(bool nofail)
+{
+	if (!nofail)
+		return kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
+
+	return kmem_cache_zalloc(ext4_pending_cachep, GFP_KERNEL | __GFP_NOFAIL);
+}
+
+static inline void __free_pending(struct pending_reservation *pr)
+{
+	kmem_cache_free(ext4_pending_cachep, pr);
+}
+
 /*
  * Returns true if we cannot fail to allocate memory for this extent_status
  * entry and cannot reclaim it until its status changes.
@@ -832,11 +846,12 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 {
 	struct extent_status newes;
 	ext4_lblk_t end = lblk + len - 1;
-	int err1 = 0;
-	int err2 = 0;
+	int err1 = 0, err2 = 0, err3 = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct extent_status *es1 = NULL;
 	struct extent_status *es2 = NULL;
+	struct pending_reservation *pr = NULL;
+	bool revise_pending = false;
 
 	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
 		 lblk, len, pblk, status, inode->i_ino);
@@ -861,11 +876,17 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 
 	ext4_es_insert_extent_check(inode, &newes);
 
+	revise_pending = sbi->s_cluster_ratio > 1 &&
+			 test_opt(inode->i_sb, DELALLOC) &&
+			 (status & (EXTENT_STATUS_WRITTEN |
+				    EXTENT_STATUS_UNWRITTEN));
 retry:
 	if (err1 && !es1)
 		es1 = __es_alloc_extent(true);
 	if ((err1 || err2) && !es2)
 		es2 = __es_alloc_extent(true);
+	if ((err1 || err2 || err3) && revise_pending && !pr)
+		pr = __alloc_pending(true);
 	write_lock(&EXT4_I(inode)->i_es_lock);
 
 	err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
@@ -890,13 +911,18 @@ retry:
 			es2 = NULL;
 	}
 
-	if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
-	    (status & EXTENT_STATUS_WRITTEN ||
-	     status & EXTENT_STATUS_UNWRITTEN))
-		__revise_pending(inode, lblk, len);
-
+	if (revise_pending) {
+		err3 = __revise_pending(inode, lblk, len, &pr);
+		if (err3 != 0)
+			goto error;
+		if (pr) {
+			__free_pending(pr);
+			pr = NULL;
+		}
+	}
+error:
 	write_unlock(&EXT4_I(inode)->i_es_lock);
-	if (err1 || err2)
+	if (err1 || err2 || err3)
 		goto retry;
 
 	ext4_es_print_tree(inode);
@@ -1298,7 +1324,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,
 					rc->ndelonly--;
 					node = rb_next(&pr->rb_node);
 					rb_erase(&pr->rb_node, &tree->root);
-					kmem_cache_free(ext4_pending_cachep, pr);
+					__free_pending(pr);
 					if (!node)
 						break;
 					pr = rb_entry(node, struct pending_reservation,
@@ -1892,11 +1918,13 @@ static struct pending_reservation *__get_pending(struct inode *inode,
  *
  * @inode - file containing the cluster
  * @lblk - logical block in the cluster to be added
+ * @prealloc - preallocated pending entry
  *
  * Returns 0 on successful insertion and -ENOMEM on failure. If the
  * pending reservation is already in the set, returns successfully.
  */
-static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
+static int __insert_pending(struct inode *inode, ext4_lblk_t lblk,
+			    struct pending_reservation **prealloc)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	struct ext4_pending_tree *tree = &EXT4_I(inode)->i_pending_tree;
@@ -1922,10 +1950,15 @@ static int __insert_pending(struct inode *inode, ext4_lblk_t lblk)
 		}
 	}
 
-	pr = kmem_cache_alloc(ext4_pending_cachep, GFP_ATOMIC);
-	if (pr == NULL) {
-		ret = -ENOMEM;
-		goto out;
+	if (likely(*prealloc == NULL)) {
+		pr = __alloc_pending(false);
+		if (!pr) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	} else {
+		pr = *prealloc;
+		*prealloc = NULL;
 	}
 	pr->lclu = lclu;
 
@@ -1955,7 +1988,7 @@ static void __remove_pending(struct inode *inode, ext4_lblk_t lblk)
 	if (pr != NULL) {
 		tree = &EXT4_I(inode)->i_pending_tree;
 		rb_erase(&pr->rb_node, &tree->root);
-		kmem_cache_free(ext4_pending_cachep, pr);
+		__free_pending(pr);
 	}
 }
 
@@ -2016,10 +2049,10 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
 				 bool allocated)
 {
 	struct extent_status newes;
-	int err1 = 0;
-	int err2 = 0;
+	int err1 = 0, err2 = 0, err3 = 0;
 	struct extent_status *es1 = NULL;
 	struct extent_status *es2 = NULL;
+	struct pending_reservation *pr = NULL;
 
 	es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",
 		 lblk, inode->i_ino);
@@ -2036,6 +2069,8 @@ retry:
 		es1 = __es_alloc_extent(true);
 	if ((err1 || err2) && !es2)
 		es2 = __es_alloc_extent(true);
+	if ((err1 || err2 || err3) && allocated && !pr)
+		pr = __alloc_pending(true);
 	write_lock(&EXT4_I(inode)->i_es_lock);
 
 	err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
@@ -2058,11 +2093,18 @@ retry:
 			es2 = NULL;
 	}
 
-	if (allocated)
-		__insert_pending(inode, lblk);
-
+	if (allocated) {
+		err3 = __insert_pending(inode, lblk, &pr);
+		if (err3 != 0)
+			goto error;
+		if (pr) {
+			__free_pending(pr);
+			pr = NULL;
+		}
+	}
+error:
 	write_unlock(&EXT4_I(inode)->i_es_lock);
-	if (err1 || err2)
+	if (err1 || err2 || err3)
 		goto retry;
 
 	ext4_es_print_tree(inode);
@@ -2168,21 +2210,24 @@ unsigned int ext4_es_delayed_clu(struct inode *inode, ext4_lblk_t lblk,
  * @inode - file containing the range
  * @lblk - logical block defining the start of range
  * @len - length of range in blocks
+ * @prealloc - preallocated pending entry
 *
 * Used after a newly allocated extent is added to the extents status tree.
 * Requires that the extents in the range have either written or unwritten
 * status. Must be called while holding i_es_lock.
 */
-static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
-			     ext4_lblk_t len)
+static int __revise_pending(struct inode *inode, ext4_lblk_t lblk,
+			    ext4_lblk_t len,
+			    struct pending_reservation **prealloc)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_lblk_t end = lblk + len - 1;
 	ext4_lblk_t first, last;
 	bool f_del = false, l_del = false;
+	int ret = 0;
 
 	if (len == 0)
-		return;
+		return 0;
 
 	/*
 	 * Two cases - block range within single cluster and block range
@@ -2203,7 +2248,9 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
 			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
 						first, lblk - 1);
 		if (f_del) {
-			__insert_pending(inode, first);
+			ret = __insert_pending(inode, first, prealloc);
+			if (ret < 0)
+				goto out;
 		} else {
 			last = EXT4_LBLK_CMASK(sbi, end) +
 			       sbi->s_cluster_ratio - 1;
@@ -2211,9 +2258,11 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
 				l_del = __es_scan_range(inode,
 							&ext4_es_is_delonly,
 							end + 1, last);
-			if (l_del)
-				__insert_pending(inode, last);
-			else
+			if (l_del) {
+				ret = __insert_pending(inode, last, prealloc);
+				if (ret < 0)
+					goto out;
+			} else
 				__remove_pending(inode, last);
 		}
 	} else {
@@ -2221,18 +2270,24 @@ static void __revise_pending(struct inode *inode, ext4_lblk_t lblk,
 		if (first != lblk)
 			f_del = __es_scan_range(inode, &ext4_es_is_delonly,
 						first, lblk - 1);
-		if (f_del)
-			__insert_pending(inode, first);
-		else
+		if (f_del) {
+			ret = __insert_pending(inode, first, prealloc);
+			if (ret < 0)
+				goto out;
+		} else
 			__remove_pending(inode, first);
 
 		last = EXT4_LBLK_CMASK(sbi, end) + sbi->s_cluster_ratio - 1;
 		if (last != end)
 			l_del = __es_scan_range(inode, &ext4_es_is_delonly,
 						end + 1, last);
-		if (l_del)
-			__insert_pending(inode, last);
-		else
+		if (l_del) {
+			ret = __insert_pending(inode, last, prealloc);
+			if (ret < 0)
+				goto out;
+		} else
 			__remove_pending(inode, last);
 	}
+out:
+	return ret;
 }