jbd2: recheck chechpointing non-dirty buffer
[ Upstream commit c2d6fd9d6f35079f1669f0100f05b46708c74b7f ] There is a long-standing metadata corruption issue that happens from time to time, but it's very difficult to reproduce and analyse, benefit from the JBD2_CYCLE_RECORD option, we found out that the problem is the checkpointing process miss to write out some buffers which are raced by another do_get_write_access(). Looks below for detail. jbd2_log_do_checkpoint() //transaction X //buffer A is dirty and not belones to any transaction __buffer_relink_io() //move it to the IO list __flush_batch() write_dirty_buffer() do_get_write_access() clear_buffer_dirty __jbd2_journal_file_buffer() //add buffer A to a new transaction Y lock_buffer(bh) //doesn't write out __jbd2_journal_remove_checkpoint() //finish checkpoint except buffer A //filesystem corrupt if the new transaction Y isn't fully write out. Due to the t_checkpoint_list walking loop in jbd2_log_do_checkpoint() have already handles waiting for buffers under IO and re-added new transaction to complete commit, and it also removing cleaned buffers, this makes sure the list will eventually get empty. So it's fine to leave buffers on the t_checkpoint_list while flushing out and completely stop using the t_checkpoint_io_list. Cc: stable@vger.kernel.org Suggested-by: Jan Kara <jack@suse.cz> Signed-off-by: Zhang Yi <yi.zhang@huawei.com> Tested-by: Zhihao Cheng <chengzhihao1@huawei.com> Reviewed-by: Jan Kara <jack@suse.cz> Link: https://lore.kernel.org/r/20230606135928.434610-2-yi.zhang@huaweicloud.com Signed-off-by: Theodore Ts'o <tytso@mit.edu> Stable-dep-of: e34c8dd238d0 ("jbd2: Fix wrongly judgement for buffer head removing while doing checkpoint") Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
acc9a81f7c
commit
937cb20746
@ -57,28 +57,6 @@ static inline void __buffer_unlink(struct journal_head *jh)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Move a buffer from the checkpoint list to the checkpoint io list
|
||||
*
|
||||
* Called with j_list_lock held
|
||||
*/
|
||||
static inline void __buffer_relink_io(struct journal_head *jh)
|
||||
{
|
||||
transaction_t *transaction = jh->b_cp_transaction;
|
||||
|
||||
__buffer_unlink_first(jh);
|
||||
|
||||
if (!transaction->t_checkpoint_io_list) {
|
||||
jh->b_cpnext = jh->b_cpprev = jh;
|
||||
} else {
|
||||
jh->b_cpnext = transaction->t_checkpoint_io_list;
|
||||
jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
|
||||
jh->b_cpprev->b_cpnext = jh;
|
||||
jh->b_cpnext->b_cpprev = jh;
|
||||
}
|
||||
transaction->t_checkpoint_io_list = jh;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to release a checkpointed buffer from its transaction.
|
||||
* Returns 1 if we released it and 2 if we also released the
|
||||
@ -190,6 +168,7 @@ __flush_batch(journal_t *journal, int *batch_count)
|
||||
struct buffer_head *bh = journal->j_chkpt_bhs[i];
|
||||
BUFFER_TRACE(bh, "brelse");
|
||||
__brelse(bh);
|
||||
journal->j_chkpt_bhs[i] = NULL;
|
||||
}
|
||||
*batch_count = 0;
|
||||
}
|
||||
@ -249,6 +228,11 @@ restart:
|
||||
jh = transaction->t_checkpoint_list;
|
||||
bh = jh2bh(jh);
|
||||
|
||||
/*
|
||||
* The buffer may be writing back, or flushing out in the
|
||||
* last couple of cycles, or re-adding into a new transaction,
|
||||
* need to check it again until it's unlocked.
|
||||
*/
|
||||
if (buffer_locked(bh)) {
|
||||
get_bh(bh);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
@ -294,28 +278,32 @@ restart:
|
||||
}
|
||||
if (!buffer_dirty(bh)) {
|
||||
BUFFER_TRACE(bh, "remove from checkpoint");
|
||||
if (__jbd2_journal_remove_checkpoint(jh))
|
||||
/* The transaction was released; we're done */
|
||||
/*
|
||||
* If the transaction was released or the checkpoint
|
||||
* list was empty, we're done.
|
||||
*/
|
||||
if (__jbd2_journal_remove_checkpoint(jh) ||
|
||||
!transaction->t_checkpoint_list)
|
||||
goto out;
|
||||
continue;
|
||||
} else {
|
||||
/*
|
||||
* We are about to write the buffer, it could be
|
||||
* raced by some other transaction shrink or buffer
|
||||
* re-log logic once we release the j_list_lock,
|
||||
* leave it on the checkpoint list and check status
|
||||
* again to make sure it's clean.
|
||||
*/
|
||||
BUFFER_TRACE(bh, "queue");
|
||||
get_bh(bh);
|
||||
J_ASSERT_BH(bh, !buffer_jwrite(bh));
|
||||
journal->j_chkpt_bhs[batch_count++] = bh;
|
||||
transaction->t_chp_stats.cs_written++;
|
||||
transaction->t_checkpoint_list = jh->b_cpnext;
|
||||
}
|
||||
/*
|
||||
* Important: we are about to write the buffer, and
|
||||
* possibly block, while still holding the journal
|
||||
* lock. We cannot afford to let the transaction
|
||||
* logic start messing around with this buffer before
|
||||
* we write it to disk, as that would break
|
||||
* recoverability.
|
||||
*/
|
||||
BUFFER_TRACE(bh, "queue");
|
||||
get_bh(bh);
|
||||
J_ASSERT_BH(bh, !buffer_jwrite(bh));
|
||||
journal->j_chkpt_bhs[batch_count++] = bh;
|
||||
__buffer_relink_io(jh);
|
||||
transaction->t_chp_stats.cs_written++;
|
||||
|
||||
if ((batch_count == JBD2_NR_BATCH) ||
|
||||
need_resched() ||
|
||||
spin_needbreak(&journal->j_list_lock))
|
||||
need_resched() || spin_needbreak(&journal->j_list_lock) ||
|
||||
jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
|
||||
goto unlock_and_flush;
|
||||
}
|
||||
|
||||
@ -329,38 +317,6 @@ restart:
|
||||
goto restart;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now we issued all of the transaction's buffers, let's deal
|
||||
* with the buffers that are out for I/O.
|
||||
*/
|
||||
restart2:
|
||||
/* Did somebody clean up the transaction in the meanwhile? */
|
||||
if (journal->j_checkpoint_transactions != transaction ||
|
||||
transaction->t_tid != this_tid)
|
||||
goto out;
|
||||
|
||||
while (transaction->t_checkpoint_io_list) {
|
||||
jh = transaction->t_checkpoint_io_list;
|
||||
bh = jh2bh(jh);
|
||||
if (buffer_locked(bh)) {
|
||||
get_bh(bh);
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
wait_on_buffer(bh);
|
||||
/* the journal_head may have gone by now */
|
||||
BUFFER_TRACE(bh, "brelse");
|
||||
__brelse(bh);
|
||||
spin_lock(&journal->j_list_lock);
|
||||
goto restart2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now in whatever state the buffer currently is, we
|
||||
* know that it has been written out and so we can
|
||||
* drop it from the list
|
||||
*/
|
||||
if (__jbd2_journal_remove_checkpoint(jh))
|
||||
break;
|
||||
}
|
||||
out:
|
||||
spin_unlock(&journal->j_list_lock);
|
||||
result = jbd2_cleanup_journal_tail(journal);
|
||||
|
Loading…
Reference in New Issue
Block a user