Revision 082cd4ec240b8734a82a89ffb890216ac98fec68 authored by Ye Bin on 06 May 2021, 14:10:42 UTC, committed by Theodore Ts'o on 06 June 2021, 14:09:55 UTC
We got the following BUG_ON when running fsstress with IO fault
injection:

[130747.323114] kernel BUG at fs/ext4/extents_status.c:762!
[130747.323117] Internal error: Oops - BUG: 0 [#1] SMP
......
[130747.334329] Call trace:
[130747.334553]  ext4_es_cache_extent+0x150/0x168 [ext4]
[130747.334975]  ext4_cache_extents+0x64/0xe8 [ext4]
[130747.335368]  ext4_find_extent+0x300/0x330 [ext4]
[130747.335759]  ext4_ext_map_blocks+0x74/0x1178 [ext4]
[130747.336179]  ext4_map_blocks+0x2f4/0x5f0 [ext4]
[130747.336567]  ext4_mpage_readpages+0x4a8/0x7a8 [ext4]
[130747.336995]  ext4_readpage+0x54/0x100 [ext4]
[130747.337359]  generic_file_buffered_read+0x410/0xae8
[130747.337767]  generic_file_read_iter+0x114/0x190
[130747.338152]  ext4_file_read_iter+0x5c/0x140 [ext4]
[130747.338556]  __vfs_read+0x11c/0x188
[130747.338851]  vfs_read+0x94/0x150
[130747.339110]  ksys_read+0x74/0xf0

This patch follows Jan Kara's suggestion in:
https://patchwork.ozlabs.org/project/linux-ext4/patch/20210428085158.3728201-1-yebin10@huawei.com/

"I see. Now I understand your patch. Honestly, seeing how fragile is
trying to fix extent tree after split has failed in the middle, I would
probably go even further and make sure we fix the tree properly in case
of ENOSPC and EDQUOT (those are easily user triggerable). Anything else
indicates a HW problem or fs corruption so I'd rather leave the extent
tree as is and don't try to fix it (which also means we will not create
overlapping extents)."

Cc: stable@kernel.org
Signed-off-by: Ye Bin <yebin10@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210506141042.3298679-1-yebin10@huawei.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
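To make the suggested error handling concrete, here is a minimal sketch
of the failure path in ext4_split_extent_at(). The fixup helper name
and the surrounding variables are hypothetical illustrations, not the
actual patch; consult the lore link above for the real code:

	/*
	 * Hypothetical sketch of the suggested error handling -- not the
	 * actual patch. After the split fails partway through, repair the
	 * extent tree only for errors a user can easily trigger; anything
	 * else hints at HW problems or fs corruption, where touching the
	 * tree risks creating overlapping extents.
	 */
	if (err == -ENOSPC || err == -EDQUOT) {
		/* hypothetical helper: restore the original extent length */
		ext4_fix_extent_len(handle, inode, path, ex, orig_len);
	}
	/* for all other errors, leave the extent tree as is */
	return err;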
1 parent b45f189
ptdump.c
// SPDX-License-Identifier: GPL-2.0

#include <linux/pagewalk.h>
#include <linux/ptdump.h>
#include <linux/kasan.h>

#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
/*
 * This is an optimization for the KASAN=y case. Since all KASAN page
 * tables eventually point to the kasan_early_shadow_page, we can call
 * note_page() right away without walking through the lower-level page
 * tables. This saves us dozens of seconds (minutes for a 5-level
 * configuration) when checking for W+X mappings or reading the
 * kernel_page_tables debugfs file.
 */
static inline int note_kasan_page_table(struct mm_walk *walk,
					unsigned long addr)
{
	struct ptdump_state *st = walk->private;

	st->note_page(st, addr, 4, pte_val(kasan_early_shadow_pte[0]));

	walk->action = ACTION_CONTINUE;

	return 0;
}
#endif
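
/*
 * One callback per page-table level. Each reports leaf entries to
 * note_page() with the depth of the level that mapped them:
 * 0 = PGD, 1 = P4D, 2 = PUD, 3 = PMD, 4 = PTE. When the caller
 * supplies effective_prot(), it is fed the raw value at every level
 * so cumulative permissions can be tracked.
 */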
static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;
	pgd_t val = READ_ONCE(*pgd);

#if CONFIG_PGTABLE_LEVELS > 4 && \
	(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
	if (pgd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_p4d)))
		return note_kasan_page_table(walk, addr);
#endif

	if (st->effective_prot)
		st->effective_prot(st, 0, pgd_val(val));

	if (pgd_leaf(val))
		st->note_page(st, addr, 0, pgd_val(val));

	return 0;
}

static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;
	p4d_t val = READ_ONCE(*p4d);

#if CONFIG_PGTABLE_LEVELS > 3 && \
	(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
	if (p4d_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pud)))
		return note_kasan_page_table(walk, addr);
#endif

	if (st->effective_prot)
		st->effective_prot(st, 1, p4d_val(val));

	if (p4d_leaf(val))
		st->note_page(st, addr, 1, p4d_val(val));

	return 0;
}

static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;
	pud_t val = READ_ONCE(*pud);

#if CONFIG_PGTABLE_LEVELS > 2 && \
	(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
	if (pud_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pmd)))
		return note_kasan_page_table(walk, addr);
#endif

	if (st->effective_prot)
		st->effective_prot(st, 2, pud_val(val));

	if (pud_leaf(val))
		st->note_page(st, addr, 2, pud_val(val));

	return 0;
}

static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;
	pmd_t val = READ_ONCE(*pmd);

#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
	if (pmd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pte)))
		return note_kasan_page_table(walk, addr);
#endif

	if (st->effective_prot)
		st->effective_prot(st, 3, pmd_val(val));

	if (pmd_leaf(val))
		st->note_page(st, addr, 3, pmd_val(val));

	return 0;
}
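
/* PTEs have no lower level to descend into; report every entry. */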
static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;
	pte_t val = READ_ONCE(*pte);

	if (st->effective_prot)
		st->effective_prot(st, 4, pte_val(val));

	st->note_page(st, addr, 4, pte_val(val));

	return 0;
}
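
/* Report a gap in the page tables at the depth where it was detected. */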
static int ptdump_hole(unsigned long addr, unsigned long next,
		       int depth, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;

	st->note_page(st, addr, depth, 0);

	return 0;
}

static const struct mm_walk_ops ptdump_ops = {
	.pgd_entry	= ptdump_pgd_entry,
	.p4d_entry	= ptdump_p4d_entry,
	.pud_entry	= ptdump_pud_entry,
	.pmd_entry	= ptdump_pmd_entry,
	.pte_entry	= ptdump_pte_entry,
	.pte_hole	= ptdump_hole,
};
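
/*
 * Walk every range in st->range (terminated by a range whose start
 * equals its end) under the mmap read lock, then make one final
 * note_page() call so the consumer can flush out its last entry.
 */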
void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
{
	const struct ptdump_range *range = st->range;

	mmap_read_lock(mm);
	while (range->start != range->end) {
		walk_page_range_novma(mm, range->start, range->end,
				      &ptdump_ops, pgd, st);
		range++;
	}
	mmap_read_unlock(mm);

	/* Flush out the last page */
	st->note_page(st, 0, -1, 0);
}
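
For context, a minimal consumer of this walker might look like the
sketch below. The counting_state wrapper, count_leaf() callback, and
the address range table are illustrative assumptions, not code from any
particular architecture; the sketch only assumes the note_page()
signature used by this file's callers (state, address, level, raw
value). Real users, such as the debugfs page-table dump, build their
textual output in note_page() instead.

/*
 * Hypothetical example (not part of this file): count leaf entries
 * per page-table level in the kernel address space.
 */
struct counting_state {
	struct ptdump_state ptdump;
	unsigned long leaves[5];
};

static void count_leaf(struct ptdump_state *pt_st, unsigned long addr,
		       int level, u64 val)
{
	struct counting_state *st =
		container_of(pt_st, struct counting_state, ptdump);

	/* level -1 is the final flush call; 0..4 are PGD..PTE */
	if (level >= 0 && val)
		st->leaves[level]++;
}

static const struct ptdump_range kernel_ptdump_range[] = {
	{PAGE_OFFSET, ~0UL},	/* illustrative; arch-specific in practice */
	{0, 0}			/* empty range terminates the walk */
};

static void count_kernel_leaves(void)
{
	struct counting_state st = {
		.ptdump = {
			.note_page	= count_leaf,
			.range		= kernel_ptdump_range,
		},
	};

	/* NULL pgd makes the walker use the mm's own page tables */
	ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
}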