Revision b15fa9224e6e1239414525d8d556d824701849fc authored by Valentin Vidic on 18 October 2021, 22:15:42 UTC, committed by Linus Torvalds on 19 October 2021, 06:22:03 UTC
Starting with kernel 5.11 built with CONFIG_FORTIFY_SOURCE, mounting an
ocfs2 filesystem with either the o2cb or pcmk cluster stack fails with the
trace below.  The problem is that the strings for the cluster stack and
cluster name are not guaranteed to be null-terminated in the on-disk
representation, while strlcpy() assumes that the source string is always
null-terminated.  This causes a read past the end of the source string,
triggering the buffer overflow detection.

  detected buffer overflow in strlen
  ------------[ cut here ]------------
  kernel BUG at lib/string.c:1149!
  invalid opcode: 0000 [#1] SMP PTI
  CPU: 1 PID: 910 Comm: mount.ocfs2 Not tainted 5.14.0-1-amd64 #1
    Debian 5.14.6-2
  RIP: 0010:fortify_panic+0xf/0x11
  ...
  Call Trace:
   ocfs2_initialize_super.isra.0.cold+0xc/0x18 [ocfs2]
   ocfs2_fill_super+0x359/0x19b0 [ocfs2]
   mount_bdev+0x185/0x1b0
   legacy_get_tree+0x27/0x40
   vfs_get_tree+0x25/0xb0
   path_mount+0x454/0xa20
   __x64_sys_mount+0x103/0x140
   do_syscall_64+0x3b/0xc0
   entry_SYSCALL_64_after_hwframe+0x44/0xae
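
As a minimal sketch of the kind of fix this calls for (illustrative only,
not the actual ocfs2 patch; the structure and field names below are
hypothetical), copy the fixed-size disk field with memcpy() and terminate
the destination explicitly, so strlen() never runs on the possibly
unterminated source:

  #include <string.h>

  #define STACK_LABEL_LEN 4                /* assumed on-disk field size */

  struct disk_label {
          char cluster_stack[STACK_LABEL_LEN];   /* may lack a NUL */
  };

  static void copy_cluster_stack(char *dst, const struct disk_label *dl)
  {
          /* bounded copy: never reads past the fixed-size source field */
          memcpy(dst, dl->cluster_stack, STACK_LABEL_LEN);
          dst[STACK_LABEL_LEN] = '\0';     /* dst must hold LEN + 1 bytes */
  }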

Link: https://lkml.kernel.org/r/20210929180654.32460-1-vvidic@valentin-vidic.from.hr
Signed-off-by: Valentin Vidic <vvidic@valentin-vidic.from.hr>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
ptdump.c
// SPDX-License-Identifier: GPL-2.0

#include <linux/pagewalk.h>
#include <linux/ptdump.h>
#include <linux/kasan.h>

#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
/*
 * This is an optimization for KASAN=y case. Since all kasan page tables
 * eventually point to the kasan_early_shadow_page we could call note_page()
 * right away without walking through lower level page tables. This saves
 * us dozens of seconds (minutes for 5-level config) while checking for
 * W+X mapping or reading kernel_page_tables debugfs file.
 */
static inline int note_kasan_page_table(struct mm_walk *walk,
					unsigned long addr)
{
	struct ptdump_state *st = walk->private;

	st->note_page(st, addr, 4, pte_val(kasan_early_shadow_pte[0]));

	walk->action = ACTION_CONTINUE;

	return 0;
}
#endif

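/*
 * One entry callback per page-table level: each reports the entry's
 * effective protection bits (when the consumer supplies that hook) and
 * emits leaf mappings through note_page().
 */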
static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;
	pgd_t val = READ_ONCE(*pgd);

#if CONFIG_PGTABLE_LEVELS > 4 && \
		(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
	if (pgd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_p4d)))
		return note_kasan_page_table(walk, addr);
#endif

	if (st->effective_prot)
		st->effective_prot(st, 0, pgd_val(val));

	if (pgd_leaf(val))
		st->note_page(st, addr, 0, pgd_val(val));

	return 0;
}

static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;
	p4d_t val = READ_ONCE(*p4d);

#if CONFIG_PGTABLE_LEVELS > 3 && \
		(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
	if (p4d_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pud)))
		return note_kasan_page_table(walk, addr);
#endif

	if (st->effective_prot)
		st->effective_prot(st, 1, p4d_val(val));

	if (p4d_leaf(val))
		st->note_page(st, addr, 1, p4d_val(val));

	return 0;
}

static int ptdump_pud_entry(pud_t *pud, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;
	pud_t val = READ_ONCE(*pud);

#if CONFIG_PGTABLE_LEVELS > 2 && \
		(defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS))
	if (pud_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pmd)))
		return note_kasan_page_table(walk, addr);
#endif

	if (st->effective_prot)
		st->effective_prot(st, 2, pud_val(val));

	if (pud_leaf(val))
		st->note_page(st, addr, 2, pud_val(val));

	return 0;
}

static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;
	pmd_t val = READ_ONCE(*pmd);

#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
	if (pmd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pte)))
		return note_kasan_page_table(walk, addr);
#endif

	if (st->effective_prot)
		st->effective_prot(st, 3, pmd_val(val));

	if (pmd_leaf(val))
		st->note_page(st, addr, 3, pmd_val(val));

	return 0;
}

static int ptdump_pte_entry(pte_t *pte, unsigned long addr,
			    unsigned long next, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;
	pte_t val = ptep_get(pte);

	if (st->effective_prot)
		st->effective_prot(st, 4, pte_val(val));

	st->note_page(st, addr, 4, pte_val(val));

	return 0;
}

static int ptdump_hole(unsigned long addr, unsigned long next,
		       int depth, struct mm_walk *walk)
{
	struct ptdump_state *st = walk->private;

	st->note_page(st, addr, depth, 0);

	return 0;
}

static const struct mm_walk_ops ptdump_ops = {
	.pgd_entry	= ptdump_pgd_entry,
	.p4d_entry	= ptdump_p4d_entry,
	.pud_entry	= ptdump_pud_entry,
	.pmd_entry	= ptdump_pmd_entry,
	.pte_entry	= ptdump_pte_entry,
	.pte_hole	= ptdump_hole,
};

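/*
 * Walk every address range in st->range (the table ends with an empty
 * range whose start equals its end) under the mmap read lock, then emit
 * a final note_page() call with level -1 so the consumer can flush its
 * last accumulated mapping.
 */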
void ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm, pgd_t *pgd)
{
	const struct ptdump_range *range = st->range;

	mmap_read_lock(mm);
	while (range->start != range->end) {
		walk_page_range_novma(mm, range->start, range->end,
				      &ptdump_ops, pgd, st);
		range++;
	}
	mmap_read_unlock(mm);

	/* Flush out the last page */
	st->note_page(st, 0, -1, 0);
}
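
/*
 * Usage sketch (illustrative only; the "example_" names are hypothetical
 * and the block below is compiled out): an architecture supplies a
 * ptdump_state with a note_page() callback plus a range table terminated
 * by an empty entry, then calls ptdump_walk_pgd().
 */
#ifdef PTDUMP_USAGE_EXAMPLE	/* never defined; example only */
static void example_note_page(struct ptdump_state *st, unsigned long addr,
			      int level, u64 val)
{
	/* coalesce or print mappings here; level -1 flushes the last one */
}

static const struct ptdump_range example_ranges[] = {
	{ PAGE_OFFSET, ~0UL },	/* e.g. the kernel linear map */
	{ 0, 0 }		/* empty range terminates the walk */
};

static void example_dump(void)
{
	struct ptdump_state st = {
		.note_page	= example_note_page,
		.range		= example_ranges,
	};

	/* a NULL pgd walks the mm's own page tables */
	ptdump_walk_pgd(&st, &init_mm, NULL);
}
#endif /* PTDUMP_USAGE_EXAMPLE */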