https://github.com/torvalds/linux
Revision 9230a0b65b47fe6856c4468ec0175c4987e5bede authored by Dave Chinner on 20 November 2018, 06:50:08 UTC, committed by Darrick J. Wong on 21 November 2018, 18:10:53 UTC
Long saga. There have been days spent following this through dead end
after dead end in multi-GB event traces. This morning, after writing
a trace-cmd wrapper that enabled me to be more selective about XFS
trace points, I discovered that I could get just enough essential
tracepoints enabled that there was a 50:50 chance the fsx config
would fail at ~115k ops. If it didn't fail at op 115547, I stopped
fsx at op 115548 anyway.

That gave me two traces - one where the problem manifested, and one
where it didn't. After refining the traces to have the necessary
information, I found that in the failing case there was a real
extent in the COW fork compared to an unwritten extent in the
working case.

Walking back through the two traces to the point where the COW fork
extents actually diverged, I found that the bad case had an extra
unwritten extent in it. This is likely because the bug it led me to
had triggered multiple times in those 115k ops, leaving stray
COW extents around. What I saw was that a COW delalloc conversion to
an unwritten extent (as such conversions always should be, via
xfs_iomap_write_allocate()) resulted in a /written extent/:

xfs_writepage:        dev 259:0 ino 0x83 pgoff 0x17000 size 0x79a00 offset 0 length 0
xfs_iext_remove:      dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/2 offset 32 block 152 count 20 flag 1 caller xfs_bmap_add_extent_delay_real
xfs_bmap_pre_update:  dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 4503599627239429 count 31 flag 0 caller xfs_bmap_add_extent_delay_real
xfs_bmap_post_update: dev 259:0 ino 0x83 state RC|LF|RF|COW cur 0xffff888247b899c0/1 offset 1 block 121 count 51 flag 0 caller xfs_bmap_add_ex

Basically, COW fork before:

	0 1            32          52
	+H+DDDDDDDDDDDD+UUUUUUUUUUU+
	   PREV		RIGHT

COW delalloc conversion allocates:

	  1	       32
	  +uuuuuuuuuuuu+
	  NEW

And the result according to the xfs_bmap_post_update trace was:

	0 1            32          52
	+H+wwwwwwwwwwwwwwwwwwwwwwww+
	   PREV

Which is clearly wrong - it should be a merged unwritten extent,
not a written extent.

That led me to look at the LEFT_FILLING|RIGHT_FILLING|RIGHT_CONTIG
case in xfs_bmap_add_extent_delay_real(), and sure enough, there's
the bug.

It takes the old delalloc extent (PREV) and adds the length of the
RIGHT extent to it, takes the start block from NEW, removes the
RIGHT extent and then updates PREV with the new extent.

What it fails to do is update PREV.br_state. For delalloc, this is
always XFS_EXT_NORM, while in this case we are converting the
delayed allocation to unwritten, so it needs to be updated to
XFS_EXT_UNWRITTEN. This LF|RF|RC case does not do this, and so
the resultant extent is always written.
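
The fix, then, is to carry the new extent state into PREV along with
the geometry. A minimal sketch of the shape of the change, using the
field and flag names described above (the surrounding code in
xfs_bmap_add_extent_delay_real() is elided, not quoted verbatim):

	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
		/* Fill the whole delalloc extent; the right neighbour
		 * is contiguous, the left is not. */
		PREV.br_startblock = new->br_startblock;
		PREV.br_blockcount += RIGHT.br_blockcount;
		/* The missing assignment: without it PREV keeps the
		 * delalloc XFS_EXT_NORM state and comes out written. */
		PREV.br_state = new->br_state;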

And that's the bug I've been chasing for a week - a bmap btree bug,
not a reflink/dedupe/copy_file_range bug, but a BMBT bug introduced
with the recent in core extent tree scalability enhancements.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
1 parent 2c30717
xfs: delalloc -> unwritten COW fork allocation can go wrong
test_kasan.c
/*
 *
 * Copyright (c) 2014 Samsung Electronics Co., Ltd.
 * Author: Andrey Ryabinin <a.ryabinin@samsung.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 */

#define pr_fmt(fmt) "kasan test: %s " fmt, __func__

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/module.h>
#include <linux/kasan.h>

/*
 * Note: test functions are marked noinline so that their names appear in
 * reports.
 */

static noinline void __init kmalloc_oob_right(void)
{
	char *ptr;
	size_t size = 123;

	pr_info("out-of-bounds to right\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

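	/* Write one byte past the end of the allocation. */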
	ptr[size] = 'x';
	kfree(ptr);
}

static noinline void __init kmalloc_oob_left(void)
{
	char *ptr;
	size_t size = 15;

	pr_info("out-of-bounds to left\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

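	/* Read the byte just before the start of the allocation. */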
	*ptr = *(ptr - 1);
	kfree(ptr);
}

static noinline void __init kmalloc_node_oob_right(void)
{
	char *ptr;
	size_t size = 4096;

	pr_info("kmalloc_node(): out-of-bounds to right\n");
	ptr = kmalloc_node(size, GFP_KERNEL, 0);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

	ptr[size] = 0;
	kfree(ptr);
}

#ifdef CONFIG_SLUB
static noinline void __init kmalloc_pagealloc_oob_right(void)
{
	char *ptr;
	size_t size = KMALLOC_MAX_CACHE_SIZE + 10;

	/* Allocate a chunk that does not fit into a SLUB cache to trigger
	 * the page allocator fallback.
	 */
	pr_info("kmalloc pagealloc allocation: out-of-bounds to right\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

	ptr[size] = 0;
	kfree(ptr);
}

static noinline void __init kmalloc_pagealloc_uaf(void)
{
	char *ptr;
	size_t size = KMALLOC_MAX_CACHE_SIZE + 10;

	pr_info("kmalloc pagealloc allocation: use-after-free\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

	kfree(ptr);
	ptr[0] = 0;
}

static noinline void __init kmalloc_pagealloc_invalid_free(void)
{
	char *ptr;
	size_t size = KMALLOC_MAX_CACHE_SIZE + 10;

	pr_info("kmalloc pagealloc allocation: invalid-free\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

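	/* Free a pointer that was never returned by kmalloc(). */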
	kfree(ptr + 1);
}
#endif

static noinline void __init kmalloc_large_oob_right(void)
{
	char *ptr;
	size_t size = KMALLOC_MAX_CACHE_SIZE - 256;
	/* Allocate a chunk that is large enough, but still fits into a slab
	 * and does not trigger the page allocator fallback in SLUB.
	 */
	pr_info("kmalloc large allocation: out-of-bounds to right\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

	ptr[size] = 0;
	kfree(ptr);
}

static noinline void __init kmalloc_oob_krealloc_more(void)
{
	char *ptr1, *ptr2;
	size_t size1 = 17;
	size_t size2 = 19;

	pr_info("out-of-bounds after krealloc more\n");
	ptr1 = kmalloc(size1, GFP_KERNEL);
	ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
	if (!ptr1 || !ptr2) {
		pr_err("Allocation failed\n");
		kfree(ptr1);
		return;
	}

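	/* Write one byte past the end of the grown allocation. */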
	ptr2[size2] = 'x';
	kfree(ptr2);
}

static noinline void __init kmalloc_oob_krealloc_less(void)
{
	char *ptr1, *ptr2;
	size_t size1 = 17;
	size_t size2 = 15;

	pr_info("out-of-bounds after krealloc less\n");
	ptr1 = kmalloc(size1, GFP_KERNEL);
	ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
	if (!ptr1 || !ptr2) {
		pr_err("Allocation failed\n");
		kfree(ptr1);
		return;
	}
	ptr2[size2] = 'x';
	kfree(ptr2);
}

static noinline void __init kmalloc_oob_16(void)
{
	struct {
		u64 words[2];
	} *ptr1, *ptr2;

	pr_info("kmalloc out-of-bounds for 16-bytes access\n");
	ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL);
	ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL);
	if (!ptr1 || !ptr2) {
		pr_err("Allocation failed\n");
		kfree(ptr1);
		kfree(ptr2);
		return;
	}
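	/* Copy 16 bytes out of an allocation that is 3 bytes short. */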
	*ptr1 = *ptr2;
	kfree(ptr1);
	kfree(ptr2);
}

static noinline void __init kmalloc_oob_memset_2(void)
{
	char *ptr;
	size_t size = 8;

	pr_info("out-of-bounds in memset2\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

	memset(ptr+7, 0, 2);
	kfree(ptr);
}

static noinline void __init kmalloc_oob_memset_4(void)
{
	char *ptr;
	size_t size = 8;

	pr_info("out-of-bounds in memset4\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

	memset(ptr+5, 0, 4);
	kfree(ptr);
}

static noinline void __init kmalloc_oob_memset_8(void)
{
	char *ptr;
	size_t size = 8;

	pr_info("out-of-bounds in memset8\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

	memset(ptr+1, 0, 8);
	kfree(ptr);
}

static noinline void __init kmalloc_oob_memset_16(void)
{
	char *ptr;
	size_t size = 16;

	pr_info("out-of-bounds in memset16\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

	memset(ptr+1, 0, 16);
	kfree(ptr);
}

static noinline void __init kmalloc_oob_in_memset(void)
{
	char *ptr;
	size_t size = 666;

	pr_info("out-of-bounds in memset\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

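	/* memset() writes 5 bytes past the end of the allocation. */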
	memset(ptr, 0, size+5);
	kfree(ptr);
}

static noinline void __init kmalloc_uaf(void)
{
	char *ptr;
	size_t size = 10;

	pr_info("use-after-free\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

	kfree(ptr);
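	/* Write to the object after it has been freed. */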
	*(ptr + 8) = 'x';
}

static noinline void __init kmalloc_uaf_memset(void)
{
	char *ptr;
	size_t size = 33;

	pr_info("use-after-free in memset\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}

	kfree(ptr);
	memset(ptr, 0, size);
}

static noinline void __init kmalloc_uaf2(void)
{
	char *ptr1, *ptr2;
	size_t size = 43;

	pr_info("use-after-free after another kmalloc\n");
	ptr1 = kmalloc(size, GFP_KERNEL);
	if (!ptr1) {
		pr_err("Allocation failed\n");
		return;
	}

	kfree(ptr1);
	ptr2 = kmalloc(size, GFP_KERNEL);
	if (!ptr2) {
		pr_err("Allocation failed\n");
		return;
	}

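	/* Write through the stale ptr1; its slot may have been reused for ptr2. */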
	ptr1[40] = 'x';
	if (ptr1 == ptr2)
		pr_err("Could not detect use-after-free: ptr1 == ptr2\n");
	kfree(ptr2);
}

static noinline void __init kmem_cache_oob(void)
{
	char *p;
	size_t size = 200;
	struct kmem_cache *cache = kmem_cache_create("test_cache",
						size, 0,
						0, NULL);
	if (!cache) {
		pr_err("Cache allocation failed\n");
		return;
	}
	pr_info("out-of-bounds in kmem_cache_alloc\n");
	p = kmem_cache_alloc(cache, GFP_KERNEL);
	if (!p) {
		pr_err("Allocation failed\n");
		kmem_cache_destroy(cache);
		return;
	}

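	/* Read one byte past the end of the cache object. */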
	*p = p[size];
	kmem_cache_free(cache, p);
	kmem_cache_destroy(cache);
}

static noinline void __init memcg_accounted_kmem_cache(void)
{
	int i;
	char *p;
	size_t size = 200;
	struct kmem_cache *cache;

	cache = kmem_cache_create("test_cache", size, 0, SLAB_ACCOUNT, NULL);
	if (!cache) {
		pr_err("Cache allocation failed\n");
		return;
	}

	pr_info("allocate memcg accounted object\n");
	/*
	 * Several allocations with a delay to allow for lazy per memcg kmem
	 * cache creation.
	 */
	for (i = 0; i < 5; i++) {
		p = kmem_cache_alloc(cache, GFP_KERNEL);
		if (!p)
			goto free_cache;

		kmem_cache_free(cache, p);
		msleep(100);
	}

free_cache:
	kmem_cache_destroy(cache);
}

static char global_array[10];

static noinline void __init kasan_global_oob(void)
{
	volatile int i = 3;
	char *p = &global_array[ARRAY_SIZE(global_array) + i];

	pr_info("out-of-bounds global variable\n");
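	/* Volatile access keeps the out-of-bounds read from being optimized away. */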
	*(volatile char *)p;
}

static noinline void __init kasan_stack_oob(void)
{
	char stack_array[10];
	volatile int i = 0;
	char *p = &stack_array[ARRAY_SIZE(stack_array) + i];

	pr_info("out-of-bounds on stack\n");
	*(volatile char *)p;
}

static noinline void __init ksize_unpoisons_memory(void)
{
	char *ptr;
	size_t size = 123, real_size;

	pr_info("ksize() unpoisons the whole allocated chunk\n");
	ptr = kmalloc(size, GFP_KERNEL);
	if (!ptr) {
		pr_err("Allocation failed\n");
		return;
	}
	real_size = ksize(ptr);
	/* This access doesn't trigger an error. */
	ptr[size] = 'x';
	/* This one does. */
	ptr[real_size] = 'y';
	kfree(ptr);
}

static noinline void __init copy_user_test(void)
{
	char *kmem;
	char __user *usermem;
	size_t size = 10;
	int unused;

	kmem = kmalloc(size, GFP_KERNEL);
	if (!kmem)
		return;

	usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE,
			    PROT_READ | PROT_WRITE | PROT_EXEC,
			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
	if (IS_ERR(usermem)) {
		pr_err("Failed to allocate user memory\n");
		kfree(kmem);
		return;
	}

	pr_info("out-of-bounds in copy_from_user()\n");
	unused = copy_from_user(kmem, usermem, size + 1);

	pr_info("out-of-bounds in copy_to_user()\n");
	unused = copy_to_user(usermem, kmem, size + 1);

	pr_info("out-of-bounds in __copy_from_user()\n");
	unused = __copy_from_user(kmem, usermem, size + 1);

	pr_info("out-of-bounds in __copy_to_user()\n");
	unused = __copy_to_user(usermem, kmem, size + 1);

	pr_info("out-of-bounds in __copy_from_user_inatomic()\n");
	unused = __copy_from_user_inatomic(kmem, usermem, size + 1);

	pr_info("out-of-bounds in __copy_to_user_inatomic()\n");
	unused = __copy_to_user_inatomic(usermem, kmem, size + 1);

	pr_info("out-of-bounds in strncpy_from_user()\n");
	unused = strncpy_from_user(kmem, usermem, size + 1);

	vm_munmap((unsigned long)usermem, PAGE_SIZE);
	kfree(kmem);
}

static noinline void __init use_after_scope_test(void)
{
	volatile char *volatile p;

	pr_info("use-after-scope on int\n");
	{
		int local = 0;

		p = (char *)&local;
	}
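	/* Access the variable after its scope has ended. */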
	p[0] = 1;
	p[3] = 1;

	pr_info("use-after-scope on array\n");
	{
		char local[1024] = {0};

		p = local;
	}
	p[0] = 1;
	p[1023] = 1;
}

static noinline void __init kasan_alloca_oob_left(void)
{
	volatile int i = 10;
	char alloca_array[i];
	char *p = alloca_array - 1;

	pr_info("out-of-bounds to left on alloca\n");
	*(volatile char *)p;
}

static noinline void __init kasan_alloca_oob_right(void)
{
	volatile int i = 10;
	char alloca_array[i];
	char *p = alloca_array + i;

	pr_info("out-of-bounds to right on alloca\n");
	*(volatile char *)p;
}

static noinline void __init kmem_cache_double_free(void)
{
	char *p;
	size_t size = 200;
	struct kmem_cache *cache;

	cache = kmem_cache_create("test_cache", size, 0, 0, NULL);
	if (!cache) {
		pr_err("Cache allocation failed\n");
		return;
	}
	pr_info("double-free on heap object\n");
	p = kmem_cache_alloc(cache, GFP_KERNEL);
	if (!p) {
		pr_err("Allocation failed\n");
		kmem_cache_destroy(cache);
		return;
	}

	kmem_cache_free(cache, p);
	kmem_cache_free(cache, p);
	kmem_cache_destroy(cache);
}

static noinline void __init kmem_cache_invalid_free(void)
{
	char *p;
	size_t size = 200;
	struct kmem_cache *cache;

	cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU,
				  NULL);
	if (!cache) {
		pr_err("Cache allocation failed\n");
		return;
	}
	pr_info("invalid-free of heap object\n");
	p = kmem_cache_alloc(cache, GFP_KERNEL);
	if (!p) {
		pr_err("Allocation failed\n");
		kmem_cache_destroy(cache);
		return;
	}

	/* Trigger invalid free, the object doesn't get freed */
	kmem_cache_free(cache, p + 1);

	/*
	 * Properly free the object to prevent the "Objects remaining in
	 * test_cache on __kmem_cache_shutdown" BUG failure.
	 */
	kmem_cache_free(cache, p);

	kmem_cache_destroy(cache);
}

static noinline void __init kasan_memchr(void)
{
	char *ptr;
	size_t size = 24;

	pr_info("out-of-bounds in memchr\n");
	ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
	if (!ptr)
		return;

	memchr(ptr, '1', size + 1);
	kfree(ptr);
}

static noinline void __init kasan_memcmp(void)
{
	char *ptr;
	size_t size = 24;
	int arr[9];

	pr_info("out-of-bounds in memcmp\n");
	ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
	if (!ptr)
		return;

	memset(arr, 0, sizeof(arr));
	memcmp(ptr, arr, size+1);
	kfree(ptr);
}

static noinline void __init kasan_strings(void)
{
	char *ptr;
	size_t size = 24;

	pr_info("use-after-free in strchr\n");
	ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
	if (!ptr)
		return;

	kfree(ptr);

	/*
	 * Try to cause only 1 invalid access (less spam in dmesg).
	 * For that we need ptr to point to zeroed byte.
	 * Skip metadata that could be stored in freed object so ptr
	 * will likely point to zeroed byte.
	 */
	ptr += 16;
	strchr(ptr, '1');

	pr_info("use-after-free in strrchr\n");
	strrchr(ptr, '1');

	pr_info("use-after-free in strcmp\n");
	strcmp(ptr, "2");

	pr_info("use-after-free in strncmp\n");
	strncmp(ptr, "2", 1);

	pr_info("use-after-free in strlen\n");
	strlen(ptr);

	pr_info("use-after-free in strnlen\n");
	strnlen(ptr, 1);
}

static int __init kmalloc_tests_init(void)
{
	/*
	 * Temporarily enable multi-shot mode. Otherwise, we'd only get a
	 * report for the first case.
	 */
	bool multishot = kasan_save_enable_multi_shot();

	kmalloc_oob_right();
	kmalloc_oob_left();
	kmalloc_node_oob_right();
#ifdef CONFIG_SLUB
	kmalloc_pagealloc_oob_right();
	kmalloc_pagealloc_uaf();
	kmalloc_pagealloc_invalid_free();
#endif
	kmalloc_large_oob_right();
	kmalloc_oob_krealloc_more();
	kmalloc_oob_krealloc_less();
	kmalloc_oob_16();
	kmalloc_oob_in_memset();
	kmalloc_oob_memset_2();
	kmalloc_oob_memset_4();
	kmalloc_oob_memset_8();
	kmalloc_oob_memset_16();
	kmalloc_uaf();
	kmalloc_uaf_memset();
	kmalloc_uaf2();
	kmem_cache_oob();
	memcg_accounted_kmem_cache();
	kasan_stack_oob();
	kasan_global_oob();
	kasan_alloca_oob_left();
	kasan_alloca_oob_right();
	ksize_unpoisons_memory();
	copy_user_test();
	use_after_scope_test();
	kmem_cache_double_free();
	kmem_cache_invalid_free();
	kasan_memchr();
	kasan_memcmp();
	kasan_strings();

	kasan_restore_multi_shot(multishot);

	return -EAGAIN;
}

module_init(kmalloc_tests_init);
MODULE_LICENSE("GPL");