https://github.com/torvalds/linux
Revision 758f2dfcf8a249b1f1510aa32e625c2ec20642a3 authored by Filipe Manana on 19 November 2015, 11:45:48 UTC, committed by Chris Mason on 25 November 2015, 13:22:08 UTC
Currently scrub can race with the cleaner kthread when the latter attempts
to delete an unused block group, and the result is that the cleaner
kthread is prevented from ever deleting the block group later - unless the
block group becomes used and unused again. The following diagram illustrates
that race:

              CPU 1                                 CPU 2

 cleaner kthread
   btrfs_delete_unused_bgs()

     gets block group X from
     fs_info->unused_bgs and
     removes it from that list

                                             scrub_enumerate_chunks()

                                               searches device tree using
                                               its commit root

                                               finds device extent for
                                               block group X

                                               gets block group X from the tree
                                               fs_info->block_group_cache_tree
                                               (via btrfs_lookup_block_group())

                                               sets bg X to RO

     sees the block group is
     already RO and therefore
     doesn't delete it nor adds
     it back to unused list

So fix this by making scrub add the block group back to the list of
unused block groups if the block group is still unused when scrub has
finished with it and it hasn't already been removed.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
1 parent 020d5b7
Raw File
Tip revision: 758f2dfcf8a249b1f1510aa32e625c2ec20642a3 authored by Filipe Manana on 19 November 2015, 11:45:48 UTC
Btrfs: fix scrub preventing unused block groups from being deleted
Tip revision: 758f2df
frame_vector.c
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/sched.h>

/**
 * get_vaddr_frames() - map virtual addresses to pfns
 * @start:	starting user address
 * @nr_frames:	number of pages / pfns from start to map
 * @write:	whether pages will be written to by the caller
 * @force:	whether to force write access even if user mapping is
 *		readonly. See description of the same argument of
 *		get_user_pages().
 * @vec:	structure which receives pages / pfns of the addresses mapped.
 *		It should have space for at least nr_frames entries.
 *
 * This function maps virtual addresses from @start and fills @vec structure
 * with page frame numbers or page pointers to corresponding pages (choice
 * depends on the type of the vma underlying the virtual address). If @start
 * belongs to a normal vma, the function grabs reference to each of the pages
 * to pin them in memory. If @start belongs to VM_IO | VM_PFNMAP vma, we don't
 * touch page structures and the caller must make sure pfns aren't reused for
 * anything else while he is using them.
 *
 * The function returns number of pages mapped which may be less than
 * @nr_frames. In particular we stop mapping if there are more vmas of
 * different type underlying the specified range of virtual addresses.
 * When the function isn't able to map a single page, it returns error.
 *
 * If @nr_frames exceeds vec->nr_allocated, a one-time warning is emitted and
 * the request is clamped to vec->nr_allocated. Note that @write and @force
 * are only passed to get_user_pages_locked(); the VM_IO | VM_PFNMAP path
 * below does not use them.
 *
 * This function takes care of grabbing mmap_sem as necessary.
 *
 * Return: number of frames stored in @vec (also recorded in vec->nr_frames),
 * 0 if @nr_frames is 0, or a negative error code: -EFAULT if no vma covers
 * @start or not a single frame could be mapped, otherwise the error returned
 * by get_user_pages_locked() or by follow_pfn() for the first frame.
 */
int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
		     bool write, bool force, struct frame_vector *vec)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	int ret = 0;
	int err;
	int locked;

	/* Nothing requested: succeed trivially without touching @vec. */
	if (nr_frames == 0)
		return 0;

	/* Never store more entries than @vec has room for. */
	if (WARN_ON_ONCE(nr_frames > vec->nr_allocated))
		nr_frames = vec->nr_allocated;

	down_read(&mm->mmap_sem);
	/*
	 * Tracks whether we still hold mmap_sem. Its address is handed to
	 * get_user_pages_locked() below and the exit path only releases the
	 * semaphore if it is still non-zero at that point.
	 */
	locked = 1;
	/* The type of the vma covering the first byte selects the path. */
	vma = find_vma_intersection(mm, start, start + 1);
	if (!vma) {
		ret = -EFAULT;
		goto out;
	}
	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
		/* Normal vma: @vec receives struct page pointers. */
		vec->got_ref = true;
		vec->is_pfns = false;
		ret = get_user_pages_locked(current, mm, start, nr_frames,
			write, force, (struct page **)(vec->ptrs), &locked);
		goto out;
	}

	/* VM_IO | VM_PFNMAP vma: @vec receives raw pfns, no references. */
	vec->got_ref = false;
	vec->is_pfns = true;
	do {
		unsigned long *nums = frame_vector_pfns(vec);

		/* Translate page by page while the page fits in this vma. */
		while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) {
			err = follow_pfn(vma, start, &nums[ret]);
			if (err) {
				/*
				 * Report the error only if nothing has been
				 * mapped yet; otherwise return the partial
				 * count gathered so far.
				 */
				if (ret == 0)
					ret = err;
				goto out;
			}
			start += PAGE_SIZE;
			ret++;
		}
		/*
		 * We stop if we have enough pages or if VMA doesn't completely
		 * cover the tail page.
		 */
		if (ret >= nr_frames || start < vma->vm_end)
			break;
		/* Continue only into another VM_IO | VM_PFNMAP vma at @start. */
		vma = find_vma_intersection(mm, start, start + 1);
	} while (vma && vma->vm_flags & (VM_IO | VM_PFNMAP));
out:
	if (locked)
		up_read(&mm->mmap_sem);
	/* Mapping zero frames is reported as a fault, never as success. */
	if (!ret)
		ret = -EFAULT;
	/* vec->nr_frames is only updated when at least one frame was mapped. */
	if (ret > 0)
		vec->nr_frames = ret;
	return ret;
}
EXPORT_SYMBOL(get_vaddr_frames);

/**
 * put_vaddr_frames() - release page references held by a frame vector
 * @vec:	frame vector to put
 *
 * If get_vaddr_frames() took page references for @vec (vec->got_ref is set),
 * drop one reference per stored frame. In every case the vector is emptied
 * (vec->nr_frames is reset to 0) so that it can be handed to
 * get_vaddr_frames() again.
 */
void put_vaddr_frames(struct frame_vector *vec)
{
	if (vec->got_ref) {
		/*
		 * The vector may have been converted to pfns after the
		 * references were taken, in which case frame_vector_pages()
		 * has to convert it back. That conversion is not expected to
		 * fail; if it does, warn and leave the references alone.
		 */
		struct page **held = frame_vector_pages(vec);

		if (!WARN_ON(IS_ERR(held))) {
			int idx;

			for (idx = 0; idx < vec->nr_frames; idx++)
				put_page(held[idx]);
			vec->got_ref = false;
		}
	}

	/* Mark the vector empty whether or not references were dropped. */
	vec->nr_frames = 0;
}
EXPORT_SYMBOL(put_vaddr_frames);

/**
 * frame_vector_to_pages - switch a frame vector to page pointers
 * @vec:	frame vector to convert
 *
 * Rewrite the entries of @vec in place so that they are struct page pointers
 * instead of pfns. A vector that already holds page pointers is left alone.
 * No page references are taken for the resulting page structures.
 *
 * Return: 0 on success (including when no conversion was needed), -EINVAL
 * if any stored pfn is not valid; in that case @vec is left unmodified.
 */
int frame_vector_to_pages(struct frame_vector *vec)
{
	unsigned long *pfns;
	struct page **page_slots;
	int idx;

	if (vec->is_pfns) {
		pfns = frame_vector_pfns(vec);

		/* Check every pfn first so a failure leaves @vec untouched. */
		for (idx = 0; idx < vec->nr_frames; idx++) {
			if (!pfn_valid(pfns[idx]))
				return -EINVAL;
		}

		/* Both views alias the same storage; convert slot by slot. */
		page_slots = (struct page **)pfns;
		for (idx = 0; idx < vec->nr_frames; idx++)
			page_slots[idx] = pfn_to_page(pfns[idx]);

		vec->is_pfns = false;
	}

	return 0;
}
EXPORT_SYMBOL(frame_vector_to_pages);

/**
 * frame_vector_to_pfns - switch a frame vector to pfns
 * @vec:	frame vector to convert
 *
 * Rewrite the entries of @vec in place so that they are page frame numbers
 * instead of struct page pointers. A vector that already holds pfns is left
 * alone.
 */
void frame_vector_to_pfns(struct frame_vector *vec)
{
	if (!vec->is_pfns) {
		/* Both views alias the same storage; convert slot by slot. */
		struct page **page_slots = (struct page **)(vec->ptrs);
		unsigned long *pfns = (unsigned long *)page_slots;
		int idx;

		for (idx = 0; idx < vec->nr_frames; idx++)
			pfns[idx] = page_to_pfn(page_slots[idx]);

		vec->is_pfns = true;
	}
}
EXPORT_SYMBOL(frame_vector_to_pfns);

/**
 * frame_vector_create() - allocate & initialize structure for pinned pfns
 * @nr_frames:	number of pfns slots we should reserve
 *
 * Allocate and initialize struct pinned_pfns to be able to hold @nr_pfns
 * pfns.
 */
struct frame_vector *frame_vector_create(unsigned int nr_frames)
{
	struct frame_vector *vec;
	int size = sizeof(struct frame_vector) + sizeof(void *) * nr_frames;

	if (WARN_ON_ONCE(nr_frames == 0))
		return NULL;
	/*
	 * This is absurdly high. It's here just to avoid strange effects when
	 * arithmetics overflows.
	 */
	if (WARN_ON_ONCE(nr_frames > INT_MAX / sizeof(void *) / 2))
		return NULL;
	/*
	 * Avoid higher order allocations, use vmalloc instead. It should
	 * be rare anyway.
	 */
	if (size <= PAGE_SIZE)
		vec = kmalloc(size, GFP_KERNEL);
	else
		vec = vmalloc(size);
	if (!vec)
		return NULL;
	vec->nr_allocated = nr_frames;
	vec->nr_frames = 0;
	return vec;
}
EXPORT_SYMBOL(frame_vector_create);

/**
 * frame_vector_destroy() - free memory allocated to carry frame vector
 * @vec:	Frame vector to free, may be NULL
 *
 * Free structure allocated by frame_vector_create() to carry frames. The
 * vector must already be empty, i.e. put_vaddr_frames() must have been
 * called for any frames it held. Passing NULL is a no-op.
 */
void frame_vector_destroy(struct frame_vector *vec)
{
	/*
	 * Accept NULL like kvfree() does, so that the sanity check below never
	 * dereferences the NULL returned by a failed frame_vector_create().
	 */
	if (!vec)
		return;
	/* Make sure put_vaddr_frames() got called properly... */
	VM_BUG_ON(vec->nr_frames > 0);
	kvfree(vec);
}
EXPORT_SYMBOL(frame_vector_destroy);
back to top