Revision 3ad33b2436b545cbe8b28e53f3710432cad457ab authored by Lee Schermerhorn on 15 November 2007, 00:59:10 UTC, committed by Linus Torvalds on 15 November 2007, 02:45:38 UTC
We hit the BUG_ON() in mm/rmap.c:vma_address() when trying to migrate via
mbind(MPOL_MF_MOVE) a non-anon region that spans multiple vmas.  For
anon-regions, we just fail to migrate any pages beyond the 1st vma in the
range.

This occurs because do_mbind() collects a list of pages to migrate by
calling check_range().  check_range() walks the task's mm, spanning vmas as
necessary, to collect the migratable pages into a list.  Then, do_mbind()
calls migrate_pages() passing the list of pages, a function to allocate new
pages based on vma policy [new_vma_page()], and a pointer to the first vma
of the range.

For each page in the list, new_vma_page() calls page_address_in_vma()
passing the page and the vma [first in range] to obtain the address to get
for alloc_page_vma().  The page address is needed to get interleaving
policy correct.  If the pages in the list come from multiple vmas,
eventually, new_page_address() will pass that page to page_address_in_vma()
with the incorrect vma.  For !PageAnon pages, this will result in a bug
check in rmap.c:vma_address().  For anon pages, vma_address() will just
return EFAULT and fail the migration.

This patch modifies new_vma_page() to check the return value from
page_address_in_vma().  If the return value is EFAULT, new_vma_page()
searchs forward via vm_next for the vma that maps the page--i.e., that does
not return EFAULT.  This assumes that the pages in the list handed to
migrate_pages() is in address order.  This is currently case.  The patch
documents this assumption in a new comment block for new_vma_page().

If new_vma_page() cannot locate the vma mapping the page in a forward
search in the mm, it will pass a NULL vma to alloc_page_vma().  This will
result in the allocation using the task policy, if any, else system default
policy.  This situation is unlikely, but the patch documents this behavior
with a comment.

Note, this patch results in restarting from the first vma in a multi-vma
range each time new_vma_page() is called.  If this is not acceptable, we
can make the vma argument a pointer, both in new_vma_page() and it's caller
unmap_and_move() so that the value held by the loop in migrate_pages()
always passes down the last vma in which a page was found.  This will
require changes to all new_page_t functions passed to migrate_pages().  Is
this necessary?

For this patch to work, we can't bug check in vma_address() for pages
outside the argument vma.  This patch removes the BUG_ON().  All other
callers [besides new_vma_page()] already check the return status.

Tested on x86_64, 4 node NUMA platform.

Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent e1a1c99
Raw File
bitops.h
/* bitops.h: bit operations for the Fujitsu FR-V CPUs
 *
 * For an explanation of how atomic ops work in this arch, see:
 *   Documentation/fujitsu/frv/atomic-ops.txt
 *
 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#ifndef _ASM_BITOPS_H
#define _ASM_BITOPS_H

#include <linux/compiler.h>
#include <asm/byteorder.h>
#include <asm/system.h>
#include <asm/atomic.h>

#ifdef __KERNEL__

#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif

#include <asm-generic/bitops/ffz.h>

/*
 * clear_bit() doesn't provide any barrier for the compiler.
 */
#define smp_mb__before_clear_bit()	barrier()
#define smp_mb__after_clear_bit()	barrier()

static inline int test_and_clear_bit(int nr, volatile void *addr)
{
	volatile unsigned long *ptr = addr;
	unsigned long mask = 1UL << (nr & 31);
	ptr += nr >> 5;
	return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0;
}

static inline int test_and_set_bit(int nr, volatile void *addr)
{
	volatile unsigned long *ptr = addr;
	unsigned long mask = 1UL << (nr & 31);
	ptr += nr >> 5;
	return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0;
}

static inline int test_and_change_bit(int nr, volatile void *addr)
{
	volatile unsigned long *ptr = addr;
	unsigned long mask = 1UL << (nr & 31);
	ptr += nr >> 5;
	return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0;
}

static inline void clear_bit(int nr, volatile void *addr)
{
	test_and_clear_bit(nr, addr);
}

static inline void set_bit(int nr, volatile void *addr)
{
	test_and_set_bit(nr, addr);
}

static inline void change_bit(int nr, volatile void * addr)
{
	test_and_change_bit(nr, addr);
}

static inline void __clear_bit(int nr, volatile void * addr)
{
	volatile unsigned long *a = addr;
	int mask;

	a += nr >> 5;
	mask = 1 << (nr & 31);
	*a &= ~mask;
}

static inline void __set_bit(int nr, volatile void * addr)
{
	volatile unsigned long *a = addr;
	int mask;

	a += nr >> 5;
	mask = 1 << (nr & 31);
	*a |= mask;
}

static inline void __change_bit(int nr, volatile void *addr)
{
	volatile unsigned long *a = addr;
	int mask;

	a += nr >> 5;
	mask = 1 << (nr & 31);
	*a ^= mask;
}

static inline int __test_and_clear_bit(int nr, volatile void * addr)
{
	volatile unsigned long *a = addr;
	int mask, retval;

	a += nr >> 5;
	mask = 1 << (nr & 31);
	retval = (mask & *a) != 0;
	*a &= ~mask;
	return retval;
}

static inline int __test_and_set_bit(int nr, volatile void * addr)
{
	volatile unsigned long *a = addr;
	int mask, retval;

	a += nr >> 5;
	mask = 1 << (nr & 31);
	retval = (mask & *a) != 0;
	*a |= mask;
	return retval;
}

static inline int __test_and_change_bit(int nr, volatile void * addr)
{
	volatile unsigned long *a = addr;
	int mask, retval;

	a += nr >> 5;
	mask = 1 << (nr & 31);
	retval = (mask & *a) != 0;
	*a ^= mask;
	return retval;
}

/*
 * This routine doesn't need to be atomic.
 */
static inline int __constant_test_bit(int nr, const volatile void * addr)
{
	return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
}

static inline int __test_bit(int nr, const volatile void * addr)
{
	int 	* a = (int *) addr;
	int	mask;

	a += nr >> 5;
	mask = 1 << (nr & 0x1f);
	return ((mask & *a) != 0);
}

#define test_bit(nr,addr) \
(__builtin_constant_p(nr) ? \
 __constant_test_bit((nr),(addr)) : \
 __test_bit((nr),(addr)))

#include <asm-generic/bitops/find.h>

/**
 * fls - find last bit set
 * @x: the word to search
 *
 * This is defined the same way as ffs:
 * - return 32..1 to indicate bit 31..0 most significant bit set
 * - return 0 to indicate no bits set
 */
#define fls(x)						\
({							\
	int bit;					\
							\
	asm("	subcc	%1,gr0,gr0,icc0		\n"	\
	    "	ckne	icc0,cc4		\n"	\
	    "	cscan.p	%1,gr0,%0	,cc4,#1	\n"	\
	    "	csub	%0,%0,%0	,cc4,#0	\n"	\
	    "   csub    %2,%0,%0	,cc4,#1	\n"	\
	    : "=&r"(bit)				\
	    : "r"(x), "r"(32)				\
	    : "icc0", "cc4"				\
	    );						\
							\
	bit;						\
})

/**
 * fls64 - find last bit set in a 64-bit value
 * @n: the value to search
 *
 * This is defined the same way as ffs:
 * - return 64..1 to indicate bit 63..0 most significant bit set
 * - return 0 to indicate no bits set
 */
static inline __attribute__((const))
int fls64(u64 n)
{
	union {
		u64 ll;
		struct { u32 h, l; };
	} _;
	int bit, x, y;

	_.ll = n;

	asm("	subcc.p		%3,gr0,gr0,icc0		\n"
	    "	subcc		%4,gr0,gr0,icc1		\n"
	    "	ckne		icc0,cc4		\n"
	    "	ckne		icc1,cc5		\n"
	    "	norcr		cc4,cc5,cc6		\n"
	    "	csub.p		%0,%0,%0	,cc6,1	\n"
	    "	orcr		cc5,cc4,cc4		\n"
	    "	andcr		cc4,cc5,cc4		\n"
	    "	cscan.p		%3,gr0,%0	,cc4,0	\n"
	    "   setlos		#64,%1			\n"
	    "	cscan.p		%4,gr0,%0	,cc4,1	\n"
	    "   setlos		#32,%2			\n"
	    "	csub.p		%1,%0,%0	,cc4,0	\n"
	    "	csub		%2,%0,%0	,cc4,1	\n"
	    : "=&r"(bit), "=r"(x), "=r"(y)
	    : "0r"(_.h), "r"(_.l)
	    : "icc0", "icc1", "cc4", "cc5", "cc6"
	    );
	return bit;

}

/**
 * ffs - find first bit set
 * @x: the word to search
 *
 * - return 32..1 to indicate bit 31..0 most least significant bit set
 * - return 0 to indicate no bits set
 */
static inline __attribute__((const))
int ffs(int x)
{
	/* Note: (x & -x) gives us a mask that is the least significant
	 * (rightmost) 1-bit of the value in x.
	 */
	return fls(x & -x);
}

/**
 * __ffs - find first bit set
 * @x: the word to search
 *
 * - return 31..0 to indicate bit 31..0 most least significant bit set
 * - if no bits are set in x, the result is undefined
 */
static inline __attribute__((const))
int __ffs(unsigned long x)
{
	int bit;
	asm("scan %1,gr0,%0" : "=r"(bit) : "r"(x & -x));
	return 31 - bit;
}

/*
 * special slimline version of fls() for calculating ilog2_u32()
 * - note: no protection against n == 0
 */
#define ARCH_HAS_ILOG2_U32
static inline __attribute__((const))
int __ilog2_u32(u32 n)
{
	int bit;
	asm("scan %1,gr0,%0" : "=r"(bit) : "r"(n));
	return 31 - bit;
}

/*
 * special slimline version of fls64() for calculating ilog2_u64()
 * - note: no protection against n == 0
 */
#define ARCH_HAS_ILOG2_U64
static inline __attribute__((const))
int __ilog2_u64(u64 n)
{
	union {
		u64 ll;
		struct { u32 h, l; };
	} _;
	int bit, x, y;

	_.ll = n;

	asm("	subcc		%3,gr0,gr0,icc0		\n"
	    "	ckeq		icc0,cc4		\n"
	    "	cscan.p		%3,gr0,%0	,cc4,0	\n"
	    "   setlos		#63,%1			\n"
	    "	cscan.p		%4,gr0,%0	,cc4,1	\n"
	    "   setlos		#31,%2			\n"
	    "	csub.p		%1,%0,%0	,cc4,0	\n"
	    "	csub		%2,%0,%0	,cc4,1	\n"
	    : "=&r"(bit), "=r"(x), "=r"(y)
	    : "0r"(_.h), "r"(_.l)
	    : "icc0", "cc4"
	    );
	return bit;
}

#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>

#include <asm-generic/bitops/ext2-non-atomic.h>

#define ext2_set_bit_atomic(lock,nr,addr)	test_and_set_bit  ((nr) ^ 0x18, (addr))
#define ext2_clear_bit_atomic(lock,nr,addr)	test_and_clear_bit((nr) ^ 0x18, (addr))

#include <asm-generic/bitops/minix-le.h>

#endif /* __KERNEL__ */

#endif /* _ASM_BITOPS_H */
back to top