Revision 3ad33b2436b545cbe8b28e53f3710432cad457ab authored by Lee Schermerhorn on 15 November 2007, 00:59:10 UTC, committed by Linus Torvalds on 15 November 2007, 02:45:38 UTC
We hit the BUG_ON() in mm/rmap.c:vma_address() when trying to migrate via mbind(MPOL_MF_MOVE) a non-anon region that spans multiple vmas. For anon-regions, we just fail to migrate any pages beyond the 1st vma in the range. This occurs because do_mbind() collects a list of pages to migrate by calling check_range(). check_range() walks the task's mm, spanning vmas as necessary, to collect the migratable pages into a list. Then, do_mbind() calls migrate_pages() passing the list of pages, a function to allocate new pages based on vma policy [new_vma_page()], and a pointer to the first vma of the range. For each page in the list, new_vma_page() calls page_address_in_vma() passing the page and the vma [first in range] to obtain the address to get for alloc_page_vma(). The page address is needed to get interleaving policy correct. If the pages in the list come from multiple vmas, eventually, new_page_address() will pass that page to page_address_in_vma() with the incorrect vma. For !PageAnon pages, this will result in a bug check in rmap.c:vma_address(). For anon pages, vma_address() will just return EFAULT and fail the migration. This patch modifies new_vma_page() to check the return value from page_address_in_vma(). If the return value is EFAULT, new_vma_page() searchs forward via vm_next for the vma that maps the page--i.e., that does not return EFAULT. This assumes that the pages in the list handed to migrate_pages() is in address order. This is currently case. The patch documents this assumption in a new comment block for new_vma_page(). If new_vma_page() cannot locate the vma mapping the page in a forward search in the mm, it will pass a NULL vma to alloc_page_vma(). This will result in the allocation using the task policy, if any, else system default policy. This situation is unlikely, but the patch documents this behavior with a comment. Note, this patch results in restarting from the first vma in a multi-vma range each time new_vma_page() is called. If this is not acceptable, we can make the vma argument a pointer, both in new_vma_page() and it's caller unmap_and_move() so that the value held by the loop in migrate_pages() always passes down the last vma in which a page was found. This will require changes to all new_page_t functions passed to migrate_pages(). Is this necessary? For this patch to work, we can't bug check in vma_address() for pages outside the argument vma. This patch removes the BUG_ON(). All other callers [besides new_vma_page()] already check the return status. Tested on x86_64, 4 node NUMA platform. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent e1a1c99
bitops.h
/* bitops.h: bit operations for the Fujitsu FR-V CPUs
*
* For an explanation of how atomic ops work in this arch, see:
* Documentation/fujitsu/frv/atomic-ops.txt
*
* Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_BITOPS_H
#define _ASM_BITOPS_H
#include <linux/compiler.h>
#include <asm/byteorder.h>
#include <asm/system.h>
#include <asm/atomic.h>
#ifdef __KERNEL__
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif
#include <asm-generic/bitops/ffz.h>
/*
* clear_bit() doesn't provide any barrier for the compiler.
*/
#define smp_mb__before_clear_bit() barrier()
#define smp_mb__after_clear_bit() barrier()
static inline int test_and_clear_bit(int nr, volatile void *addr)
{
volatile unsigned long *ptr = addr;
unsigned long mask = 1UL << (nr & 31);
ptr += nr >> 5;
return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0;
}
static inline int test_and_set_bit(int nr, volatile void *addr)
{
volatile unsigned long *ptr = addr;
unsigned long mask = 1UL << (nr & 31);
ptr += nr >> 5;
return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0;
}
static inline int test_and_change_bit(int nr, volatile void *addr)
{
volatile unsigned long *ptr = addr;
unsigned long mask = 1UL << (nr & 31);
ptr += nr >> 5;
return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0;
}
static inline void clear_bit(int nr, volatile void *addr)
{
test_and_clear_bit(nr, addr);
}
static inline void set_bit(int nr, volatile void *addr)
{
test_and_set_bit(nr, addr);
}
static inline void change_bit(int nr, volatile void * addr)
{
test_and_change_bit(nr, addr);
}
static inline void __clear_bit(int nr, volatile void * addr)
{
volatile unsigned long *a = addr;
int mask;
a += nr >> 5;
mask = 1 << (nr & 31);
*a &= ~mask;
}
static inline void __set_bit(int nr, volatile void * addr)
{
volatile unsigned long *a = addr;
int mask;
a += nr >> 5;
mask = 1 << (nr & 31);
*a |= mask;
}
static inline void __change_bit(int nr, volatile void *addr)
{
volatile unsigned long *a = addr;
int mask;
a += nr >> 5;
mask = 1 << (nr & 31);
*a ^= mask;
}
static inline int __test_and_clear_bit(int nr, volatile void * addr)
{
volatile unsigned long *a = addr;
int mask, retval;
a += nr >> 5;
mask = 1 << (nr & 31);
retval = (mask & *a) != 0;
*a &= ~mask;
return retval;
}
static inline int __test_and_set_bit(int nr, volatile void * addr)
{
volatile unsigned long *a = addr;
int mask, retval;
a += nr >> 5;
mask = 1 << (nr & 31);
retval = (mask & *a) != 0;
*a |= mask;
return retval;
}
static inline int __test_and_change_bit(int nr, volatile void * addr)
{
volatile unsigned long *a = addr;
int mask, retval;
a += nr >> 5;
mask = 1 << (nr & 31);
retval = (mask & *a) != 0;
*a ^= mask;
return retval;
}
/*
* This routine doesn't need to be atomic.
*/
static inline int __constant_test_bit(int nr, const volatile void * addr)
{
return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
}
static inline int __test_bit(int nr, const volatile void * addr)
{
int * a = (int *) addr;
int mask;
a += nr >> 5;
mask = 1 << (nr & 0x1f);
return ((mask & *a) != 0);
}
#define test_bit(nr,addr) \
(__builtin_constant_p(nr) ? \
__constant_test_bit((nr),(addr)) : \
__test_bit((nr),(addr)))
#include <asm-generic/bitops/find.h>
/**
* fls - find last bit set
* @x: the word to search
*
* This is defined the same way as ffs:
* - return 32..1 to indicate bit 31..0 most significant bit set
* - return 0 to indicate no bits set
*/
#define fls(x) \
({ \
int bit; \
\
asm(" subcc %1,gr0,gr0,icc0 \n" \
" ckne icc0,cc4 \n" \
" cscan.p %1,gr0,%0 ,cc4,#1 \n" \
" csub %0,%0,%0 ,cc4,#0 \n" \
" csub %2,%0,%0 ,cc4,#1 \n" \
: "=&r"(bit) \
: "r"(x), "r"(32) \
: "icc0", "cc4" \
); \
\
bit; \
})
/**
* fls64 - find last bit set in a 64-bit value
* @n: the value to search
*
* This is defined the same way as ffs:
* - return 64..1 to indicate bit 63..0 most significant bit set
* - return 0 to indicate no bits set
*/
static inline __attribute__((const))
int fls64(u64 n)
{
union {
u64 ll;
struct { u32 h, l; };
} _;
int bit, x, y;
_.ll = n;
asm(" subcc.p %3,gr0,gr0,icc0 \n"
" subcc %4,gr0,gr0,icc1 \n"
" ckne icc0,cc4 \n"
" ckne icc1,cc5 \n"
" norcr cc4,cc5,cc6 \n"
" csub.p %0,%0,%0 ,cc6,1 \n"
" orcr cc5,cc4,cc4 \n"
" andcr cc4,cc5,cc4 \n"
" cscan.p %3,gr0,%0 ,cc4,0 \n"
" setlos #64,%1 \n"
" cscan.p %4,gr0,%0 ,cc4,1 \n"
" setlos #32,%2 \n"
" csub.p %1,%0,%0 ,cc4,0 \n"
" csub %2,%0,%0 ,cc4,1 \n"
: "=&r"(bit), "=r"(x), "=r"(y)
: "0r"(_.h), "r"(_.l)
: "icc0", "icc1", "cc4", "cc5", "cc6"
);
return bit;
}
/**
* ffs - find first bit set
* @x: the word to search
*
* - return 32..1 to indicate bit 31..0 most least significant bit set
* - return 0 to indicate no bits set
*/
static inline __attribute__((const))
int ffs(int x)
{
/* Note: (x & -x) gives us a mask that is the least significant
* (rightmost) 1-bit of the value in x.
*/
return fls(x & -x);
}
/**
* __ffs - find first bit set
* @x: the word to search
*
* - return 31..0 to indicate bit 31..0 most least significant bit set
* - if no bits are set in x, the result is undefined
*/
static inline __attribute__((const))
int __ffs(unsigned long x)
{
int bit;
asm("scan %1,gr0,%0" : "=r"(bit) : "r"(x & -x));
return 31 - bit;
}
/*
* special slimline version of fls() for calculating ilog2_u32()
* - note: no protection against n == 0
*/
#define ARCH_HAS_ILOG2_U32
static inline __attribute__((const))
int __ilog2_u32(u32 n)
{
int bit;
asm("scan %1,gr0,%0" : "=r"(bit) : "r"(n));
return 31 - bit;
}
/*
* special slimline version of fls64() for calculating ilog2_u64()
* - note: no protection against n == 0
*/
#define ARCH_HAS_ILOG2_U64
static inline __attribute__((const))
int __ilog2_u64(u64 n)
{
union {
u64 ll;
struct { u32 h, l; };
} _;
int bit, x, y;
_.ll = n;
asm(" subcc %3,gr0,gr0,icc0 \n"
" ckeq icc0,cc4 \n"
" cscan.p %3,gr0,%0 ,cc4,0 \n"
" setlos #63,%1 \n"
" cscan.p %4,gr0,%0 ,cc4,1 \n"
" setlos #31,%2 \n"
" csub.p %1,%0,%0 ,cc4,0 \n"
" csub %2,%0,%0 ,cc4,1 \n"
: "=&r"(bit), "=r"(x), "=r"(y)
: "0r"(_.h), "r"(_.l)
: "icc0", "cc4"
);
return bit;
}
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/ext2-non-atomic.h>
#define ext2_set_bit_atomic(lock,nr,addr) test_and_set_bit ((nr) ^ 0x18, (addr))
#define ext2_clear_bit_atomic(lock,nr,addr) test_and_clear_bit((nr) ^ 0x18, (addr))
#include <asm-generic/bitops/minix-le.h>
#endif /* __KERNEL__ */
#endif /* _ASM_BITOPS_H */
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...