Revision 3ad33b2436b545cbe8b28e53f3710432cad457ab authored by Lee Schermerhorn on 15 November 2007, 00:59:10 UTC, committed by Linus Torvalds on 15 November 2007, 02:45:38 UTC
We hit the BUG_ON() in mm/rmap.c:vma_address() when trying to migrate via mbind(MPOL_MF_MOVE) a non-anon region that spans multiple vmas. For anon-regions, we just fail to migrate any pages beyond the 1st vma in the range. This occurs because do_mbind() collects a list of pages to migrate by calling check_range(). check_range() walks the task's mm, spanning vmas as necessary, to collect the migratable pages into a list. Then, do_mbind() calls migrate_pages() passing the list of pages, a function to allocate new pages based on vma policy [new_vma_page()], and a pointer to the first vma of the range. For each page in the list, new_vma_page() calls page_address_in_vma() passing the page and the vma [first in range] to obtain the address to pass to alloc_page_vma(). The page address is needed to get interleaving policy correct. If the pages in the list come from multiple vmas, eventually, new_vma_page() will pass a page to page_address_in_vma() with the incorrect vma. For !PageAnon pages, this will result in a bug check in rmap.c:vma_address(). For anon pages, vma_address() will just return EFAULT and fail the migration. This patch modifies new_vma_page() to check the return value from page_address_in_vma(). If the return value is EFAULT, new_vma_page() searches forward via vm_next for the vma that maps the page--i.e., that does not return EFAULT. This assumes that the pages in the list handed to migrate_pages() are in address order. This is currently the case. The patch documents this assumption in a new comment block for new_vma_page(). If new_vma_page() cannot locate the vma mapping the page in a forward search in the mm, it will pass a NULL vma to alloc_page_vma(). This will result in the allocation using the task policy, if any, else system default policy. This situation is unlikely, but the patch documents this behavior with a comment. Note, this patch results in restarting from the first vma in a multi-vma range each time new_vma_page() is called. 
If this is not acceptable, we can make the vma argument a pointer, both in new_vma_page() and its caller unmap_and_move() so that the value held by the loop in migrate_pages() always passes down the last vma in which a page was found. This will require changes to all new_page_t functions passed to migrate_pages(). Is this necessary? For this patch to work, we can't bug check in vma_address() for pages outside the argument vma. This patch removes the BUG_ON(). All other callers [besides new_vma_page()] already check the return status. Tested on x86_64, 4 node NUMA platform. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent e1a1c99
sfp-util.h
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <asm/byteorder.h>
/*
 * add_ssaaaa - add two 64-bit values held as 32-bit (high, low) word pairs.
 * @sh, @sl: lvalues receiving the high and low word of the sum
 * @ah, @al: high and low word of the first addend
 * @bh, @bl: high and low word of the second addend
 *
 * Computes (sh:sl) = (ah:al) + (bh:bl). Nothing captures a carry out of
 * the high word, so it is lost. Each argument is evaluated exactly once.
 *
 * Both read-write operands are earlyclobber ('&'): the first instruction
 * already writes %1 while %2 (bh) is not read until the last one. Without
 * '&' on __sl the compiler may place bh in the same register as __sl when
 * it knows al and bh hold the same value, and the high-word add would then
 * read the already-updated low word.
 */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) ({			\
	unsigned int __sh = (ah);				\
	unsigned int __sl = (al);				\
	asm volatile(						\
		" alr %1,%3\n"	/* low += bl, sets carry in cc */ \
		" brc 12,0f\n"	/* cc 0/1: no carry, skip */	\
		" ahi %0,1\n"	/* propagate carry into high */	\
		"0: alr %0,%2"	/* high += bh */		\
		: "+&d" (__sh), "+&d" (__sl)			\
		: "d" (bh), "d" (bl) : "cc");			\
	(sh) = __sh;						\
	(sl) = __sl;						\
})
/*
 * sub_ddmmss - subtract two 64-bit values held as 32-bit (high, low) pairs.
 * @sh, @sl: lvalues receiving the high and low word of the difference
 * @ah, @al: high and low word of the minuend
 * @bh, @bl: high and low word of the subtrahend
 *
 * Computes (sh:sl) = (ah:al) - (bh:bl). Nothing captures a borrow out of
 * the high word, so it is lost. Each argument is evaluated exactly once.
 *
 * Both read-write operands are earlyclobber ('&'): the first instruction
 * already writes %1 while %2 (bh) is not read until the last one. Without
 * '&' on __sl the compiler may place bh in the same register as __sl when
 * it knows al and bh hold the same value, and the high-word subtract would
 * then read the already-updated low word.
 */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) ({			\
	unsigned int __sh = (ah);				\
	unsigned int __sl = (al);				\
	asm volatile(						\
		" slr %1,%3\n"	/* low -= bl, sets borrow in cc */ \
		" brc 3,0f\n"	/* cc 2/3: no borrow, skip */	\
		" ahi %0,-1\n"	/* propagate borrow into high */ \
		"0: slr %0,%2"	/* high -= bh */		\
		: "+&d" (__sh), "+&d" (__sl)			\
		: "d" (bh), "d" (bl) : "cc");			\
	(sh) = __sh;						\
	(sl) = __sl;						\
})
/*
 * umul_ppmm - unsigned 32 x 32 -> 64 bit multiply.
 * @wh, @wl: lvalues receiving the high and low word of the product
 * @u, @v:   the two 32-bit factors
 *
 * The product is formed with the signed multiply MR in the r0/r1 register
 * pair and then corrected to the unsigned result using, modulo 2^64:
 *
 *   a umul b = a mul b + (a >= 1<<31 ? b<<32 : 0) + (b >= 1<<31 ? a<<32 : 0)
 *
 * i.e. whenever one factor has its top bit set, the other factor is added
 * to the high word. r0 and r1 are scratch and are listed as clobbers, so
 * the compiler will not place __wh or __wl in them. Each argument is
 * evaluated exactly once.
 */
#define umul_ppmm(wh, wl, u, v) ({				\
	unsigned int __wh = (u);				\
	unsigned int __wl = (v);				\
	asm volatile(						\
		" ltr 1,%0\n"	/* r1 = u, cc = sign of u */	\
		" mr 0,%1\n"	/* r0:r1 = u * v (signed); cc from LTR is still tested below */ \
		" jnm 0f\n"	/* u top bit clear: skip */	\
		" alr 0,%1\n"	/* high += v */			\
		"0: ltr %1,%1\n" /* cc = sign of v */		\
		" jnm 1f\n"	/* v top bit clear: skip */	\
		" alr 0,%0\n"	/* high += u */			\
		"1: lr %0,0\n"	/* __wh = high word */		\
		" lr %1,1\n"	/* __wl = low word */		\
		: "+d" (__wh), "+d" (__wl)			\
		: : "0", "1", "cc");				\
	(wh) = __wh;						\
	(wl) = __wl;						\
})
/*
 * udiv_qrnnd - divide the two-word value (n1:n0) by d.
 * @q:      lvalue receiving the quotient
 * @r:      lvalue receiving the remainder
 * @n1, @n0: high and low 32-bit word of the dividend
 * @d:      divisor
 *
 * With __s390x__ defined the dividend is assembled in an unsigned long
 * (n1 shifted left by 32, plus n0) and divided with the C operators; the
 * divisor is narrowed to unsigned int first. Every macro argument is
 * parenthesized so that an expression argument such as "x << 1" cannot
 * re-associate with the surrounding "+".
 *
 * Otherwise the work is delegated to the out-of-line helper
 * __udiv_qrnnd(), declared below and defined elsewhere, which returns the
 * quotient and stores the remainder through its first argument.
 *
 * NOTE(review): neither variant checks for d == 0 or for a quotient that
 * does not fit in q; callers are presumably expected to guarantee both.
 */
#ifdef __s390x__
#define udiv_qrnnd(q, r, n1, n0, d)				\
	do {							\
		unsigned long __n;				\
		unsigned int __d;				\
		__n = ((unsigned long)(n1) << 32) + (n0);	\
		__d = (d);					\
		(q) = __n / __d;				\
		(r) = __n % __d;				\
	} while (0)
#else
#define udiv_qrnnd(q, r, n1, n0, d)				\
	do {							\
		unsigned int __r;				\
		(q) = __udiv_qrnnd(&__r, (n1), (n0), (d));	\
		(r) = __r;					\
	} while (0)
extern unsigned long __udiv_qrnnd (unsigned int *, unsigned int,
				   unsigned int , unsigned int);
#endif
/*
 * Configuration knobs for code that includes this header.
 * NOTE(review): presumably consumed by the generic soft-fp headers; the
 * consumer is not visible here, so confirm the exact meaning there.
 */

/* 0: presumably tells the consumer that udiv_qrnnd() needs no pre-normalized divisor - confirm. */
#define UDIV_NEEDS_NORMALIZATION 0
/*
 * Any abort() written after this point expands to a plain "return 0"
 * statement, so the enclosing function returns 0 instead of terminating.
 * It is therefore only usable inside functions whose return type accepts 0.
 */
#define abort() return 0
/* Byte order is fixed to big-endian; __BIG_ENDIAN is not defined in this header. */
#define __BYTE_ORDER __BIG_ENDIAN
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...