Revision 3ad33b2436b545cbe8b28e53f3710432cad457ab authored by Lee Schermerhorn on 15 November 2007, 00:59:10 UTC, committed by Linus Torvalds on 15 November 2007, 02:45:38 UTC
We hit the BUG_ON() in mm/rmap.c:vma_address() when trying to migrate via mbind(MPOL_MF_MOVE) a non-anon region that spans multiple vmas. For anon-regions, we just fail to migrate any pages beyond the 1st vma in the range. This occurs because do_mbind() collects a list of pages to migrate by calling check_range(). check_range() walks the task's mm, spanning vmas as necessary, to collect the migratable pages into a list. Then, do_mbind() calls migrate_pages() passing the list of pages, a function to allocate new pages based on vma policy [new_vma_page()], and a pointer to the first vma of the range. For each page in the list, new_vma_page() calls page_address_in_vma() passing the page and the vma [first in range] to obtain the address to pass to alloc_page_vma(). The page address is needed to get interleaving policy correct. If the pages in the list come from multiple vmas, eventually, new_vma_page() will pass a page to page_address_in_vma() with the incorrect vma. For !PageAnon pages, this will result in a bug check in rmap.c:vma_address(). For anon pages, vma_address() will just return EFAULT and fail the migration. This patch modifies new_vma_page() to check the return value from page_address_in_vma(). If the return value is EFAULT, new_vma_page() searches forward via vm_next for the vma that maps the page--i.e., that does not return EFAULT. This assumes that the pages in the list handed to migrate_pages() are in address order. This is currently the case. The patch documents this assumption in a new comment block for new_vma_page(). If new_vma_page() cannot locate the vma mapping the page in a forward search in the mm, it will pass a NULL vma to alloc_page_vma(). This will result in the allocation using the task policy, if any, else system default policy. This situation is unlikely, but the patch documents this behavior with a comment. Note, this patch results in restarting from the first vma in a multi-vma range each time new_vma_page() is called.
If this is not acceptable, we can make the vma argument a pointer, both in new_vma_page() and its caller unmap_and_move() so that the value held by the loop in migrate_pages() always passes down the last vma in which a page was found. This will require changes to all new_page_t functions passed to migrate_pages(). Is this necessary? For this patch to work, we can't bug check in vma_address() for pages outside the argument vma. This patch removes the BUG_ON(). All other callers [besides new_vma_page()] already check the return status. Tested on x86_64, 4 node NUMA platform. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent e1a1c99
page.h
/*
* include/asm-s390/page.h
*
* S390 version
* Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Hartmut Penner (hp@de.ibm.com)
*/
#ifndef _S390_PAGE_H
#define _S390_PAGE_H
#include <linux/const.h>
#include <asm/types.h>
/*
 * PAGE_SHIFT determines the page size: PAGE_SIZE is 1 << PAGE_SHIFT,
 * which is 4096 bytes with the shift of 12 used here.
 */
#define PAGE_SHIFT 12
/* NOTE(review): _AC() presumably comes from <linux/const.h> and keeps the UL suffix out of assembly builds -- confirm. */
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
/* Every bit set except the low PAGE_SHIFT (in-page offset) bits; AND with it to round down to a page boundary. */
#define PAGE_MASK (~(PAGE_SIZE-1))
/*
 * PAGE_DEFAULT_KEY is PAGE_DEFAULT_ACC shifted left by 4; both are 0 here.
 * NOTE(review): presumably ACC is the access-control part of an s390
 * storage key and the shift places it in the key's upper bits -- confirm.
 */
#define PAGE_DEFAULT_ACC 0
#define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4)
#ifdef __KERNEL__
#include <asm/setup.h>
#ifndef __ASSEMBLY__
/*
 * clear_page - clear the 4096 bytes starting at @page.
 * @page: start address of the page to clear
 *
 * The three locals are pinned to general registers 1, 2 and 3 because the
 * "mvcl 2,0" below names registers by number instead of using %-operands.
 *
 * NOTE(review): MVCL is understood to operate on even/odd register pairs
 * (2/3 = destination address and length, 0/1 = source address and
 * length plus pad byte). With register 1 == 0 the source length and the pad
 * byte are both zero, so the whole destination would be filled with zero
 * bytes and register 0 never used as an address -- confirm against the
 * z/Architecture Principles of Operation.
 */
static inline void clear_page(void *page)
{
register unsigned long reg1 asm ("1") = 0;
register void *reg2 asm ("2") = page;
/* The length is the literal 4096 rather than PAGE_SIZE. */
register unsigned long reg3 asm ("3") = 4096;
/* reg2 and reg3 are read-write ("+d") operands: the asm may change them. */
asm volatile(
" mvcl 2,0"
: "+d" (reg2), "+d" (reg3) : "d" (reg1) : "memory", "cc");
}
/*
 * copy_page - copy one page from @from to @to.
 * @to:   destination page address
 * @from: source page address
 *
 * Two implementations are selected at run time by MACHINE_HAS_MVPG:
 *  - a single MVPG instruction, or
 *  - sixteen MVC instructions, each with a length of 256 at offsets
 *    0, 256, ..., 3840, which together cover 4096 bytes.
 */
static inline void copy_page(void *to, void *from)
{
if (MACHINE_HAS_MVPG) {
/*
 * reg0 is pinned to general register 0 and zeroed, and is passed to the
 * asm only as an input that the template never names.
 * NOTE(review): presumably MVPG reads its option bits implicitly from
 * register 0 -- confirm against the Principles of Operation.
 */
register unsigned long reg0 asm ("0") = 0;
asm volatile(
" mvpg %0,%1"
: : "a" (to), "a" (from), "d" (reg0)
: "memory", "cc");
} else
/* Fallback: this asm declares only a "memory" clobber (no "cc"). */
asm volatile(
" mvc 0(256,%0),0(%1)\n"
" mvc 256(256,%0),256(%1)\n"
" mvc 512(256,%0),512(%1)\n"
" mvc 768(256,%0),768(%1)\n"
" mvc 1024(256,%0),1024(%1)\n"
" mvc 1280(256,%0),1280(%1)\n"
" mvc 1536(256,%0),1536(%1)\n"
" mvc 1792(256,%0),1792(%1)\n"
" mvc 2048(256,%0),2048(%1)\n"
" mvc 2304(256,%0),2304(%1)\n"
" mvc 2560(256,%0),2560(%1)\n"
" mvc 2816(256,%0),2816(%1)\n"
" mvc 3072(256,%0),3072(%1)\n"
" mvc 3328(256,%0),3328(%1)\n"
" mvc 3584(256,%0),3584(%1)\n"
" mvc 3840(256,%0),3840(%1)\n"
: : "a" (to), "a" (from) : "memory");
}
/*
 * User-page variants: they expand to clear_page()/copy_page() on the page
 * address(es); the vaddr and pg arguments are not used by the expansion.
 */
#define clear_user_page(page, vaddr, pg)	clear_page(page)
#define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)

/*
 * Allocate a user page through alloc_page_vma() with GFP_HIGHUSER and
 * __GFP_ZERO set, plus whatever the caller passes in @movableflags.
 * @movableflags is parenthesized so that an expression argument (for
 * example a conditional expression) is OR-ed in as a whole instead of
 * being re-associated with the surrounding '|' operators.
 */
#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | (movableflags), vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
 * These are used to make use of C type-checking..
 *
 * Each raw value is wrapped in its own one-member struct, so pgprot_t and
 * pte_t are distinct types that cannot be mixed with each other or with a
 * plain unsigned long without going through the accessors below.
 */
typedef struct { unsigned long pgprot; } pgprot_t;
typedef struct { unsigned long pte; } pte_t;
/* Accessors: name the unsigned long member stored inside the wrapper. */
#define pte_val(x) ((x).pte)
#define pgprot_val(x) ((x).pgprot)
/*
 * Upper-level page-table entry types. Their layout depends on __s390x__.
 * NOTE(review): __s390x__ is presumably the compiler's predefine for
 * 64-bit s390 builds -- confirm.
 */
#ifndef __s390x__
typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pud; } pud_t;
/* pgd_t holds four words in this configuration; pgd_val() reads only pgd0. */
typedef struct {
unsigned long pgd0;
unsigned long pgd1;
unsigned long pgd2;
unsigned long pgd3;
} pgd_t;
#define pmd_val(x) ((x).pmd)
#define pud_val(x) ((x).pud)
#define pgd_val(x) ((x).pgd0)
#else /* __s390x__ */
/* pmd_t holds two words in this configuration: pmd_val() reads pmd0, pmd_val1() reads pmd1. */
typedef struct {
unsigned long pmd0;
unsigned long pmd1;
} pmd_t;
typedef struct { unsigned long pud; } pud_t;
typedef struct { unsigned long pgd; } pgd_t;
#define pmd_val(x) ((x).pmd0)
#define pmd_val1(x) ((x).pmd1)
#define pud_val(x) ((x).pud)
#define pgd_val(x) ((x).pgd)
#endif /* __s390x__ */
/*
 * Constructors: build a wrapper value from a raw unsigned long with a
 * compound literal. Only the first member is given an initializer, so for
 * the multi-word layouts (four-word pgd_t, two-word pmd_t) the remaining
 * members are zero-initialized. No __pud() constructor is defined here.
 */
#define __pte(x) ((pte_t) { (x) } )
#define __pmd(x) ((pmd_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
/*
 * Default storage key used for all pages. This is only a declaration;
 * the variable is defined outside this header.
 */
extern unsigned int default_storage_key;
/*
 * page_set_storage_key - set the storage key for the page at @addr.
 * @addr: address identifying the page (operand constraint "a")
 * @skey: key value to set (operand constraint "d")
 *
 * Issues a single SSKE instruction with @skey as the first operand and
 * @addr as the second.
 * NOTE(review): the asm declares no outputs and no clobbers (in particular
 * no "memory"), so the compiler is not told to order it against surrounding
 * memory accesses -- confirm callers do not depend on such ordering.
 */
static inline void
page_set_storage_key(unsigned long addr, unsigned int skey)
{
asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
}
/*
 * page_get_storage_key - read the storage key for the page at @addr.
 * @addr: address identifying the page (operand constraint "a")
 *
 * Issues a single ISKE instruction and returns the value it leaves in skey.
 */
static inline unsigned int
page_get_storage_key(unsigned long addr)
{
unsigned int skey;
/*
 * The "0" (0) input is tied to output operand 0, so skey's register holds
 * 0 when the instruction starts.
 * NOTE(review): presumably needed because ISKE replaces only the low byte
 * of the register -- confirm against the Principles of Operation.
 */
asm volatile("iske %0,%1" : "=d" (skey) : "a" (addr), "0" (0));
return skey;
}
/* Upper bound used by pfn_valid(); only declared here, defined elsewhere. */
extern unsigned long max_pfn;
/*
 * pfn_valid - validity check for a page frame number.
 * @pfn: page frame number to test
 *
 * Returns 0 immediately when @pfn >= max_pfn. Otherwise executes LRA on
 * the address (@pfn << PAGE_SHIFT) and returns 1 if the value extracted
 * into ccode is 0, or 0 if it is non-zero.
 */
static inline int pfn_valid(unsigned long pfn)
{
/* Receives the LRA result register; its value is never read. */
unsigned long dummy;
int ccode;
if (pfn >= max_pfn)
return 0;
/*
 * NOTE(review): "ipm" is understood to copy the condition code (together
 * with the program mask) into the top byte of the low 32 bits of the
 * register, and the shift right by 28 then leaves only the condition code
 * set by LRA -- confirm against the Principles of Operation.
 */
asm volatile(
" lra %0,0(%2)\n"
" ipm %1\n"
" srl %1,28\n"
: "=d" (dummy), "=d" (ccode)
: "a" (pfn << PAGE_SHIFT)
: "cc");
return !ccode;
}
#endif /* !__ASSEMBLY__ */
/*
 * Round addr up to the next multiple of PAGE_SIZE; a value that is already
 * page-aligned is returned unchanged.
 */
#define PAGE_ALIGN(addr)	(((addr) + (PAGE_SIZE - 1)) & PAGE_MASK)
/*
 * PAGE_OFFSET is 0 and __pa()/__va() apply no offset: they only convert
 * between a pointer-like value and unsigned long (__pa) or void * (__va).
 *
 * The expansions are wrapped in an outer pair of parentheses so that each
 * behaves as a single primary expression; without them "sizeof __pa(x)"
 * does not compile and a postfix operator written after the macro would
 * bind to the argument instead of the converted value.
 */
#define __PAGE_OFFSET	0x0UL
#define PAGE_OFFSET	0x0UL
#define __pa(x)		((unsigned long)(x))
#define __va(x)		((void *)(unsigned long)(x))
/*
 * Conversions built from __pa(), PAGE_SHIFT and the pfn helpers
 * (pfn_to_page()/page_to_pfn() are not defined in this header's own text):
 *  - virt_to_page():    __pa(kaddr) >> PAGE_SHIFT gives the page frame
 *                       number, which is handed to pfn_to_page().
 *  - page_to_phys():    page_to_pfn(page) shifted left by PAGE_SHIFT.
 *  - virt_addr_valid(): pfn_valid() applied to the page frame number of
 *                       kaddr.
 */
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
/*
 * Default VM flag set: read, write and execute, together with the matching
 * VM_MAYREAD/VM_MAYWRITE/VM_MAYEXEC bits. The name suggests it is the
 * default for data mappings; the users are outside this header.
 */
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>
#endif /* __KERNEL__ */
#endif /* _S390_PAGE_H */
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...