Revision 3ad33b2436b545cbe8b28e53f3710432cad457ab authored by Lee Schermerhorn on 15 November 2007, 00:59:10 UTC, committed by Linus Torvalds on 15 November 2007, 02:45:38 UTC
We hit the BUG_ON() in mm/rmap.c:vma_address() when trying to migrate via mbind(MPOL_MF_MOVE) a non-anon region that spans multiple vmas. For anon-regions, we just fail to migrate any pages beyond the 1st vma in the range. This occurs because do_mbind() collects a list of pages to migrate by calling check_range(). check_range() walks the task's mm, spanning vmas as necessary, to collect the migratable pages into a list. Then, do_mbind() calls migrate_pages() passing the list of pages, a function to allocate new pages based on vma policy [new_vma_page()], and a pointer to the first vma of the range. For each page in the list, new_vma_page() calls page_address_in_vma() passing the page and the vma [first in range] to obtain the address to get for alloc_page_vma(). The page address is needed to get interleaving policy correct. If the pages in the list come from multiple vmas, eventually, new_page_address() will pass that page to page_address_in_vma() with the incorrect vma. For !PageAnon pages, this will result in a bug check in rmap.c:vma_address(). For anon pages, vma_address() will just return EFAULT and fail the migration. This patch modifies new_vma_page() to check the return value from page_address_in_vma(). If the return value is EFAULT, new_vma_page() searches forward via vm_next for the vma that maps the page--i.e., that does not return EFAULT. This assumes that the pages in the list handed to migrate_pages() are in address order. This is currently the case. The patch documents this assumption in a new comment block for new_vma_page(). If new_vma_page() cannot locate the vma mapping the page in a forward search in the mm, it will pass a NULL vma to alloc_page_vma(). This will result in the allocation using the task policy, if any, else system default policy. This situation is unlikely, but the patch documents this behavior with a comment. Note, this patch results in restarting from the first vma in a multi-vma range each time new_vma_page() is called. 
If this is not acceptable, we can make the vma argument a pointer, both in new_vma_page() and its caller unmap_and_move() so that the value held by the loop in migrate_pages() always passes down the last vma in which a page was found. This will require changes to all new_page_t functions passed to migrate_pages(). Is this necessary? For this patch to work, we can't bug check in vma_address() for pages outside the argument vma. This patch removes the BUG_ON(). All other callers [besides new_vma_page()] already check the return status. Tested on x86_64, 4 node NUMA platform. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent e1a1c99
File | Mode | Size |
---|---|---|
Kbuild | -rw-r--r-- | 228 bytes |
a.out.h | -rw-r--r-- | 1.0 KB |
appldata.h | -rw-r--r-- | 2.2 KB |
atomic.h | -rw-r--r-- | 7.1 KB |
auxvec.h | -rw-r--r-- | 62 bytes |
bitops.h | -rw-r--r-- | 22.5 KB |
bug.h | -rw-r--r-- | 1.4 KB |
bugs.h | -rw-r--r-- | 477 bytes |
byteorder.h | -rw-r--r-- | 2.5 KB |
cache.h | -rw-r--r-- | 418 bytes |
cacheflush.h | -rw-r--r-- | 1.1 KB |
ccwdev.h | -rw-r--r-- | 5.8 KB |
ccwgroup.h | -rw-r--r-- | 1.9 KB |
checksum.h | -rw-r--r-- | 4.1 KB |
chpid.h | -rw-r--r-- | 997 bytes |
cio.h | -rw-r--r-- | 9.6 KB |
cmb.h | -rw-r--r-- | 2.1 KB |
compat.h | -rw-r--r-- | 5.1 KB |
cpcmd.h | -rw-r--r-- | 1.2 KB |
cpu.h | -rw-r--r-- | 506 bytes |
cputime.h | -rw-r--r-- | 3.7 KB |
current.h | -rw-r--r-- | 470 bytes |
dasd.h | -rw-r--r-- | 10.3 KB |
debug.h | -rw-r--r-- | 8.0 KB |
delay.h | -rw-r--r-- | 544 bytes |
device.h | -rw-r--r-- | 129 bytes |
diag.h | -rw-r--r-- | 1.0 KB |
div64.h | -rw-r--r-- | 31 bytes |
dma.h | -rw-r--r-- | 241 bytes |
ebcdic.h | -rw-r--r-- | 1.5 KB |
elf.h | -rw-r--r-- | 8.4 KB |
emergency-restart.h | -rw-r--r-- | 149 bytes |
errno.h | -rw-r--r-- | 144 bytes |
etr.h | -rw-r--r-- | 6.5 KB |
extmem.h | -rw-r--r-- | 829 bytes |
fb.h | -rw-r--r-- | 205 bytes |
fcntl.h | -rw-r--r-- | 31 bytes |
futex.h | -rw-r--r-- | 1.3 KB |
hardirq.h | -rw-r--r-- | 931 bytes |
idals.h | -rw-r--r-- | 5.6 KB |
io.h | -rw-r--r-- | 1.0 KB |
ioctl.h | -rw-r--r-- | 31 bytes |
ioctls.h | -rw-r--r-- | 2.4 KB |
ipcbuf.h | -rw-r--r-- | 675 bytes |
ipl.h | -rw-r--r-- | 2.6 KB |
irq.h | -rw-r--r-- | 429 bytes |
irq_regs.h | -rw-r--r-- | 34 bytes |
irqflags.h | -rw-r--r-- | 2.3 KB |
kdebug.h | -rw-r--r-- | 353 bytes |
kexec.h | -rw-r--r-- | 1.0 KB |
kmap_types.h | -rw-r--r-- | 318 bytes |
kprobes.h | -rw-r--r-- | 3.1 KB |
linkage.h | -rw-r--r-- | 86 bytes |
local.h | -rw-r--r-- | 31 bytes |
lowcore.h | -rw-r--r-- | 16.0 KB |
mathemu.h | -rw-r--r-- | 881 bytes |
mman.h | -rw-r--r-- | 718 bytes |
mmu.h | -rw-r--r-- | 115 bytes |
mmu_context.h | -rw-r--r-- | 1.7 KB |
module.h | -rw-r--r-- | 1.0 KB |
monwriter.h | -rw-r--r-- | 915 bytes |
msgbuf.h | -rw-r--r-- | 1.1 KB |
mutex.h | -rw-r--r-- | 308 bytes |
namei.h | -rw-r--r-- | 387 bytes |
page.h | -rw-r--r-- | 4.4 KB |
param.h | -rw-r--r-- | 533 bytes |
pci.h | -rw-r--r-- | 255 bytes |
percpu.h | -rw-r--r-- | 2.5 KB |
pgalloc.h | -rw-r--r-- | 4.2 KB |
pgtable.h | -rw-r--r-- | 29.9 KB |
poll.h | -rw-r--r-- | 30 bytes |
posix_types.h | -rw-r--r-- | 3.2 KB |
processor.h | -rw-r--r-- | 9.8 KB |
ptrace.h | -rw-r--r-- | 11.7 KB |
qdio.h | -rw-r--r-- | 12.7 KB |
qeth.h | -rw-r--r-- | 2.0 KB |
reset.h | -rw-r--r-- | 464 bytes |
resource.h | -rw-r--r-- | 205 bytes |
rwsem.h | -rw-r--r-- | 8.8 KB |
s390_ext.h | -rw-r--r-- | 876 bytes |
s390_rdev.h | -rw-r--r-- | 453 bytes |
scatterlist.h | -rw-r--r-- | 383 bytes |
sclp.h | -rw-r--r-- | 955 bytes |
sections.h | -rw-r--r-- | 128 bytes |
segment.h | -rw-r--r-- | 54 bytes |
semaphore.h | -rw-r--r-- | 2.4 KB |
sembuf.h | -rw-r--r-- | 801 bytes |
setup.h | -rw-r--r-- | 2.9 KB |
sfp-machine.h | -rw-r--r-- | 4.7 KB |
sfp-util.h | -rw-r--r-- | 1.8 KB |
shmbuf.h | -rw-r--r-- | 1.3 KB |
shmparam.h | -rw-r--r-- | 281 bytes |
sigcontext.h | -rw-r--r-- | 1.4 KB |
siginfo.h | -rw-r--r-- | 273 bytes |
signal.h | -rw-r--r-- | 4.1 KB |
sigp.h | -rw-r--r-- | 2.6 KB |
smp.h | -rw-r--r-- | 2.6 KB |
socket.h | -rw-r--r-- | 1.3 KB |
sockios.h | -rw-r--r-- | 451 bytes |
spinlock.h | -rw-r--r-- | 4.3 KB |
spinlock_types.h | -rw-r--r-- | 436 bytes |
stat.h | -rw-r--r-- | 2.9 KB |
statfs.h | -rw-r--r-- | 1.0 KB |
string.h | -rw-r--r-- | 3.5 KB |
suspend.h | -rw-r--r-- | 67 bytes |
system.h | -rw-r--r-- | 9.4 KB |
tape390.h | -rw-r--r-- | 2.7 KB |
termbits.h | -rw-r--r-- | 4.5 KB |
termios.h | -rw-r--r-- | 1.4 KB |
thread_info.h | -rw-r--r-- | 3.5 KB |
timer.h | -rw-r--r-- | 1.1 KB |
timex.h | -rw-r--r-- | 1.8 KB |
tlb.h | -rw-r--r-- | 3.6 KB |
tlbflush.h | -rw-r--r-- | 2.9 KB |
todclk.h | -rw-r--r-- | 519 bytes |
topology.h | -rw-r--r-- | 128 bytes |
types.h | -rw-r--r-- | 1.7 KB |
uaccess.h | -rw-r--r-- | 9.8 KB |
ucontext.h | -rw-r--r-- | 393 bytes |
unaligned.h | -rw-r--r-- | 503 bytes |
unistd.h | -rw-r--r-- | 12.1 KB |
user.h | -rw-r--r-- | 3.3 KB |
vtoc.h | -rw-r--r-- | 6.9 KB |
xor.h | -rw-r--r-- | 29 bytes |
zcrypt.h | -rw-r--r-- | 9.3 KB |
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...