Revision 85bd839983778fcd0c1c043327b14a046e979b39 authored by Gu Zheng on 10 June 2015, 18:14:43 UTC, committed by Linus Torvalds on 10 June 2015, 23:43:43 UTC
Izumi found the following oops when hot re-adding a node:

    BUG: unable to handle kernel paging request at ffffc90008963690
    IP: __wake_up_bit+0x20/0x70
    Oops: 0000 [#1] SMP
    CPU: 68 PID: 1237 Comm: rs:main Q:Reg Not tainted 4.1.0-rc5 #80
    Hardware name: FUJITSU PRIMEQUEST2800E/SB, BIOS PRIMEQUEST 2000 Series BIOS Version 1.87 04/28/2015
    task: ffff880838df8000 ti: ffff880017b94000 task.ti: ffff880017b94000
    RIP: 0010:[<ffffffff810dff80>]  [<ffffffff810dff80>] __wake_up_bit+0x20/0x70
    RSP: 0018:ffff880017b97be8  EFLAGS: 00010246
    RAX: ffffc90008963690 RBX: 00000000003c0000 RCX: 000000000000a4c9
    RDX: 0000000000000000 RSI: ffffea101bffd500 RDI: ffffc90008963648
    RBP: ffff880017b97c08 R08: 0000000002000020 R09: 0000000000000000
    R10: 0000000000000000 R11: 0000000000000000 R12: ffff8a0797c73800
    R13: ffffea101bffd500 R14: 0000000000000001 R15: 00000000003c0000
    FS:  00007fcc7ffff700(0000) GS:ffff880874800000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: ffffc90008963690 CR3: 0000000836761000 CR4: 00000000001407e0
    Call Trace:
      unlock_page+0x6d/0x70
      generic_write_end+0x53/0xb0
      xfs_vm_write_end+0x29/0x80 [xfs]
      generic_perform_write+0x10a/0x1e0
      xfs_file_buffered_aio_write+0x14d/0x3e0 [xfs]
      xfs_file_write_iter+0x79/0x120 [xfs]
      __vfs_write+0xd4/0x110
      vfs_write+0xac/0x1c0
      SyS_write+0x58/0xd0
      system_call_fastpath+0x12/0x76
    Code: 5d c3 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 48 83 ec 20 65 48 8b 04 25 28 00 00 00 48 89 45 f8 31 c0 48 8d 47 48 <48> 39 47 48 48 c7 45 e8 00 00 00 00 48 c7 45 f0 00 00 00 00 48
    RIP  [<ffffffff810dff80>] __wake_up_bit+0x20/0x70
     RSP <ffff880017b97be8>
    CR2: ffffc90008963690

Reproduce method (re-add a node)::
  Hot-add nodeA --> remove nodeA --> hot-add nodeA (panic)

This seems an use-after-free problem, and the root cause is
zone->wait_table was not set to *NULL* after free it in
try_offline_node.

When hot re-add a node, we will reuse the pgdat of it, so does the zone
struct, and when add pages to the target zone, it will init the zone
first (including the wait_table) if the zone is not initialized.  The
judgement of zone initialized is based on zone->wait_table:

	static inline bool zone_is_initialized(struct zone *zone)
	{
		return !!zone->wait_table;
	}

so if we do not set the zone->wait_table to *NULL* after free it, the
memory hotplug routine will skip the init of new zone when hot re-add
the node, and the wait_table still points to the freed memory, then we
will access the invalid address when trying to wake up the waiting
people after the i/o operation with the page is done, such as mentioned
above.

Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Reported-by: Taku Izumi <izumi.taku@jp.fujitsu.com>
Reviewed by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 5879ae5
Raw File
cs5520.c
/*
 *	IDE tuning and bus mastering support for the CS5510/CS5520
 *	chipsets
 *
 *	The CS5510/CS5520 are slightly unusual devices. Unlike the 
 *	typical IDE controllers they do bus mastering with the drive in
 *	PIO mode and smarter silicon.
 *
 *	The practical upshot of this is that we must always tune the
 *	drive for the right PIO mode. We must also ignore all the blacklists
 *	and the drive bus mastering DMA information.
 *
 *	*** This driver is strictly experimental ***
 *
 *	(c) Copyright Red Hat Inc 2002
 * 
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2, or (at your option) any
 * later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * For the avoidance of doubt the "preferred form" of this code is one which
 * is in an open non patent encumbered format. Where cryptographic key signing
 * forms part of the process of creating an executable the information
 * including keys needed to generate an equivalently functional executable
 * are deemed to be part of the source code.
 *
 */
 
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/ide.h>
#include <linux/dma-mapping.h>

#define DRV_NAME "cs5520"

struct pio_clocks
{
	int address;
	int assert;
	int recovery;
};

static struct pio_clocks cs5520_pio_clocks[]={
	{3, 6, 11},
	{2, 5, 6},
	{1, 4, 3},
	{1, 3, 2},
	{1, 2, 1}
};

static void cs5520_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive)
{
	struct pci_dev *pdev = to_pci_dev(hwif->dev);
	int controller = drive->dn > 1 ? 1 : 0;
	const u8 pio = drive->pio_mode - XFER_PIO_0;

	/* 8bit CAT/CRT - 8bit command timing for channel */
	pci_write_config_byte(pdev, 0x62 + controller, 
		(cs5520_pio_clocks[pio].recovery << 4) |
		(cs5520_pio_clocks[pio].assert));

	/* 0x64 - 16bit Primary, 0x68 - 16bit Secondary */

	/* FIXME: should these use address ? */
	/* Data read timing */
	pci_write_config_byte(pdev, 0x64 + 4*controller + (drive->dn&1),
		(cs5520_pio_clocks[pio].recovery << 4) |
		(cs5520_pio_clocks[pio].assert));
	/* Write command timing */
	pci_write_config_byte(pdev, 0x66 + 4*controller + (drive->dn&1),
		(cs5520_pio_clocks[pio].recovery << 4) |
		(cs5520_pio_clocks[pio].assert));
}

static void cs5520_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive)
{
	printk(KERN_ERR "cs55x0: bad ide timing.\n");

	drive->pio_mode = XFER_PIO_0 + 0;
	cs5520_set_pio_mode(hwif, drive);
}

static const struct ide_port_ops cs5520_port_ops = {
	.set_pio_mode		= cs5520_set_pio_mode,
	.set_dma_mode		= cs5520_set_dma_mode,
};

static const struct ide_port_info cyrix_chipset = {
	.name		= DRV_NAME,
	.enablebits	= { { 0x60, 0x01, 0x01 }, { 0x60, 0x02, 0x02 } },
	.port_ops	= &cs5520_port_ops,
	.host_flags	= IDE_HFLAG_ISA_PORTS | IDE_HFLAG_CS5520,
	.pio_mask	= ATA_PIO4,
};

/*
 *	The 5510/5520 are a bit weird. They don't quite set up the way
 *	the PCI helper layer expects so we must do much of the set up 
 *	work longhand.
 */
 
static int cs5520_init_one(struct pci_dev *dev, const struct pci_device_id *id)
{
	const struct ide_port_info *d = &cyrix_chipset;
	struct ide_hw hw[2], *hws[] = { NULL, NULL };

	ide_setup_pci_noise(dev, d);

	/* We must not grab the entire device, it has 'ISA' space in its
	 * BARS too and we will freak out other bits of the kernel
	 */
	if (pci_enable_device_io(dev)) {
		printk(KERN_WARNING "%s: Unable to enable 55x0.\n", d->name);
		return -ENODEV;
	}
	pci_set_master(dev);
	if (dma_set_mask(&dev->dev, DMA_BIT_MASK(32))) {
		printk(KERN_WARNING "%s: No suitable DMA available.\n",
			d->name);
		return -ENODEV;
	}

	/*
	 *	Now the chipset is configured we can let the core
	 *	do all the device setup for us
	 */

	ide_pci_setup_ports(dev, d, &hw[0], &hws[0]);
	hw[0].irq = 14;
	hw[1].irq = 15;

	return ide_host_add(d, hws, 2, NULL);
}

static const struct pci_device_id cs5520_pci_tbl[] = {
	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5510), 0 },
	{ PCI_VDEVICE(CYRIX, PCI_DEVICE_ID_CYRIX_5520), 1 },
	{ 0, },
};
MODULE_DEVICE_TABLE(pci, cs5520_pci_tbl);

static struct pci_driver cs5520_pci_driver = {
	.name		= "Cyrix_IDE",
	.id_table	= cs5520_pci_tbl,
	.probe		= cs5520_init_one,
	.suspend	= ide_pci_suspend,
	.resume		= ide_pci_resume,
};

static int __init cs5520_ide_init(void)
{
	return ide_pci_register_driver(&cs5520_pci_driver);
}

module_init(cs5520_ide_init);

MODULE_AUTHOR("Alan Cox");
MODULE_DESCRIPTION("PCI driver module for Cyrix 5510/5520 IDE");
MODULE_LICENSE("GPL");
back to top