Revision 475049809977bf3975d78f2d2fd992e19ce2d59e authored by Roel Kluin on 10 March 2009, 19:55:45 UTC, committed by Linus Torvalds on 10 March 2009, 22:55:10 UTC
get_nid_for_pfn() returns int

Presumably the (nid < 0) case has never happened.

We do know that it is happening on one system while creating a symlink for
a memory section so it should also happen on the same system if
unregister_mem_sect_under_nodes() were called to remove the same symlink.

The test was actually added in response to a problem with an earlier
version reported by Yasunori Goto where one or more of the leading pages
of a memory section on the 2nd node of one of his systems was
uninitialized because I believe they coincided with a memory hole.

That earlier version did not ignore uninitialized pages and determined
the nid by considering only the 1st page of each memory section.  This
caused the symlink to the 1st memory section on the 2nd node to be
incorrectly created in /sys/devices/system/node/node0 instead of
/sys/devices/system/node/node1.  The problem was fixed by adding the
test to skip over uninitialized pages.

I suspect we have not seen any reports of the non-removal
of a symlink due to the incorrect declaration of the nid
variable in unregister_mem_sect_under_nodes() because
  - systems where a memory section could have an uninitialized
    range of leading pages are probably rare.
  - memory remove is probably not done very frequently on the
    systems that are capable of demonstrating the problem.
  - lingering symlink(s) that should have been removed may
    have simply gone unnoticed.

[garyhade@us.ibm.com: wrote changelog]
Signed-off-by: Roel Kluin <roel.kluin@gmail.com>
Cc: Gary Hade <garyhade@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 1abaf33
Raw File
ide-pm.c
#include <linux/kernel.h>
#include <linux/ide.h>
#include <linux/hdreg.h>

int generic_ide_suspend(struct device *dev, pm_message_t mesg)
{
	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
	ide_hwif_t *hwif = drive->hwif;
	struct request *rq;
	struct request_pm_state rqpm;
	ide_task_t args;
	int ret;

	/* call ACPI _GTM only once */
	if ((drive->dn & 1) == 0 || pair == NULL)
		ide_acpi_get_timing(hwif);

	memset(&rqpm, 0, sizeof(rqpm));
	memset(&args, 0, sizeof(args));
	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
	rq->cmd_type = REQ_TYPE_PM_SUSPEND;
	rq->special = &args;
	rq->data = &rqpm;
	rqpm.pm_step = IDE_PM_START_SUSPEND;
	if (mesg.event == PM_EVENT_PRETHAW)
		mesg.event = PM_EVENT_FREEZE;
	rqpm.pm_state = mesg.event;

	ret = blk_execute_rq(drive->queue, NULL, rq, 0);
	blk_put_request(rq);

	/* call ACPI _PS3 only after both devices are suspended */
	if (ret == 0 && ((drive->dn & 1) || pair == NULL))
		ide_acpi_set_state(hwif, 0);

	return ret;
}

int generic_ide_resume(struct device *dev)
{
	ide_drive_t *drive = dev->driver_data, *pair = ide_get_pair_dev(drive);
	ide_hwif_t *hwif = drive->hwif;
	struct request *rq;
	struct request_pm_state rqpm;
	ide_task_t args;
	int err;

	/* call ACPI _PS0 / _STM only once */
	if ((drive->dn & 1) == 0 || pair == NULL) {
		ide_acpi_set_state(hwif, 1);
		ide_acpi_push_timing(hwif);
	}

	ide_acpi_exec_tfs(drive);

	memset(&rqpm, 0, sizeof(rqpm));
	memset(&args, 0, sizeof(args));
	rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
	rq->cmd_type = REQ_TYPE_PM_RESUME;
	rq->cmd_flags |= REQ_PREEMPT;
	rq->special = &args;
	rq->data = &rqpm;
	rqpm.pm_step = IDE_PM_START_RESUME;
	rqpm.pm_state = PM_EVENT_ON;

	err = blk_execute_rq(drive->queue, NULL, rq, 1);
	blk_put_request(rq);

	if (err == 0 && dev->driver) {
		struct ide_driver *drv = to_ide_driver(dev->driver);

		if (drv->resume)
			drv->resume(drive);
	}

	return err;
}

void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
{
	struct request_pm_state *pm = rq->data;

#ifdef DEBUG_PM
	printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
		drive->name, pm->pm_step);
#endif
	if (drive->media != ide_disk)
		return;

	switch (pm->pm_step) {
	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
		if (pm->pm_state == PM_EVENT_FREEZE)
			pm->pm_step = IDE_PM_COMPLETED;
		else
			pm->pm_step = IDE_PM_STANDBY;
		break;
	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
		pm->pm_step = IDE_PM_COMPLETED;
		break;
	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
		pm->pm_step = IDE_PM_IDLE;
		break;
	case IDE_PM_IDLE:		/* Resume step 2 (idle)*/
		pm->pm_step = IDE_PM_RESTORE_DMA;
		break;
	}
}

ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
{
	struct request_pm_state *pm = rq->data;
	ide_task_t *args = rq->special;

	memset(args, 0, sizeof(*args));

	switch (pm->pm_step) {
	case IDE_PM_FLUSH_CACHE:	/* Suspend step 1 (flush cache) */
		if (drive->media != ide_disk)
			break;
		/* Not supported? Switch to next step now. */
		if (ata_id_flush_enabled(drive->id) == 0 ||
		    (drive->dev_flags & IDE_DFLAG_WCACHE) == 0) {
			ide_complete_power_step(drive, rq);
			return ide_stopped;
		}
		if (ata_id_flush_ext_enabled(drive->id))
			args->tf.command = ATA_CMD_FLUSH_EXT;
		else
			args->tf.command = ATA_CMD_FLUSH;
		goto out_do_tf;
	case IDE_PM_STANDBY:		/* Suspend step 2 (standby) */
		args->tf.command = ATA_CMD_STANDBYNOW1;
		goto out_do_tf;
	case IDE_PM_RESTORE_PIO:	/* Resume step 1 (restore PIO) */
		ide_set_max_pio(drive);
		/*
		 * skip IDE_PM_IDLE for ATAPI devices
		 */
		if (drive->media != ide_disk)
			pm->pm_step = IDE_PM_RESTORE_DMA;
		else
			ide_complete_power_step(drive, rq);
		return ide_stopped;
	case IDE_PM_IDLE:		/* Resume step 2 (idle) */
		args->tf.command = ATA_CMD_IDLEIMMEDIATE;
		goto out_do_tf;
	case IDE_PM_RESTORE_DMA:	/* Resume step 3 (restore DMA) */
		/*
		 * Right now, all we do is call ide_set_dma(drive),
		 * we could be smarter and check for current xfer_speed
		 * in struct drive etc...
		 */
		if (drive->hwif->dma_ops == NULL)
			break;
		/*
		 * TODO: respect IDE_DFLAG_USING_DMA
		 */
		ide_set_dma(drive);
		break;
	}

	pm->pm_step = IDE_PM_COMPLETED;
	return ide_stopped;

out_do_tf:
	args->tf_flags	 = IDE_TFLAG_TF | IDE_TFLAG_DEVICE;
	args->data_phase = TASKFILE_NO_DATA;
	return do_rw_taskfile(drive, args);
}

/**
 *	ide_complete_pm_request - end the current Power Management request
 *	@drive: target drive
 *	@rq: request
 *
 *	This function cleans up the current PM request and stops the queue
 *	if necessary.
 */
void ide_complete_pm_request(ide_drive_t *drive, struct request *rq)
{
	struct request_queue *q = drive->queue;
	unsigned long flags;

#ifdef DEBUG_PM
	printk("%s: completing PM request, %s\n", drive->name,
	       blk_pm_suspend_request(rq) ? "suspend" : "resume");
#endif
	spin_lock_irqsave(q->queue_lock, flags);
	if (blk_pm_suspend_request(rq))
		blk_stop_queue(q);
	else
		drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
	spin_unlock_irqrestore(q->queue_lock, flags);

	drive->hwif->rq = NULL;

	if (blk_end_request(rq, 0, 0))
		BUG();
}

void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
{
	struct request_pm_state *pm = rq->data;

	if (blk_pm_suspend_request(rq) &&
	    pm->pm_step == IDE_PM_START_SUSPEND)
		/* Mark drive blocked when starting the suspend sequence. */
		drive->dev_flags |= IDE_DFLAG_BLOCKED;
	else if (blk_pm_resume_request(rq) &&
		 pm->pm_step == IDE_PM_START_RESUME) {
		/*
		 * The first thing we do on wakeup is to wait for BSY bit to
		 * go away (with a looong timeout) as a drive on this hwif may
		 * just be POSTing itself.
		 * We do that before even selecting as the "other" device on
		 * the bus may be broken enough to walk on our toes at this
		 * point.
		 */
		ide_hwif_t *hwif = drive->hwif;
		struct request_queue *q = drive->queue;
		unsigned long flags;
		int rc;
#ifdef DEBUG_PM
		printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name);
#endif
		rc = ide_wait_not_busy(hwif, 35000);
		if (rc)
			printk(KERN_WARNING "%s: bus not ready on wakeup\n", drive->name);
		SELECT_DRIVE(drive);
		hwif->tp_ops->set_irq(hwif, 1);
		rc = ide_wait_not_busy(hwif, 100000);
		if (rc)
			printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name);

		spin_lock_irqsave(q->queue_lock, flags);
		blk_start_queue(q);
		spin_unlock_irqrestore(q->queue_lock, flags);
	}
}
back to top