Revision 93d2175d3d31f11ba04fcfa0e9a496a1b4bc8b34 authored by Yinghai Lu on 14 May 2011, 01:06:17 UTC, committed by Linus Torvalds on 17 May 2011, 01:33:35 UTC
During pci remove/rescan testing found:

  pci 0000:c0:03.0: PCI bridge to [bus c4-c9]
  pci 0000:c0:03.0:   bridge window [io  0x1000-0x0fff]
  pci 0000:c0:03.0:   bridge window [mem 0xf0000000-0xf00fffff]
  pci 0000:c0:03.0:   bridge window [mem 0xfc180000000-0xfc197ffffff 64bit pref]
  pci 0000:c0:03.0: device not available (can't reserve [io  0x1000-0x0fff])
  pci 0000:c0:03.0: Error enabling bridge (-22), continuing
  pci 0000:c0:03.0: enabling bus mastering
  pci 0000:c0:03.0: setting latency timer to 64
  pcieport 0000:c0:03.0: device not available (can't reserve [io  0x1000-0x0fff])
  pcieport: probe of 0000:c0:03.0 failed with error -22

This bug was caused by commit c8adf9a3e873 ("PCI: pre-allocate
additional resources to devices only after successful allocation of
essential resources.")

After that commit, pci_hotplug_io_size is changed to additional_io_size
from minimum size.  So it will not go through the resource_size(res) != 0
path, and will not be reset.

The root cause is: pci_bridge_check_ranges will set the RESOURCE_IO flag
for a pci bridge, and later, if the children do not need IO resources,
those bridge resources will not need to be allocated, but the flags are
still there.  That will confuse pci_enable_bridges later.

related code:

   static void assign_requested_resources_sorted(struct resource_list *head,
                                    struct resource_list_x *fail_head)
   {
           struct resource *res;
           struct resource_list *list;
           int idx;

           for (list = head->next; list; list = list->next) {
                   res = list->res;
                   idx = res - &list->dev->resource[0];
                   if (resource_size(res) && pci_assign_resource(list->dev, idx)) {
   ...
                           reset_resource(res);
                   }
           }
   }

Finally, we have to clear the flags in pbus_size_mem/io when requested
size == 0 and !add_head, because in this case it will not go through
adjust_resources_sorted().

Just make size1 = size0 when !add_head; that will get the flags cleared.

At the same time, when requested size == 0 and add_size != 0, the resource
will still be kept in head and add_list, because we do not clear the flags
for it.

After this, we will get right result:

  pci 0000:c0:03.0: PCI bridge to [bus c4-c9]
  pci 0000:c0:03.0:   bridge window [io  disabled]
  pci 0000:c0:03.0:   bridge window [mem 0xf0000000-0xf00fffff]
  pci 0000:c0:03.0:   bridge window [mem 0xfc180000000-0xfc197ffffff 64bit pref]
  pci 0000:c0:03.0: enabling bus mastering
  pci 0000:c0:03.0: setting latency timer to 64
  pcieport 0000:c0:03.0: setting latency timer to 64
  pcieport 0000:c0:03.0: irq 160 for MSI/MSI-X
  pcieport 0000:c0:03.0: Signaling PME through PCIe PME interrupt
  pci 0000:c4:00.0: Signaling PME through PCIe PME interrupt
  pcie_pme 0000:c0:03.0:pcie01: service driver pcie_pme loaded
  aer 0000:c0:03.0:pcie02: service driver aer loaded
  pciehp 0000:c0:03.0:pcie04: Hotplug Controller:

v3: simpler fix.  Also fix one typo in pbus_size_mem.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Reviewed-by: Ram Pai <linuxram@us.ibm.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent df8d06a
Raw File
do_mounts_md.c
#include <linux/delay.h>
#include <linux/raid/md_u.h>
#include <linux/raid/md_p.h>

#include "do_mounts.h"

/*
 * When md (and any required personalities) are compiled into the kernel
 * (not as a module), arrays can be assembled at boot time, either using
 * AUTODETECT, where specially marked partitions are registered with
 * md_autodetect_dev(), or using MD_BOOT, where the devices to be collected
 * are given on the boot line with md=.....
 * The code for that is here.
 */

/*
 * Autodetection switch: non-zero makes md_run_setup() skip
 * autodetect_raid().  It defaults to "skip" unless CONFIG_MD_AUTODETECT
 * is defined, and raid_setup() can change it from the "raid=" parameter.
 */
#ifdef CONFIG_MD_AUTODETECT
static int __initdata raid_noautodetect;
#else
static int __initdata raid_noautodetect=1;
#endif
/* Set by raid_setup(); handed to the RAID_AUTORUN ioctl as its argument. */
static int __initdata raid_autopart;

/* One slot per distinct "md=" boot parameter, filled in by md_setup(). */
static struct {
	int minor;		/* md device number, first number in md= */
	int partitioned;	/* 1 when the parameter was written "md=d..." */
	int level;		/* 0, LEVEL_LINEAR, or LEVEL_NONE (superblock) */
	int chunk;		/* 1 << (factor+12); only set for 0/linear */
	char *device_names;	/* rest of the parameter: device list */
} md_setup_args[256] __initdata;

/* Number of md_setup_args[] slots currently in use. */
static int md_setup_ents __initdata;

/*
 * Parse the command-line parameters given our kernel, but do not
 * actually try to invoke the MD device now; that is handled by
 * md_setup_drive after the low-level disk drivers have initialised.
 *
 * 27/11/1999: Fixed to work correctly with the 2.3 kernel (which
 *             assigns the task of parsing integer arguments to the
 *             invoked program now).  Added ability to initialise all
 *             the MD devices (by specifying multiple "md=" lines)
 *             instead of just one.  -- KTK
 * 18May2000: Added support for persistent-superblock arrays:
 *             md=n,0,factor,fault,device-list   uses RAID0 for device n
 *             md=n,-1,factor,fault,device-list  uses LINEAR for device n
 *             md=n,device-list      reads a RAID superblock from the devices
 *             elements in device-list are read by name_to_kdev_t so can be
 *             a hex number or something like /dev/hda1 /dev/sdb
 * 2001-06-03: Dave Cinege <dcinege@psychosis.com>
 *		Shifted name_to_kdev_t() and related operations to md_set_drive()
 *		for later execution. Rewrote section to make devfs compatible.
 *
 * @str: text following "md=" on the command line.  A leading 'd' selects
 *       a partitioned array.  The string is not copied: the slot keeps a
 *       pointer into it.
 *
 * A parameter naming an (minor, partitioned) pair that was already seen
 * replaces the earlier definition.  A slot is only claimed once the whole
 * parameter has been parsed successfully, so a malformed parameter leaves
 * md_setup_args[] and md_setup_ents untouched.
 *
 * Returns 1 when the parameter was recorded, 0 when it was rejected.
 */
static int __init md_setup(char *str)
{
	int minor, level, factor, fault, partitioned = 0;
	char *pername = "";
	char *str1;
	int ent;

	if (*str == 'd') {
		partitioned = 1;
		str++;
	}
	if (get_option(&str, &minor) != 2) {	/* MD Number */
		printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
		return 0;
	}
	/* Remember where the device list starts for the superblock forms. */
	str1 = str;
	for (ent=0 ; ent< md_setup_ents ; ent++)
		if (md_setup_args[ent].minor == minor &&
		    md_setup_args[ent].partitioned == partitioned) {
			printk(KERN_WARNING "md: md=%s%d, Specified more than once. "
			       "Replacing previous definition.\n", partitioned?"d":"", minor);
			break;
		}
	if (ent >= ARRAY_SIZE(md_setup_args)) {
		printk(KERN_WARNING "md: md=%s%d - too many md initialisations\n", partitioned?"d":"", minor);
		return 0;
	}
	switch (get_option(&str, &level)) {	/* RAID level */
	case 2: /* could be 0 or -1.. */
		if (level == 0 || level == LEVEL_LINEAR) {
			if (get_option(&str, &factor) != 2 ||	/* Chunk Size */
					get_option(&str, &fault) != 2) {
				printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
				return 0;
			}
			/*
			 * The chunk is 1 << (factor+12) held in an int, so
			 * the shift count must stay within 0..30; anything
			 * else would be undefined behaviour.
			 */
			if (factor < -12 || factor > 18) {
				printk(KERN_WARNING "md: Invalid chunk size factor %d supplied to md=.\n",
				       factor);
				return 0;
			}
			md_setup_args[ent].level = level;
			md_setup_args[ent].chunk = 1 << (factor+12);
			if (level ==  LEVEL_LINEAR)
				pername = "linear";
			else
				pername = "raid0";
			break;
		}
		/* FALL THROUGH */
	case 1: /* the first device is numeric */
		str = str1;
		/* FALL THROUGH */
	case 0:
		md_setup_args[ent].level = LEVEL_NONE;
		pername="super-block";
	}

	printk(KERN_INFO "md: Will configure md%d (%s) from %s, below.\n",
		minor, pername, str);
	md_setup_args[ent].device_names = str;
	md_setup_args[ent].partitioned = partitioned;
	md_setup_args[ent].minor = minor;

	/* Parsing succeeded: claim the slot if it is a new one. */
	if (ent >= md_setup_ents)
		md_setup_ents++;

	return 1;
}

/*
 * Assemble and start every array described by an "md=" boot parameter.
 *
 * For each recorded md_setup_args[] entry this:
 *   - builds the node name (/dev/md<N> or /dev/md_d<N>) and calls
 *     create_dev() for it;
 *   - resolves up to MD_SB_DISKS comma-separated member names to device
 *     numbers, stopping at the first name that cannot be resolved;
 *   - opens the node and issues SET_ARRAY_INFO, ADD_NEW_DISK for each
 *     member, and RUN_ARRAY;
 *   - on success reopens the node and issues BLKRRPART.
 *
 * Entries with no usable member are skipped.  The member list stored in
 * md_setup_args[].device_names is split in place (commas are overwritten
 * with NULs).
 */
static void __init md_setup_drive(void)
{
	int minor, i, ent, partitioned;
	dev_t dev;
	dev_t devices[MD_SB_DISKS+1];	/* zero-terminated member list */

	for (ent = 0; ent < md_setup_ents ; ent++) {
		int fd;
		int err = 0;
		char *devname;
		mdu_disk_info_t dinfo;
		/*
		 * "/dev/md_d" (9) + widest int "-2147483648" (11) + NUL
		 * needs 21 bytes; minor comes unchecked from the command
		 * line, so size the buffer for the worst case.
		 */
		char name[32];

		minor = md_setup_args[ent].minor;
		partitioned = md_setup_args[ent].partitioned;
		devname = md_setup_args[ent].device_names;

		/*
		 * The persistent path below only fills in major/minor, so
		 * clear the rest rather than pass stack garbage to the ioctl.
		 */
		memset(&dinfo, 0, sizeof(dinfo));

		snprintf(name, sizeof(name), "/dev/md%s%d",
			 partitioned?"_d":"", minor);
		if (partitioned)
			dev = MKDEV(mdp_major, minor << MdpMinorShift);
		else
			dev = MKDEV(MD_MAJOR, minor);
		create_dev(name, dev);
		for (i = 0; i < MD_SB_DISKS && devname != NULL; i++) {
			char *p;
			char comp_name[64];
			u32 rdev;

			/* Cut the current name off the rest of the list. */
			p = strchr(devname, ',');
			if (p)
				*p++ = 0;

			dev = name_to_dev_t(devname);
			if (strncmp(devname, "/dev/", 5) == 0)
				devname += 5;
			snprintf(comp_name, sizeof(comp_name), "/dev/%s", devname);
			/* A non-zero bstat() result overrides name_to_dev_t(). */
			rdev = bstat(comp_name);
			if (rdev)
				dev = new_decode_dev(rdev);
			if (!dev) {
				printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
				break;
			}

			devices[i] = dev;

			devname = p;
		}
		devices[i] = 0;

		if (!i)
			continue;

		printk(KERN_INFO "md: Loading md%s%d: %s\n",
			partitioned ? "_d" : "", minor,
			md_setup_args[ent].device_names);

		fd = sys_open(name, 0, 0);
		if (fd < 0) {
			printk(KERN_ERR "md: open failed - cannot start "
					"array %s\n", name);
			continue;
		}
		if (sys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
			printk(KERN_WARNING
			       "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
			       minor);
			sys_close(fd);
			continue;
		}

		if (md_setup_args[ent].level != LEVEL_NONE) {
			/* non-persistent */
			mdu_array_info_t ainfo;

			/* Any field not assigned below is passed as zero. */
			memset(&ainfo, 0, sizeof(ainfo));
			ainfo.level = md_setup_args[ent].level;
			ainfo.size = 0;
			ainfo.nr_disks =0;
			ainfo.raid_disks =0;
			while (devices[ainfo.raid_disks])
				ainfo.raid_disks++;
			ainfo.md_minor =minor;
			ainfo.not_persistent = 1;

			ainfo.state = (1 << MD_SB_CLEAN);
			ainfo.layout = 0;
			ainfo.chunk_size = md_setup_args[ent].chunk;
			err = sys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
			for (i = 0; !err && i <= MD_SB_DISKS; i++) {
				dev = devices[i];
				if (!dev)
					break;
				dinfo.number = i;
				dinfo.raid_disk = i;
				dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
				dinfo.major = MAJOR(dev);
				dinfo.minor = MINOR(dev);
				err = sys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
			}
		} else {
			/* persistent */
			for (i = 0; i <= MD_SB_DISKS; i++) {
				dev = devices[i];
				if (!dev)
					break;
				dinfo.major = MAJOR(dev);
				dinfo.minor = MINOR(dev);
				/* Failures here are deliberately ignored. */
				sys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
			}
		}
		if (!err)
			err = sys_ioctl(fd, RUN_ARRAY, 0);
		if (err)
			printk(KERN_WARNING "md: starting md%d failed\n", minor);
		else {
			/* reread the partition table.
			 * I (neilb) and not sure why this is needed, but I cannot
			 * boot a kernel with devfs compiled in from partitioned md
			 * array without it
			 */
			sys_close(fd);
			fd = sys_open(name, 0, 0);
			/* Nothing to reread or close if the reopen failed. */
			if (fd < 0)
				continue;
			sys_ioctl(fd, BLKRRPART, 0);
		}
		sys_close(fd);
	}
}

/*
 * Handle the "raid=" boot parameter.
 *
 * @str: comma-separated list of words.  Each word is compared against the
 *       keywords below over the word's own length, so a word that is a
 *       prefix of a keyword also matches it:
 *         noautodetect          - set raid_noautodetect
 *         autodetect            - clear raid_noautodetect
 *         partitionable, part   - set raid_autopart
 *       Empty words (e.g. from a trailing comma) are ignored; unknown
 *       words are silently skipped.
 *
 * Always returns 1.
 */
static int __init raid_setup(char *str)
{
	int len, pos;

	len = strlen(str) + 1;
	pos = 0;

	while (pos < len) {
		/* Compare the current word, not the start of the string. */
		char *word = str + pos;
		char *comma = strchr(word, ',');
		int wlen;

		if (comma)
			wlen = comma - word;
		else
			wlen = (len-1)-pos;

		/*
		 * A zero-length compare matches every keyword, which would
		 * flip all the flags; skip empty words instead.
		 */
		if (wlen > 0) {
			if (!strncmp(word, "noautodetect", wlen))
				raid_noautodetect = 1;
			if (!strncmp(word, "autodetect", wlen))
				raid_noautodetect = 0;
			if (strncmp(word, "partitionable", wlen)==0)
				raid_autopart = 1;
			if (strncmp(word, "part", wlen)==0)
				raid_autopart = 1;
		}
		pos += wlen+1;
	}
	return 1;
}

/* Hook the parsers above up to the "raid=" and "md=" boot-line options. */
__setup("raid=", raid_setup);
__setup("md=", md_setup);

/*
 * Ask md to autodetect arrays: announce the wait, let device probing
 * finish, then issue RAID_AUTORUN (with raid_autopart as its argument)
 * on /dev/md0.  Does nothing further if /dev/md0 cannot be opened.
 */
static void __init autodetect_raid(void)
{
	int md0;

	printk(KERN_INFO "md: Waiting for all devices to be available before autodetect\n");
	printk(KERN_INFO "md: If you don't use raid, use raid=noautodetect\n");

	/*
	 * Detecting and using only half of a raid array must be avoided,
	 * so hold off until the known devices have completed probing.
	 */
	wait_for_device_probe();

	md0 = sys_open((const char __user __force *) "/dev/md0", 0, 0);
	if (md0 < 0)
		return;

	sys_ioctl(md0, RAID_AUTORUN, raid_autopart);
	sys_close(md0);
}

/*
 * Boot-time md entry point: call create_dev() for /dev/md0, run
 * autodetection unless raid_noautodetect is set, then assemble the
 * arrays requested with "md=" via md_setup_drive().
 */
void __init md_run_setup(void)
{
	create_dev("/dev/md0", MKDEV(MD_MAJOR, 0));

	if (!raid_noautodetect) {
		autodetect_raid();
	} else {
		printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=autodetect will force)\n");
	}

	md_setup_drive();
}
back to top