Revision 8aef18845266f5c05904c610088f2d1ed58f6be3 authored by Al Viro on 16 June 2011, 14:10:06 UTC, committed by Al Viro on 16 June 2011, 15:28:16 UTC
[Kudos to dhowells for tracking that crap down] If two processes attempt to cause automounting on the same mountpoint at the same time, the vfsmount holding the mountpoint will be left with one too few references on it, causing a BUG when the kernel tries to clean up. The problem is that lock_mount() drops the caller's reference to the mountpoint's vfsmount in the case where it finds something already mounted on the mountpoint as it transits to the mounted filesystem and replaces path->mnt with the new mountpoint vfsmount. During a pathwalk, however, we don't take a reference on the vfsmount if it is the same as the one in the nameidata struct, but do_add_mount() doesn't know this. The fix is to make sure we have a ref on the vfsmount of the mountpoint before calling do_add_mount(). However, if lock_mount() doesn't transit, we're then left with an extra ref on the mountpoint vfsmount which needs releasing. We can handle that in follow_managed() by not making assumptions about what we can and what we cannot get from lookup_mnt() as the current code does. The callers of follow_managed() expect that reference to path->mnt will be grabbed iff path->mnt has been changed. follow_managed() and follow_automount() keep track of whether such reference has been grabbed and assume that it'll happen in those and only those cases that'll have us return with changed path->mnt. That assumption is almost correct - it breaks in case of racing automounts and in even harder to hit race between following a mountpoint and a couple of mount --move. The thing is, we don't need to make that assumption at all - after the end of loop in follow_manage() we can check if path->mnt has ended up unchanged and do mntput() if needed. The BUG can be reproduced with the following test program: #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include <sys/wait.h> int main(int argc, char **argv) { int pid, ws; struct stat buf; pid = fork(); stat(argv[1], &buf); if (pid > 0) wait(&ws); return 0; } and the following procedure: (1) Mount an NFS volume that on the server has something else mounted on a subdirectory. For instance, I can mount / from my server: mount warthog:/ /mnt -t nfs4 -r On the server /data has another filesystem mounted on it, so NFS will see a change in FSID as it walks down the path, and will mark /mnt/data as being a mountpoint. This will cause the automount code to be triggered. !!! Do not look inside the mounted fs at this point !!! (2) Run the above program on a file within the submount to generate two simultaneous automount requests: /tmp/forkstat /mnt/data/testfile (3) Unmount the automounted submount: umount /mnt/data (4) Unmount the original mount: umount /mnt At this point the kernel should throw a BUG with something like the following: BUG: Dentry ffff880032e3c5c0{i=2,n=} still in use (1) [unmount of nfs4 0:12] Note that the bug appears on the root dentry of the original mount, not the mountpoint and not the submount because sys_umount() hasn't got to its final mntput_no_expire() yet, but this isn't so obvious from the call trace: [<ffffffff8117cd82>] shrink_dcache_for_umount+0x69/0x82 [<ffffffff8116160e>] generic_shutdown_super+0x37/0x15b [<ffffffffa00fae56>] ? nfs_super_return_all_delegations+0x2e/0x1b1 [nfs] [<ffffffff811617f3>] kill_anon_super+0x1d/0x7e [<ffffffffa00d0be1>] nfs4_kill_super+0x60/0xb6 [nfs] [<ffffffff81161c17>] deactivate_locked_super+0x34/0x83 [<ffffffff811629ff>] deactivate_super+0x6f/0x7b [<ffffffff81186261>] mntput_no_expire+0x18d/0x199 [<ffffffff811862a8>] mntput+0x3b/0x44 [<ffffffff81186d87>] release_mounts+0xa2/0xbf [<ffffffff811876af>] sys_umount+0x47a/0x4ba [<ffffffff8109e1ca>] ? trace_hardirqs_on_caller+0x1fd/0x22f [<ffffffff816ea86b>] system_call_fastpath+0x16/0x1b as do_umount() is inlined. However, you can see release_mounts() in there. Note also that it may be necessary to have multiple CPU cores to be able to trigger this bug. Tested-by: Jeff Layton <jlayton@redhat.com> Tested-by: Ian Kent <raven@themaw.net> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 50338b8
serport.c
/*
* Input device TTY line discipline
*
* Copyright (c) 1999-2002 Vojtech Pavlik
*
* This is a module that converts a tty line into a much simpler
* 'serial io port' abstraction that the input device drivers use.
*/
/*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <asm/uaccess.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/serio.h>
#include <linux/tty.h>
MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>");
MODULE_DESCRIPTION("Input device TTY line discipline");
MODULE_LICENSE("GPL");
MODULE_ALIAS_LDISC(N_MOUSE);
#define SERPORT_BUSY 1
#define SERPORT_ACTIVE 2
#define SERPORT_DEAD 3
struct serport {
struct tty_struct *tty;
wait_queue_head_t wait;
struct serio *serio;
struct serio_device_id id;
spinlock_t lock;
unsigned long flags;
};
/*
* Callback functions from the serio code.
*/
static int serport_serio_write(struct serio *serio, unsigned char data)
{
struct serport *serport = serio->port_data;
return -(serport->tty->ops->write(serport->tty, &data, 1) != 1);
}
static int serport_serio_open(struct serio *serio)
{
struct serport *serport = serio->port_data;
unsigned long flags;
spin_lock_irqsave(&serport->lock, flags);
set_bit(SERPORT_ACTIVE, &serport->flags);
spin_unlock_irqrestore(&serport->lock, flags);
return 0;
}
static void serport_serio_close(struct serio *serio)
{
struct serport *serport = serio->port_data;
unsigned long flags;
spin_lock_irqsave(&serport->lock, flags);
clear_bit(SERPORT_ACTIVE, &serport->flags);
set_bit(SERPORT_DEAD, &serport->flags);
spin_unlock_irqrestore(&serport->lock, flags);
wake_up_interruptible(&serport->wait);
}
/*
* serport_ldisc_open() is the routine that is called upon setting our line
* discipline on a tty. It prepares the serio struct.
*/
static int serport_ldisc_open(struct tty_struct *tty)
{
struct serport *serport;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
serport = kzalloc(sizeof(struct serport), GFP_KERNEL);
if (!serport)
return -ENOMEM;
serport->tty = tty;
spin_lock_init(&serport->lock);
init_waitqueue_head(&serport->wait);
tty->disc_data = serport;
tty->receive_room = 256;
set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
return 0;
}
/*
* serport_ldisc_close() is the opposite of serport_ldisc_open()
*/
static void serport_ldisc_close(struct tty_struct *tty)
{
struct serport *serport = (struct serport *) tty->disc_data;
kfree(serport);
}
/*
* serport_ldisc_receive() is called by the low level tty driver when characters
* are ready for us. We forward the characters and flags, one by one to the
* 'interrupt' routine.
*/
static void serport_ldisc_receive(struct tty_struct *tty, const unsigned char *cp, char *fp, int count)
{
struct serport *serport = (struct serport*) tty->disc_data;
unsigned long flags;
unsigned int ch_flags;
int i;
spin_lock_irqsave(&serport->lock, flags);
if (!test_bit(SERPORT_ACTIVE, &serport->flags))
goto out;
for (i = 0; i < count; i++) {
switch (fp[i]) {
case TTY_FRAME:
ch_flags = SERIO_FRAME;
break;
case TTY_PARITY:
ch_flags = SERIO_PARITY;
break;
default:
ch_flags = 0;
break;
}
serio_interrupt(serport->serio, cp[i], ch_flags);
}
out:
spin_unlock_irqrestore(&serport->lock, flags);
}
/*
* serport_ldisc_read() just waits indefinitely if everything goes well.
* However, when the serio driver closes the serio port, it finishes,
* returning 0 characters.
*/
static ssize_t serport_ldisc_read(struct tty_struct * tty, struct file * file, unsigned char __user * buf, size_t nr)
{
struct serport *serport = (struct serport*) tty->disc_data;
struct serio *serio;
char name[64];
if (test_and_set_bit(SERPORT_BUSY, &serport->flags))
return -EBUSY;
serport->serio = serio = kzalloc(sizeof(struct serio), GFP_KERNEL);
if (!serio)
return -ENOMEM;
strlcpy(serio->name, "Serial port", sizeof(serio->name));
snprintf(serio->phys, sizeof(serio->phys), "%s/serio0", tty_name(tty, name));
serio->id = serport->id;
serio->id.type = SERIO_RS232;
serio->write = serport_serio_write;
serio->open = serport_serio_open;
serio->close = serport_serio_close;
serio->port_data = serport;
serio->dev.parent = tty->dev;
serio_register_port(serport->serio);
printk(KERN_INFO "serio: Serial port %s\n", tty_name(tty, name));
wait_event_interruptible(serport->wait, test_bit(SERPORT_DEAD, &serport->flags));
serio_unregister_port(serport->serio);
serport->serio = NULL;
clear_bit(SERPORT_DEAD, &serport->flags);
clear_bit(SERPORT_BUSY, &serport->flags);
return 0;
}
/*
* serport_ldisc_ioctl() allows to set the port protocol, and device ID
*/
static int serport_ldisc_ioctl(struct tty_struct * tty, struct file * file, unsigned int cmd, unsigned long arg)
{
struct serport *serport = (struct serport*) tty->disc_data;
unsigned long type;
if (cmd == SPIOCSTYPE) {
if (get_user(type, (unsigned long __user *) arg))
return -EFAULT;
serport->id.proto = type & 0x000000ff;
serport->id.id = (type & 0x0000ff00) >> 8;
serport->id.extra = (type & 0x00ff0000) >> 16;
return 0;
}
return -EINVAL;
}
static void serport_ldisc_write_wakeup(struct tty_struct * tty)
{
struct serport *serport = (struct serport *) tty->disc_data;
unsigned long flags;
spin_lock_irqsave(&serport->lock, flags);
if (test_bit(SERPORT_ACTIVE, &serport->flags))
serio_drv_write_wakeup(serport->serio);
spin_unlock_irqrestore(&serport->lock, flags);
}
/*
* The line discipline structure.
*/
static struct tty_ldisc_ops serport_ldisc = {
.owner = THIS_MODULE,
.name = "input",
.open = serport_ldisc_open,
.close = serport_ldisc_close,
.read = serport_ldisc_read,
.ioctl = serport_ldisc_ioctl,
.receive_buf = serport_ldisc_receive,
.write_wakeup = serport_ldisc_write_wakeup
};
/*
* The functions for insering/removing us as a module.
*/
static int __init serport_init(void)
{
int retval;
retval = tty_register_ldisc(N_MOUSE, &serport_ldisc);
if (retval)
printk(KERN_ERR "serport.c: Error registering line discipline.\n");
return retval;
}
static void __exit serport_exit(void)
{
tty_unregister_ldisc(N_MOUSE);
}
module_init(serport_init);
module_exit(serport_exit);
Computing file changes ...