Revision 8aef18845266f5c05904c610088f2d1ed58f6be3 authored by Al Viro on 16 June 2011, 14:10:06 UTC, committed by Al Viro on 16 June 2011, 15:28:16 UTC
[Kudos to dhowells for tracking that crap down] If two processes attempt to cause automounting on the same mountpoint at the same time, the vfsmount holding the mountpoint will be left with one too few references on it, causing a BUG when the kernel tries to clean up. The problem is that lock_mount() drops the caller's reference to the mountpoint's vfsmount in the case where it finds something already mounted on the mountpoint as it transits to the mounted filesystem and replaces path->mnt with the new mountpoint vfsmount. During a pathwalk, however, we don't take a reference on the vfsmount if it is the same as the one in the nameidata struct, but do_add_mount() doesn't know this. The fix is to make sure we have a ref on the vfsmount of the mountpoint before calling do_add_mount(). However, if lock_mount() doesn't transit, we're then left with an extra ref on the mountpoint vfsmount which needs releasing. We can handle that in follow_managed() by not making assumptions about what we can and what we cannot get from lookup_mnt() as the current code does. The callers of follow_managed() expect that reference to path->mnt will be grabbed iff path->mnt has been changed. follow_managed() and follow_automount() keep track of whether such reference has been grabbed and assume that it'll happen in those and only those cases that'll have us return with changed path->mnt. That assumption is almost correct - it breaks in case of racing automounts and in even harder to hit race between following a mountpoint and a couple of mount --move. The thing is, we don't need to make that assumption at all - after the end of loop in follow_manage() we can check if path->mnt has ended up unchanged and do mntput() if needed. The BUG can be reproduced with the following test program: #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include <sys/wait.h> int main(int argc, char **argv) { int pid, ws; struct stat buf; pid = fork(); stat(argv[1], &buf); if (pid > 0) wait(&ws); return 0; } and the following procedure: (1) Mount an NFS volume that on the server has something else mounted on a subdirectory. For instance, I can mount / from my server: mount warthog:/ /mnt -t nfs4 -r On the server /data has another filesystem mounted on it, so NFS will see a change in FSID as it walks down the path, and will mark /mnt/data as being a mountpoint. This will cause the automount code to be triggered. !!! Do not look inside the mounted fs at this point !!! (2) Run the above program on a file within the submount to generate two simultaneous automount requests: /tmp/forkstat /mnt/data/testfile (3) Unmount the automounted submount: umount /mnt/data (4) Unmount the original mount: umount /mnt At this point the kernel should throw a BUG with something like the following: BUG: Dentry ffff880032e3c5c0{i=2,n=} still in use (1) [unmount of nfs4 0:12] Note that the bug appears on the root dentry of the original mount, not the mountpoint and not the submount because sys_umount() hasn't got to its final mntput_no_expire() yet, but this isn't so obvious from the call trace: [<ffffffff8117cd82>] shrink_dcache_for_umount+0x69/0x82 [<ffffffff8116160e>] generic_shutdown_super+0x37/0x15b [<ffffffffa00fae56>] ? nfs_super_return_all_delegations+0x2e/0x1b1 [nfs] [<ffffffff811617f3>] kill_anon_super+0x1d/0x7e [<ffffffffa00d0be1>] nfs4_kill_super+0x60/0xb6 [nfs] [<ffffffff81161c17>] deactivate_locked_super+0x34/0x83 [<ffffffff811629ff>] deactivate_super+0x6f/0x7b [<ffffffff81186261>] mntput_no_expire+0x18d/0x199 [<ffffffff811862a8>] mntput+0x3b/0x44 [<ffffffff81186d87>] release_mounts+0xa2/0xbf [<ffffffff811876af>] sys_umount+0x47a/0x4ba [<ffffffff8109e1ca>] ? trace_hardirqs_on_caller+0x1fd/0x22f [<ffffffff816ea86b>] system_call_fastpath+0x16/0x1b as do_umount() is inlined. However, you can see release_mounts() in there. Note also that it may be necessary to have multiple CPU cores to be able to trigger this bug. Tested-by: Jeff Layton <jlayton@redhat.com> Tested-by: Ian Kent <raven@themaw.net> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 50338b8
newtonkbd.c
/*
* Copyright (c) 2000 Justin Cormack
*/
/*
* Newton keyboard driver for Linux
*/
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Should you need to contact me, the author, you can do so either by
* e-mail - mail your message to <j.cormack@doc.ic.ac.uk>, or by paper mail:
* Justin Cormack, 68 Dartmouth Park Road, London NW5 1SN, UK.
*/
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/input.h>
#include <linux/init.h>
#include <linux/serio.h>
#define DRIVER_DESC "Newton keyboard driver"
MODULE_AUTHOR("Justin Cormack <j.cormack@doc.ic.ac.uk>");
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
#define NKBD_KEY 0x7f
#define NKBD_PRESS 0x80
static unsigned char nkbd_keycode[128] = {
KEY_A, KEY_S, KEY_D, KEY_F, KEY_H, KEY_G, KEY_Z, KEY_X,
KEY_C, KEY_V, 0, KEY_B, KEY_Q, KEY_W, KEY_E, KEY_R,
KEY_Y, KEY_T, KEY_1, KEY_2, KEY_3, KEY_4, KEY_6, KEY_5,
KEY_EQUAL, KEY_9, KEY_7, KEY_MINUS, KEY_8, KEY_0, KEY_RIGHTBRACE, KEY_O,
KEY_U, KEY_LEFTBRACE, KEY_I, KEY_P, KEY_ENTER, KEY_L, KEY_J, KEY_APOSTROPHE,
KEY_K, KEY_SEMICOLON, KEY_BACKSLASH, KEY_COMMA, KEY_SLASH, KEY_N, KEY_M, KEY_DOT,
KEY_TAB, KEY_SPACE, KEY_GRAVE, KEY_DELETE, 0, 0, 0, KEY_LEFTMETA,
KEY_LEFTSHIFT, KEY_CAPSLOCK, KEY_LEFTALT, KEY_LEFTCTRL, KEY_RIGHTSHIFT, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
KEY_LEFT, KEY_RIGHT, KEY_DOWN, KEY_UP, 0
};
struct nkbd {
unsigned char keycode[128];
struct input_dev *dev;
struct serio *serio;
char phys[32];
};
static irqreturn_t nkbd_interrupt(struct serio *serio,
unsigned char data, unsigned int flags)
{
struct nkbd *nkbd = serio_get_drvdata(serio);
/* invalid scan codes are probably the init sequence, so we ignore them */
if (nkbd->keycode[data & NKBD_KEY]) {
input_report_key(nkbd->dev, nkbd->keycode[data & NKBD_KEY], data & NKBD_PRESS);
input_sync(nkbd->dev);
}
else if (data == 0xe7) /* end of init sequence */
printk(KERN_INFO "input: %s on %s\n", nkbd->dev->name, serio->phys);
return IRQ_HANDLED;
}
static int nkbd_connect(struct serio *serio, struct serio_driver *drv)
{
struct nkbd *nkbd;
struct input_dev *input_dev;
int err = -ENOMEM;
int i;
nkbd = kzalloc(sizeof(struct nkbd), GFP_KERNEL);
input_dev = input_allocate_device();
if (!nkbd || !input_dev)
goto fail1;
nkbd->serio = serio;
nkbd->dev = input_dev;
snprintf(nkbd->phys, sizeof(nkbd->phys), "%s/input0", serio->phys);
memcpy(nkbd->keycode, nkbd_keycode, sizeof(nkbd->keycode));
input_dev->name = "Newton Keyboard";
input_dev->phys = nkbd->phys;
input_dev->id.bustype = BUS_RS232;
input_dev->id.vendor = SERIO_NEWTON;
input_dev->id.product = 0x0001;
input_dev->id.version = 0x0100;
input_dev->dev.parent = &serio->dev;
input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
input_dev->keycode = nkbd->keycode;
input_dev->keycodesize = sizeof(unsigned char);
input_dev->keycodemax = ARRAY_SIZE(nkbd_keycode);
for (i = 0; i < 128; i++)
set_bit(nkbd->keycode[i], input_dev->keybit);
clear_bit(0, input_dev->keybit);
serio_set_drvdata(serio, nkbd);
err = serio_open(serio, drv);
if (err)
goto fail2;
err = input_register_device(nkbd->dev);
if (err)
goto fail3;
return 0;
fail3: serio_close(serio);
fail2: serio_set_drvdata(serio, NULL);
fail1: input_free_device(input_dev);
kfree(nkbd);
return err;
}
static void nkbd_disconnect(struct serio *serio)
{
struct nkbd *nkbd = serio_get_drvdata(serio);
serio_close(serio);
serio_set_drvdata(serio, NULL);
input_unregister_device(nkbd->dev);
kfree(nkbd);
}
static struct serio_device_id nkbd_serio_ids[] = {
{
.type = SERIO_RS232,
.proto = SERIO_NEWTON,
.id = SERIO_ANY,
.extra = SERIO_ANY,
},
{ 0 }
};
MODULE_DEVICE_TABLE(serio, nkbd_serio_ids);
static struct serio_driver nkbd_drv = {
.driver = {
.name = "newtonkbd",
},
.description = DRIVER_DESC,
.id_table = nkbd_serio_ids,
.interrupt = nkbd_interrupt,
.connect = nkbd_connect,
.disconnect = nkbd_disconnect,
};
static int __init nkbd_init(void)
{
return serio_register_driver(&nkbd_drv);
}
static void __exit nkbd_exit(void)
{
serio_unregister_driver(&nkbd_drv);
}
module_init(nkbd_init);
module_exit(nkbd_exit);
Computing file changes ...