Revision 8aef18845266f5c05904c610088f2d1ed58f6be3 authored by Al Viro on 16 June 2011, 14:10:06 UTC, committed by Al Viro on 16 June 2011, 15:28:16 UTC
[Kudos to dhowells for tracking that crap down] If two processes attempt to cause automounting on the same mountpoint at the same time, the vfsmount holding the mountpoint will be left with one too few references on it, causing a BUG when the kernel tries to clean up. The problem is that lock_mount() drops the caller's reference to the mountpoint's vfsmount in the case where it finds something already mounted on the mountpoint as it transits to the mounted filesystem and replaces path->mnt with the new mountpoint vfsmount. During a pathwalk, however, we don't take a reference on the vfsmount if it is the same as the one in the nameidata struct, but do_add_mount() doesn't know this. The fix is to make sure we have a ref on the vfsmount of the mountpoint before calling do_add_mount(). However, if lock_mount() doesn't transit, we're then left with an extra ref on the mountpoint vfsmount which needs releasing. We can handle that in follow_managed() by not making assumptions about what we can and what we cannot get from lookup_mnt() as the current code does. The callers of follow_managed() expect that reference to path->mnt will be grabbed iff path->mnt has been changed. follow_managed() and follow_automount() keep track of whether such reference has been grabbed and assume that it'll happen in those and only those cases that'll have us return with changed path->mnt. That assumption is almost correct - it breaks in case of racing automounts and in even harder to hit race between following a mountpoint and a couple of mount --move. The thing is, we don't need to make that assumption at all - after the end of loop in follow_manage() we can check if path->mnt has ended up unchanged and do mntput() if needed. The BUG can be reproduced with the following test program: #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include <sys/wait.h> int main(int argc, char **argv) { int pid, ws; struct stat buf; pid = fork(); stat(argv[1], &buf); if (pid > 0) wait(&ws); return 0; } and the following procedure: (1) Mount an NFS volume that on the server has something else mounted on a subdirectory. For instance, I can mount / from my server: mount warthog:/ /mnt -t nfs4 -r On the server /data has another filesystem mounted on it, so NFS will see a change in FSID as it walks down the path, and will mark /mnt/data as being a mountpoint. This will cause the automount code to be triggered. !!! Do not look inside the mounted fs at this point !!! (2) Run the above program on a file within the submount to generate two simultaneous automount requests: /tmp/forkstat /mnt/data/testfile (3) Unmount the automounted submount: umount /mnt/data (4) Unmount the original mount: umount /mnt At this point the kernel should throw a BUG with something like the following: BUG: Dentry ffff880032e3c5c0{i=2,n=} still in use (1) [unmount of nfs4 0:12] Note that the bug appears on the root dentry of the original mount, not the mountpoint and not the submount because sys_umount() hasn't got to its final mntput_no_expire() yet, but this isn't so obvious from the call trace: [<ffffffff8117cd82>] shrink_dcache_for_umount+0x69/0x82 [<ffffffff8116160e>] generic_shutdown_super+0x37/0x15b [<ffffffffa00fae56>] ? nfs_super_return_all_delegations+0x2e/0x1b1 [nfs] [<ffffffff811617f3>] kill_anon_super+0x1d/0x7e [<ffffffffa00d0be1>] nfs4_kill_super+0x60/0xb6 [nfs] [<ffffffff81161c17>] deactivate_locked_super+0x34/0x83 [<ffffffff811629ff>] deactivate_super+0x6f/0x7b [<ffffffff81186261>] mntput_no_expire+0x18d/0x199 [<ffffffff811862a8>] mntput+0x3b/0x44 [<ffffffff81186d87>] release_mounts+0xa2/0xbf [<ffffffff811876af>] sys_umount+0x47a/0x4ba [<ffffffff8109e1ca>] ? trace_hardirqs_on_caller+0x1fd/0x22f [<ffffffff816ea86b>] system_call_fastpath+0x16/0x1b as do_umount() is inlined. However, you can see release_mounts() in there. Note also that it may be necessary to have multiple CPU cores to be able to trigger this bug. Tested-by: Jeff Layton <jlayton@redhat.com> Tested-by: Ian Kent <raven@themaw.net> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 50338b8
sparse-keymap.c
/*
* Generic support for sparse keymaps
*
* Copyright (c) 2009 Dmitry Torokhov
*
* Derived from wistron button driver:
* Copyright (C) 2005 Miloslav Trmac <mitr@volny.cz>
* Copyright (C) 2005 Bernhard Rosenkraenzer <bero@arklinux.org>
* Copyright (C) 2005 Dmitry Torokhov <dtor@mail.ru>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <linux/input.h>
#include <linux/input/sparse-keymap.h>
#include <linux/slab.h>
MODULE_AUTHOR("Dmitry Torokhov <dtor@mail.ru>");
MODULE_DESCRIPTION("Generic support for sparse keymaps");
MODULE_LICENSE("GPL v2");
MODULE_VERSION("0.1");
static unsigned int sparse_keymap_get_key_index(struct input_dev *dev,
const struct key_entry *k)
{
struct key_entry *key;
unsigned int idx = 0;
for (key = dev->keycode; key->type != KE_END; key++) {
if (key->type == KE_KEY) {
if (key == k)
break;
idx++;
}
}
return idx;
}
static struct key_entry *sparse_keymap_entry_by_index(struct input_dev *dev,
unsigned int index)
{
struct key_entry *key;
unsigned int key_cnt = 0;
for (key = dev->keycode; key->type != KE_END; key++)
if (key->type == KE_KEY)
if (key_cnt++ == index)
return key;
return NULL;
}
/**
* sparse_keymap_entry_from_scancode - perform sparse keymap lookup
* @dev: Input device using sparse keymap
* @code: Scan code
*
* This function is used to perform &struct key_entry lookup in an
* input device using sparse keymap.
*/
struct key_entry *sparse_keymap_entry_from_scancode(struct input_dev *dev,
unsigned int code)
{
struct key_entry *key;
for (key = dev->keycode; key->type != KE_END; key++)
if (code == key->code)
return key;
return NULL;
}
EXPORT_SYMBOL(sparse_keymap_entry_from_scancode);
/**
* sparse_keymap_entry_from_keycode - perform sparse keymap lookup
* @dev: Input device using sparse keymap
* @keycode: Key code
*
* This function is used to perform &struct key_entry lookup in an
* input device using sparse keymap.
*/
struct key_entry *sparse_keymap_entry_from_keycode(struct input_dev *dev,
unsigned int keycode)
{
struct key_entry *key;
for (key = dev->keycode; key->type != KE_END; key++)
if (key->type == KE_KEY && keycode == key->keycode)
return key;
return NULL;
}
EXPORT_SYMBOL(sparse_keymap_entry_from_keycode);
static struct key_entry *sparse_keymap_locate(struct input_dev *dev,
const struct input_keymap_entry *ke)
{
struct key_entry *key;
unsigned int scancode;
if (ke->flags & INPUT_KEYMAP_BY_INDEX)
key = sparse_keymap_entry_by_index(dev, ke->index);
else if (input_scancode_to_scalar(ke, &scancode) == 0)
key = sparse_keymap_entry_from_scancode(dev, scancode);
else
key = NULL;
return key;
}
static int sparse_keymap_getkeycode(struct input_dev *dev,
struct input_keymap_entry *ke)
{
const struct key_entry *key;
if (dev->keycode) {
key = sparse_keymap_locate(dev, ke);
if (key && key->type == KE_KEY) {
ke->keycode = key->keycode;
if (!(ke->flags & INPUT_KEYMAP_BY_INDEX))
ke->index =
sparse_keymap_get_key_index(dev, key);
ke->len = sizeof(key->code);
memcpy(ke->scancode, &key->code, sizeof(key->code));
return 0;
}
}
return -EINVAL;
}
static int sparse_keymap_setkeycode(struct input_dev *dev,
const struct input_keymap_entry *ke,
unsigned int *old_keycode)
{
struct key_entry *key;
if (dev->keycode) {
key = sparse_keymap_locate(dev, ke);
if (key && key->type == KE_KEY) {
*old_keycode = key->keycode;
key->keycode = ke->keycode;
set_bit(ke->keycode, dev->keybit);
if (!sparse_keymap_entry_from_keycode(dev, *old_keycode))
clear_bit(*old_keycode, dev->keybit);
return 0;
}
}
return -EINVAL;
}
/**
* sparse_keymap_setup - set up sparse keymap for an input device
* @dev: Input device
* @keymap: Keymap in form of array of &key_entry structures ending
* with %KE_END type entry
* @setup: Function that can be used to adjust keymap entries
* depending on device's deeds, may be %NULL
*
* The function calculates size and allocates copy of the original
* keymap after which sets up input device event bits appropriately.
* Before destroying input device allocated keymap should be freed
* with a call to sparse_keymap_free().
*/
int sparse_keymap_setup(struct input_dev *dev,
const struct key_entry *keymap,
int (*setup)(struct input_dev *, struct key_entry *))
{
size_t map_size = 1; /* to account for the last KE_END entry */
const struct key_entry *e;
struct key_entry *map, *entry;
int i;
int error;
for (e = keymap; e->type != KE_END; e++)
map_size++;
map = kcalloc(map_size, sizeof (struct key_entry), GFP_KERNEL);
if (!map)
return -ENOMEM;
memcpy(map, keymap, map_size * sizeof (struct key_entry));
for (i = 0; i < map_size; i++) {
entry = &map[i];
if (setup) {
error = setup(dev, entry);
if (error)
goto err_out;
}
switch (entry->type) {
case KE_KEY:
__set_bit(EV_KEY, dev->evbit);
__set_bit(entry->keycode, dev->keybit);
break;
case KE_SW:
case KE_VSW:
__set_bit(EV_SW, dev->evbit);
__set_bit(entry->sw.code, dev->swbit);
break;
}
}
if (test_bit(EV_KEY, dev->evbit)) {
__set_bit(KEY_UNKNOWN, dev->keybit);
__set_bit(EV_MSC, dev->evbit);
__set_bit(MSC_SCAN, dev->mscbit);
}
dev->keycode = map;
dev->keycodemax = map_size;
dev->getkeycode = sparse_keymap_getkeycode;
dev->setkeycode = sparse_keymap_setkeycode;
return 0;
err_out:
kfree(map);
return error;
}
EXPORT_SYMBOL(sparse_keymap_setup);
/**
* sparse_keymap_free - free memory allocated for sparse keymap
* @dev: Input device using sparse keymap
*
* This function is used to free memory allocated by sparse keymap
* in an input device that was set up by sparse_keymap_setup().
* NOTE: It is safe to cal this function while input device is
* still registered (however the drivers should care not to try to
* use freed keymap and thus have to shut off interrups/polling
* before freeing the keymap).
*/
void sparse_keymap_free(struct input_dev *dev)
{
unsigned long flags;
/*
* Take event lock to prevent racing with input_get_keycode()
* and input_set_keycode() if we are called while input device
* is still registered.
*/
spin_lock_irqsave(&dev->event_lock, flags);
kfree(dev->keycode);
dev->keycode = NULL;
dev->keycodemax = 0;
spin_unlock_irqrestore(&dev->event_lock, flags);
}
EXPORT_SYMBOL(sparse_keymap_free);
/**
* sparse_keymap_report_entry - report event corresponding to given key entry
* @dev: Input device for which event should be reported
* @ke: key entry describing event
* @value: Value that should be reported (ignored by %KE_SW entries)
* @autorelease: Signals whether release event should be emitted for %KE_KEY
* entries right after reporting press event, ignored by all other
* entries
*
* This function is used to report input event described by given
* &struct key_entry.
*/
void sparse_keymap_report_entry(struct input_dev *dev, const struct key_entry *ke,
unsigned int value, bool autorelease)
{
switch (ke->type) {
case KE_KEY:
input_event(dev, EV_MSC, MSC_SCAN, ke->code);
input_report_key(dev, ke->keycode, value);
input_sync(dev);
if (value && autorelease) {
input_report_key(dev, ke->keycode, 0);
input_sync(dev);
}
break;
case KE_SW:
value = ke->sw.value;
/* fall through */
case KE_VSW:
input_report_switch(dev, ke->sw.code, value);
break;
}
}
EXPORT_SYMBOL(sparse_keymap_report_entry);
/**
* sparse_keymap_report_event - report event corresponding to given scancode
* @dev: Input device using sparse keymap
* @code: Scan code
* @value: Value that should be reported (ignored by %KE_SW entries)
* @autorelease: Signals whether release event should be emitted for %KE_KEY
* entries right after reporting press event, ignored by all other
* entries
*
* This function is used to perform lookup in an input device using sparse
* keymap and report corresponding event. Returns %true if lookup was
* successful and %false otherwise.
*/
bool sparse_keymap_report_event(struct input_dev *dev, unsigned int code,
unsigned int value, bool autorelease)
{
const struct key_entry *ke =
sparse_keymap_entry_from_scancode(dev, code);
struct key_entry unknown_ke;
if (ke) {
sparse_keymap_report_entry(dev, ke, value, autorelease);
return true;
}
/* Report an unknown key event as a debugging aid */
unknown_ke.type = KE_KEY;
unknown_ke.code = code;
unknown_ke.keycode = KEY_UNKNOWN;
sparse_keymap_report_entry(dev, &unknown_ke, value, true);
return false;
}
EXPORT_SYMBOL(sparse_keymap_report_event);
Computing file changes ...