Revision 8aef18845266f5c05904c610088f2d1ed58f6be3 authored by Al Viro on 16 June 2011, 14:10:06 UTC, committed by Al Viro on 16 June 2011, 15:28:16 UTC
[Kudos to dhowells for tracking that crap down] If two processes attempt to cause automounting on the same mountpoint at the same time, the vfsmount holding the mountpoint will be left with one too few references on it, causing a BUG when the kernel tries to clean up. The problem is that lock_mount() drops the caller's reference to the mountpoint's vfsmount in the case where it finds something already mounted on the mountpoint as it transits to the mounted filesystem and replaces path->mnt with the new mountpoint vfsmount. During a pathwalk, however, we don't take a reference on the vfsmount if it is the same as the one in the nameidata struct, but do_add_mount() doesn't know this. The fix is to make sure we have a ref on the vfsmount of the mountpoint before calling do_add_mount(). However, if lock_mount() doesn't transit, we're then left with an extra ref on the mountpoint vfsmount which needs releasing. We can handle that in follow_managed() by not making assumptions about what we can and what we cannot get from lookup_mnt() as the current code does. The callers of follow_managed() expect that reference to path->mnt will be grabbed iff path->mnt has been changed. follow_managed() and follow_automount() keep track of whether such reference has been grabbed and assume that it'll happen in those and only those cases that'll have us return with changed path->mnt. That assumption is almost correct - it breaks in case of racing automounts and in even harder to hit race between following a mountpoint and a couple of mount --move. The thing is, we don't need to make that assumption at all - after the end of loop in follow_manage() we can check if path->mnt has ended up unchanged and do mntput() if needed. The BUG can be reproduced with the following test program: #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include <sys/wait.h> int main(int argc, char **argv) { int pid, ws; struct stat buf; pid = fork(); stat(argv[1], &buf); if (pid > 0) wait(&ws); return 0; } and the following procedure: (1) Mount an NFS volume that on the server has something else mounted on a subdirectory. For instance, I can mount / from my server: mount warthog:/ /mnt -t nfs4 -r On the server /data has another filesystem mounted on it, so NFS will see a change in FSID as it walks down the path, and will mark /mnt/data as being a mountpoint. This will cause the automount code to be triggered. !!! Do not look inside the mounted fs at this point !!! (2) Run the above program on a file within the submount to generate two simultaneous automount requests: /tmp/forkstat /mnt/data/testfile (3) Unmount the automounted submount: umount /mnt/data (4) Unmount the original mount: umount /mnt At this point the kernel should throw a BUG with something like the following: BUG: Dentry ffff880032e3c5c0{i=2,n=} still in use (1) [unmount of nfs4 0:12] Note that the bug appears on the root dentry of the original mount, not the mountpoint and not the submount because sys_umount() hasn't got to its final mntput_no_expire() yet, but this isn't so obvious from the call trace: [<ffffffff8117cd82>] shrink_dcache_for_umount+0x69/0x82 [<ffffffff8116160e>] generic_shutdown_super+0x37/0x15b [<ffffffffa00fae56>] ? nfs_super_return_all_delegations+0x2e/0x1b1 [nfs] [<ffffffff811617f3>] kill_anon_super+0x1d/0x7e [<ffffffffa00d0be1>] nfs4_kill_super+0x60/0xb6 [nfs] [<ffffffff81161c17>] deactivate_locked_super+0x34/0x83 [<ffffffff811629ff>] deactivate_super+0x6f/0x7b [<ffffffff81186261>] mntput_no_expire+0x18d/0x199 [<ffffffff811862a8>] mntput+0x3b/0x44 [<ffffffff81186d87>] release_mounts+0xa2/0xbf [<ffffffff811876af>] sys_umount+0x47a/0x4ba [<ffffffff8109e1ca>] ? trace_hardirqs_on_caller+0x1fd/0x22f [<ffffffff816ea86b>] system_call_fastpath+0x16/0x1b as do_umount() is inlined. However, you can see release_mounts() in there. Note also that it may be necessary to have multiple CPU cores to be able to trigger this bug. Tested-by: Jeff Layton <jlayton@redhat.com> Tested-by: Ian Kent <raven@themaw.net> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 50338b8
input-mt.c
/*
* Input Multitouch Library
*
* Copyright (c) 2008-2010 Henrik Rydberg
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation.
*/
#include <linux/input/mt.h>
#include <linux/slab.h>
#define TRKID_SGN ((TRKID_MAX + 1) >> 1)
/**
* input_mt_init_slots() - initialize MT input slots
* @dev: input device supporting MT events and finger tracking
* @num_slots: number of slots used by the device
*
* This function allocates all necessary memory for MT slot handling
* in the input device, prepares the ABS_MT_SLOT and
* ABS_MT_TRACKING_ID events for use and sets up appropriate buffers.
* May be called repeatedly. Returns -EINVAL if attempting to
* reinitialize with a different number of slots.
*/
int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots)
{
int i;
if (!num_slots)
return 0;
if (dev->mt)
return dev->mtsize != num_slots ? -EINVAL : 0;
dev->mt = kcalloc(num_slots, sizeof(struct input_mt_slot), GFP_KERNEL);
if (!dev->mt)
return -ENOMEM;
dev->mtsize = num_slots;
input_set_abs_params(dev, ABS_MT_SLOT, 0, num_slots - 1, 0, 0);
input_set_abs_params(dev, ABS_MT_TRACKING_ID, 0, TRKID_MAX, 0, 0);
input_set_events_per_packet(dev, 6 * num_slots);
/* Mark slots as 'unused' */
for (i = 0; i < num_slots; i++)
input_mt_set_value(&dev->mt[i], ABS_MT_TRACKING_ID, -1);
return 0;
}
EXPORT_SYMBOL(input_mt_init_slots);
/**
* input_mt_destroy_slots() - frees the MT slots of the input device
* @dev: input device with allocated MT slots
*
* This function is only needed in error path as the input core will
* automatically free the MT slots when the device is destroyed.
*/
void input_mt_destroy_slots(struct input_dev *dev)
{
kfree(dev->mt);
dev->mt = NULL;
dev->mtsize = 0;
dev->slot = 0;
dev->trkid = 0;
}
EXPORT_SYMBOL(input_mt_destroy_slots);
/**
* input_mt_report_slot_state() - report contact state
* @dev: input device with allocated MT slots
* @tool_type: the tool type to use in this slot
* @active: true if contact is active, false otherwise
*
* Reports a contact via ABS_MT_TRACKING_ID, and optionally
* ABS_MT_TOOL_TYPE. If active is true and the slot is currently
* inactive, or if the tool type is changed, a new tracking id is
* assigned to the slot. The tool type is only reported if the
* corresponding absbit field is set.
*/
void input_mt_report_slot_state(struct input_dev *dev,
unsigned int tool_type, bool active)
{
struct input_mt_slot *mt;
int id;
if (!dev->mt || !active) {
input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1);
return;
}
mt = &dev->mt[dev->slot];
id = input_mt_get_value(mt, ABS_MT_TRACKING_ID);
if (id < 0 || input_mt_get_value(mt, ABS_MT_TOOL_TYPE) != tool_type)
id = input_mt_new_trkid(dev);
input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, id);
input_event(dev, EV_ABS, ABS_MT_TOOL_TYPE, tool_type);
}
EXPORT_SYMBOL(input_mt_report_slot_state);
/**
* input_mt_report_finger_count() - report contact count
* @dev: input device with allocated MT slots
* @count: the number of contacts
*
* Reports the contact count via BTN_TOOL_FINGER, BTN_TOOL_DOUBLETAP,
* BTN_TOOL_TRIPLETAP and BTN_TOOL_QUADTAP.
*
* The input core ensures only the KEY events already setup for
* this device will produce output.
*/
void input_mt_report_finger_count(struct input_dev *dev, int count)
{
input_event(dev, EV_KEY, BTN_TOOL_FINGER, count == 1);
input_event(dev, EV_KEY, BTN_TOOL_DOUBLETAP, count == 2);
input_event(dev, EV_KEY, BTN_TOOL_TRIPLETAP, count == 3);
input_event(dev, EV_KEY, BTN_TOOL_QUADTAP, count == 4);
}
EXPORT_SYMBOL(input_mt_report_finger_count);
/**
* input_mt_report_pointer_emulation() - common pointer emulation
* @dev: input device with allocated MT slots
* @use_count: report number of active contacts as finger count
*
* Performs legacy pointer emulation via BTN_TOUCH, ABS_X, ABS_Y and
* ABS_PRESSURE. Touchpad finger count is emulated if use_count is true.
*
* The input core ensures only the KEY and ABS axes already setup for
* this device will produce output.
*/
void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count)
{
struct input_mt_slot *oldest = 0;
int oldid = dev->trkid;
int count = 0;
int i;
for (i = 0; i < dev->mtsize; ++i) {
struct input_mt_slot *ps = &dev->mt[i];
int id = input_mt_get_value(ps, ABS_MT_TRACKING_ID);
if (id < 0)
continue;
if ((id - oldid) & TRKID_SGN) {
oldest = ps;
oldid = id;
}
count++;
}
input_event(dev, EV_KEY, BTN_TOUCH, count > 0);
if (use_count)
input_mt_report_finger_count(dev, count);
if (oldest) {
int x = input_mt_get_value(oldest, ABS_MT_POSITION_X);
int y = input_mt_get_value(oldest, ABS_MT_POSITION_Y);
int p = input_mt_get_value(oldest, ABS_MT_PRESSURE);
input_event(dev, EV_ABS, ABS_X, x);
input_event(dev, EV_ABS, ABS_Y, y);
input_event(dev, EV_ABS, ABS_PRESSURE, p);
} else {
input_event(dev, EV_ABS, ABS_PRESSURE, 0);
}
}
EXPORT_SYMBOL(input_mt_report_pointer_emulation);
Computing file changes ...