Revision 8aef18845266f5c05904c610088f2d1ed58f6be3 authored by Al Viro on 16 June 2011, 14:10:06 UTC, committed by Al Viro on 16 June 2011, 15:28:16 UTC
[Kudos to dhowells for tracking that crap down]

If two processes attempt to cause automounting on the same mountpoint at the
same time, the vfsmount holding the mountpoint will be left with one too few
references on it, causing a BUG when the kernel tries to clean up.

The problem is that lock_mount() drops the caller's reference to the
mountpoint's vfsmount in the case where it finds something already mounted on
the mountpoint as it transits to the mounted filesystem and replaces path->mnt
with the new mountpoint vfsmount.

During a pathwalk, however, we don't take a reference on the vfsmount if it is
the same as the one in the nameidata struct, but do_add_mount() doesn't know
this.

The fix is to make sure we have a ref on the vfsmount of the mountpoint before
calling do_add_mount().  However, if lock_mount() doesn't transit, we're then
left with an extra ref on the mountpoint vfsmount which needs releasing.
We can handle that in follow_managed() by not making assumptions about what
we can and what we cannot get from lookup_mnt() as the current code does.

The callers of follow_managed() expect that reference to path->mnt will be
grabbed iff path->mnt has been changed.  follow_managed() and follow_automount()
keep track of whether such reference has been grabbed and assume that it'll
happen in those and only those cases that'll have us return with changed
path->mnt.  That assumption is almost correct - it breaks in case of
racing automounts and in an even harder-to-hit race between following a
mountpoint and a couple of mount --move.  The thing is, we don't need to make
that assumption at all - after the end of the loop in follow_managed() we can
check if path->mnt has ended up unchanged and do mntput() if needed.

The BUG can be reproduced with the following test program:

	#include <stdio.h>
	#include <sys/types.h>
	#include <sys/stat.h>
	#include <unistd.h>
	#include <sys/wait.h>
	int main(int argc, char **argv)
	{
		int pid, ws;
		struct stat buf;
		pid = fork();
		stat(argv[1], &buf);
		if (pid > 0) wait(&ws);
		return 0;
	}

and the following procedure:

 (1) Mount an NFS volume that on the server has something else mounted on a
     subdirectory.  For instance, I can mount / from my server:

	mount warthog:/ /mnt -t nfs4 -r

     On the server /data has another filesystem mounted on it, so NFS will see
     a change in FSID as it walks down the path, and will mark /mnt/data as
     being a mountpoint.  This will cause the automount code to be triggered.

     !!! Do not look inside the mounted fs at this point !!!

 (2) Run the above program on a file within the submount to generate two
     simultaneous automount requests:

	/tmp/forkstat /mnt/data/testfile

 (3) Unmount the automounted submount:

	umount /mnt/data

 (4) Unmount the original mount:

	umount /mnt

     At this point the kernel should throw a BUG with something like the
     following:

	BUG: Dentry ffff880032e3c5c0{i=2,n=} still in use (1) [unmount of nfs4 0:12]

Note that the bug appears on the root dentry of the original mount, not the
mountpoint and not the submount because sys_umount() hasn't got to its final
mntput_no_expire() yet, but this isn't so obvious from the call trace:

 [<ffffffff8117cd82>] shrink_dcache_for_umount+0x69/0x82
 [<ffffffff8116160e>] generic_shutdown_super+0x37/0x15b
 [<ffffffffa00fae56>] ? nfs_super_return_all_delegations+0x2e/0x1b1 [nfs]
 [<ffffffff811617f3>] kill_anon_super+0x1d/0x7e
 [<ffffffffa00d0be1>] nfs4_kill_super+0x60/0xb6 [nfs]
 [<ffffffff81161c17>] deactivate_locked_super+0x34/0x83
 [<ffffffff811629ff>] deactivate_super+0x6f/0x7b
 [<ffffffff81186261>] mntput_no_expire+0x18d/0x199
 [<ffffffff811862a8>] mntput+0x3b/0x44
 [<ffffffff81186d87>] release_mounts+0xa2/0xbf
 [<ffffffff811876af>] sys_umount+0x47a/0x4ba
 [<ffffffff8109e1ca>] ? trace_hardirqs_on_caller+0x1fd/0x22f
 [<ffffffff816ea86b>] system_call_fastpath+0x16/0x1b

as do_umount() is inlined.  However, you can see release_mounts() in there.

Note also that it may be necessary to have multiple CPU cores to be able to
trigger this bug.

Tested-by: Jeff Layton <jlayton@redhat.com>
Tested-by: Ian Kent <raven@themaw.net>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 50338b8
Raw File
ff-core.c
/*
 *  Force feedback support for Linux input subsystem
 *
 *  Copyright (c) 2006 Anssi Hannula <anssi.hannula@gmail.com>
 *  Copyright (c) 2006 Dmitry Torokhov <dtor@mail.ru>
 */

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/* #define DEBUG */

#define pr_fmt(fmt) KBUILD_BASENAME ": " fmt

#include <linux/input.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>

/*
 * Validate an effect id and, optionally, who owns it.
 *
 * Returns -EINVAL when effect_id is outside [0, ff->max_effects) or the
 * slot has no owner recorded, -EACCES when a non-NULL file is supplied and
 * it is not the recorded owner, and 0 otherwise.  A NULL file skips the
 * ownership comparison entirely.
 */
static int check_effect_access(struct ff_device *ff, int effect_id,
				struct file *file)
{
	struct file *owner;

	if (effect_id < 0 || effect_id >= ff->max_effects)
		return -EINVAL;

	/* an unowned slot means no effect has been uploaded there */
	owner = ff->effect_owners[effect_id];
	if (!owner)
		return -EINVAL;

	if (file && owner != file)
		return -EACCES;

	return 0;
}

/*
 * Tell whether two effects can be combined together: they must have the
 * same type and, when that type is FF_PERIODIC, the same waveform.
 * Returns non-zero when compatible, 0 otherwise.
 */
static inline int check_effects_compatible(struct ff_effect *e1,
					   struct ff_effect *e2)
{
	if (e1->type != e2->type)
		return 0;

	/* only periodic effects carry a waveform that has to match too */
	if (e1->type != FF_PERIODIC)
		return 1;

	return e1->u.periodic.waveform == e2->u.periodic.waveform;
}

/*
 * Convert an effect into a compatible one.
 *
 * Only FF_RUMBLE is converted here: it is rewritten in place as an
 * FF_PERIODIC sine wave.  Every other effect type is left untouched so the
 * driver can handle the conversion itself.
 *
 * Returns 0 on success, or -EINVAL when a rumble effect is requested but
 * FF_PERIODIC is not set in ff->ffbit, so there is nothing to emulate it
 * with.
 */
static int compat_effect(struct ff_device *ff, struct ff_effect *effect)
{
	int magnitude;

	switch (effect->type) {
	case FF_RUMBLE:
		if (!test_bit(FF_PERIODIC, ff->ffbit))
			return -EINVAL;

		/*
		 * calculate magnitude of sine wave as average of rumble's
		 * 2/3 of strong magnitude and 1/3 of weak magnitude
		 */
		magnitude = effect->u.rumble.strong_magnitude / 3 +
			    effect->u.rumble.weak_magnitude / 6;

		effect->type = FF_PERIODIC;
		effect->u.periodic.waveform = FF_SINE;
		effect->u.periodic.period = 50;
		/*
		 * Cap the magnitude at 0x7fff.  This has to be min(): using
		 * max() here would force every converted rumble effect to
		 * 0x7fff no matter how weak the requested rumble was.
		 */
		effect->u.periodic.magnitude = min(magnitude, 0x7fff);
		effect->u.periodic.offset = 0;
		effect->u.periodic.phase = 0;
		effect->u.periodic.envelope.attack_length = 0;
		effect->u.periodic.envelope.attack_level = 0;
		effect->u.periodic.envelope.fade_length = 0;
		effect->u.periodic.envelope.fade_level = 0;

		return 0;

	default:
		/* Let driver handle conversion */
		return 0;
	}
}

/**
 * input_ff_upload() - upload effect into force-feedback device
 * @dev: input device
 * @effect: effect to be uploaded
 * @file: owner of the effect
 */
int input_ff_upload(struct input_dev *dev, struct ff_effect *effect,
		    struct file *file)
{
	struct ff_device *ff = dev->ff;
	struct ff_effect *old;
	int ret = 0;
	int id;

	if (!test_bit(EV_FF, dev->evbit))
		return -ENOSYS;

	if (effect->type < FF_EFFECT_MIN || effect->type > FF_EFFECT_MAX ||
	    !test_bit(effect->type, dev->ffbit)) {
		pr_debug("invalid or not supported effect type in upload\n");
		return -EINVAL;
	}

	if (effect->type == FF_PERIODIC &&
	    (effect->u.periodic.waveform < FF_WAVEFORM_MIN ||
	     effect->u.periodic.waveform > FF_WAVEFORM_MAX ||
	     !test_bit(effect->u.periodic.waveform, dev->ffbit))) {
		pr_debug("invalid or not supported wave form in upload\n");
		return -EINVAL;
	}

	if (!test_bit(effect->type, ff->ffbit)) {
		ret = compat_effect(ff, effect);
		if (ret)
			return ret;
	}

	mutex_lock(&ff->mutex);

	if (effect->id == -1) {
		for (id = 0; id < ff->max_effects; id++)
		     if (!ff->effect_owners[id])
			break;

		if (id >= ff->max_effects) {
			ret = -ENOSPC;
			goto out;
		}

		effect->id = id;
		old = NULL;

	} else {
		id = effect->id;

		ret = check_effect_access(ff, id, file);
		if (ret)
			goto out;

		old = &ff->effects[id];

		if (!check_effects_compatible(effect, old)) {
			ret = -EINVAL;
			goto out;
		}
	}

	ret = ff->upload(dev, effect, old);
	if (ret)
		goto out;

	spin_lock_irq(&dev->event_lock);
	ff->effects[id] = *effect;
	ff->effect_owners[id] = file;
	spin_unlock_irq(&dev->event_lock);

 out:
	mutex_unlock(&ff->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(input_ff_upload);

/*
 * Erase an effect on behalf of its owner.  The caller is expected to hold
 * ff->mutex already.
 *
 * Playback of the effect is stopped and its owner entry cleared before the
 * driver's optional erase handler runs; if that handler fails, ownership is
 * handed back to @file and the handler's error is returned.  Returns 0 on
 * success or the error from the access check.
 */
static int erase_effect(struct input_dev *dev, int effect_id,
			struct file *file)
{
	struct ff_device *ff = dev->ff;
	int ret;

	ret = check_effect_access(ff, effect_id, file);
	if (ret)
		return ret;

	/* stop the effect and drop its owner in one locked section */
	spin_lock_irq(&dev->event_lock);
	ff->playback(dev, effect_id, 0);
	ff->effect_owners[effect_id] = NULL;
	spin_unlock_irq(&dev->event_lock);

	/* the erase handler is optional */
	if (!ff->erase)
		return 0;

	ret = ff->erase(dev, effect_id);
	if (!ret)
		return 0;

	/* the driver could not erase it: give the slot back to its owner */
	spin_lock_irq(&dev->event_lock);
	ff->effect_owners[effect_id] = file;
	spin_unlock_irq(&dev->event_lock);

	return ret;
}

/**
 * input_ff_erase - erase a force-feedback effect from device
 * @dev: input device to erase effect from
 * @effect_id: id of the ffect to be erased
 * @file: purported owner of the request
 *
 * This function erases a force-feedback effect from specified device.
 * The effect will only be erased if it was uploaded through the same
 * file handle that is requesting erase.
 */
int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file)
{
	struct ff_device *ff = dev->ff;
	int ret;

	if (!test_bit(EV_FF, dev->evbit))
		return -ENOSYS;

	mutex_lock(&ff->mutex);
	ret = erase_effect(dev, effect_id, file);
	mutex_unlock(&ff->mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(input_ff_erase);

/*
 * flush_effects - erase all effects owned by a file handle
 *
 * Every slot is passed to erase_effect() with the ff mutex held.  The
 * per-slot result is deliberately ignored, so slots that fail the access
 * check are simply skipped.  Always returns 0.
 */
static int flush_effects(struct input_dev *dev, struct file *file)
{
	struct ff_device *ff = dev->ff;
	int id = 0;

	pr_debug("flushing now\n");

	mutex_lock(&ff->mutex);

	while (id < ff->max_effects) {
		erase_effect(dev, id, file);
		id++;
	}

	mutex_unlock(&ff->mutex);

	return 0;
}

/**
 * input_ff_event() - generic handler for force-feedback events
 * @dev: input device to send the effect to
 * @type: event type (anything but EV_FF is ignored)
 * @code: event code
 * @value: event value
 *
 * FF_GAIN and FF_AUTOCENTER are forwarded to the device's set_gain and
 * set_autocenter handlers when the capability bit is set in @dev->ffbit
 * and @value lies within 0..0xffff.  Any other @code is treated as an
 * effect id and handed to the playback handler if it names an uploaded
 * effect.  Requests that fail these checks are silently dropped.
 *
 * Return: always 0.
 */
int input_ff_event(struct input_dev *dev, unsigned int type,
		   unsigned int code, int value)
{
	struct ff_device *ff = dev->ff;

	if (type != EV_FF)
		return 0;

	switch (code) {
	case FF_GAIN:
		/*
		 * value is a signed int, so an upper bound alone would let
		 * negative values through to the driver; check both ends.
		 */
		if (!test_bit(FF_GAIN, dev->ffbit) ||
		    value < 0 || value > 0xffff)
			break;

		ff->set_gain(dev, value);
		break;

	case FF_AUTOCENTER:
		/* same range rule as for FF_GAIN */
		if (!test_bit(FF_AUTOCENTER, dev->ffbit) ||
		    value < 0 || value > 0xffff)
			break;

		ff->set_autocenter(dev, value);
		break;

	default:
		/* NULL file: only validate the id, not who owns the effect */
		if (check_effect_access(ff, code, NULL) == 0)
			ff->playback(dev, code, value);
		break;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(input_ff_event);

/**
 * input_ff_create() - create force-feedback device
 * @dev: input device supporting force-feedback
 * @max_effects: maximum number of effects supported by the device
 *
 * This function allocates all necessary memory for a force feedback
 * portion of an input device and installs all default handlers.
 * @dev->ffbit should be already set up before calling this function.
 * Once ff device is created you need to setup its upload, erase,
 * playback and other handlers before registering input device
 */
int input_ff_create(struct input_dev *dev, int max_effects)
{
	struct ff_device *ff;
	int i;

	if (!max_effects) {
		pr_err("cannot allocate device without any effects\n");
		return -EINVAL;
	}

	ff = kzalloc(sizeof(struct ff_device) +
		     max_effects * sizeof(struct file *), GFP_KERNEL);
	if (!ff)
		return -ENOMEM;

	ff->effects = kcalloc(max_effects, sizeof(struct ff_effect),
			      GFP_KERNEL);
	if (!ff->effects) {
		kfree(ff);
		return -ENOMEM;
	}

	ff->max_effects = max_effects;
	mutex_init(&ff->mutex);

	dev->ff = ff;
	dev->flush = flush_effects;
	dev->event = input_ff_event;
	__set_bit(EV_FF, dev->evbit);

	/* Copy "true" bits into ff device bitmap */
	for (i = 0; i <= FF_MAX; i++)
		if (test_bit(i, dev->ffbit))
			__set_bit(i, ff->ffbit);

	/* we can emulate RUMBLE with periodic effects */
	if (test_bit(FF_PERIODIC, ff->ffbit))
		__set_bit(FF_RUMBLE, dev->ffbit);

	return 0;
}
EXPORT_SYMBOL_GPL(input_ff_create);

/**
 * input_ff_destroy() - frees force feedback portion of input device
 * @dev: input device supporting force feedback
 *
 * This function is only needed in error path as input core will
 * automatically free force feedback structures when device is
 * destroyed.
 *
 * EV_FF is cleared from @dev->evbit whether or not an ff device is
 * attached.
 */
void input_ff_destroy(struct input_dev *dev)
{
	struct ff_device *ff = dev->ff;

	__clear_bit(EV_FF, dev->evbit);

	if (!ff)
		return;

	/* give the driver's optional destroy hook a chance to run first */
	if (ff->destroy)
		ff->destroy(ff);

	kfree(ff->private);
	kfree(ff->effects);
	kfree(ff);
	dev->ff = NULL;
}
EXPORT_SYMBOL_GPL(input_ff_destroy);
back to top