Revision 8aef18845266f5c05904c610088f2d1ed58f6be3 authored by Al Viro on 16 June 2011, 14:10:06 UTC, committed by Al Viro on 16 June 2011, 15:28:16 UTC
[Kudos to dhowells for tracking that crap down] If two processes attempt to cause automounting on the same mountpoint at the same time, the vfsmount holding the mountpoint will be left with one too few references on it, causing a BUG when the kernel tries to clean up. The problem is that lock_mount() drops the caller's reference to the mountpoint's vfsmount in the case where it finds something already mounted on the mountpoint as it transits to the mounted filesystem and replaces path->mnt with the new mountpoint vfsmount. During a pathwalk, however, we don't take a reference on the vfsmount if it is the same as the one in the nameidata struct, but do_add_mount() doesn't know this. The fix is to make sure we have a ref on the vfsmount of the mountpoint before calling do_add_mount(). However, if lock_mount() doesn't transit, we're then left with an extra ref on the mountpoint vfsmount which needs releasing. We can handle that in follow_managed() by not making assumptions about what we can and what we cannot get from lookup_mnt() as the current code does. The callers of follow_managed() expect that reference to path->mnt will be grabbed iff path->mnt has been changed. follow_managed() and follow_automount() keep track of whether such reference has been grabbed and assume that it'll happen in those and only those cases that'll have us return with changed path->mnt. That assumption is almost correct - it breaks in case of racing automounts and in even harder to hit race between following a mountpoint and a couple of mount --move. The thing is, we don't need to make that assumption at all - after the end of loop in follow_manage() we can check if path->mnt has ended up unchanged and do mntput() if needed. The BUG can be reproduced with the following test program: #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include <sys/wait.h> int main(int argc, char **argv) { int pid, ws; struct stat buf; pid = fork(); stat(argv[1], &buf); if (pid > 0) wait(&ws); return 0; } and the following procedure: (1) Mount an NFS volume that on the server has something else mounted on a subdirectory. For instance, I can mount / from my server: mount warthog:/ /mnt -t nfs4 -r On the server /data has another filesystem mounted on it, so NFS will see a change in FSID as it walks down the path, and will mark /mnt/data as being a mountpoint. This will cause the automount code to be triggered. !!! Do not look inside the mounted fs at this point !!! (2) Run the above program on a file within the submount to generate two simultaneous automount requests: /tmp/forkstat /mnt/data/testfile (3) Unmount the automounted submount: umount /mnt/data (4) Unmount the original mount: umount /mnt At this point the kernel should throw a BUG with something like the following: BUG: Dentry ffff880032e3c5c0{i=2,n=} still in use (1) [unmount of nfs4 0:12] Note that the bug appears on the root dentry of the original mount, not the mountpoint and not the submount because sys_umount() hasn't got to its final mntput_no_expire() yet, but this isn't so obvious from the call trace: [<ffffffff8117cd82>] shrink_dcache_for_umount+0x69/0x82 [<ffffffff8116160e>] generic_shutdown_super+0x37/0x15b [<ffffffffa00fae56>] ? nfs_super_return_all_delegations+0x2e/0x1b1 [nfs] [<ffffffff811617f3>] kill_anon_super+0x1d/0x7e [<ffffffffa00d0be1>] nfs4_kill_super+0x60/0xb6 [nfs] [<ffffffff81161c17>] deactivate_locked_super+0x34/0x83 [<ffffffff811629ff>] deactivate_super+0x6f/0x7b [<ffffffff81186261>] mntput_no_expire+0x18d/0x199 [<ffffffff811862a8>] mntput+0x3b/0x44 [<ffffffff81186d87>] release_mounts+0xa2/0xbf [<ffffffff811876af>] sys_umount+0x47a/0x4ba [<ffffffff8109e1ca>] ? trace_hardirqs_on_caller+0x1fd/0x22f [<ffffffff816ea86b>] system_call_fastpath+0x16/0x1b as do_umount() is inlined. However, you can see release_mounts() in there. Note also that it may be necessary to have multiple CPU cores to be able to trigger this bug. Tested-by: Jeff Layton <jlayton@redhat.com> Tested-by: Ian Kent <raven@themaw.net> Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 50338b8
icmp_socket.c
/*
* Copyright (C) 2007-2011 B.A.T.M.A.N. contributors:
*
* Marek Lindner
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA
*
*/
#include "main.h"
#include <linux/debugfs.h>
#include <linux/slab.h>
#include "icmp_socket.h"
#include "send.h"
#include "hash.h"
#include "originator.h"
#include "hard-interface.h"
static struct socket_client *socket_client_hash[256];
static void bat_socket_add_packet(struct socket_client *socket_client,
struct icmp_packet_rr *icmp_packet,
size_t icmp_len);
void bat_socket_init(void)
{
memset(socket_client_hash, 0, sizeof(socket_client_hash));
}
static int bat_socket_open(struct inode *inode, struct file *file)
{
unsigned int i;
struct socket_client *socket_client;
nonseekable_open(inode, file);
socket_client = kmalloc(sizeof(struct socket_client), GFP_KERNEL);
if (!socket_client)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(socket_client_hash); i++) {
if (!socket_client_hash[i]) {
socket_client_hash[i] = socket_client;
break;
}
}
if (i == ARRAY_SIZE(socket_client_hash)) {
pr_err("Error - can't add another packet client: "
"maximum number of clients reached\n");
kfree(socket_client);
return -EXFULL;
}
INIT_LIST_HEAD(&socket_client->queue_list);
socket_client->queue_len = 0;
socket_client->index = i;
socket_client->bat_priv = inode->i_private;
spin_lock_init(&socket_client->lock);
init_waitqueue_head(&socket_client->queue_wait);
file->private_data = socket_client;
inc_module_count();
return 0;
}
static int bat_socket_release(struct inode *inode, struct file *file)
{
struct socket_client *socket_client = file->private_data;
struct socket_packet *socket_packet;
struct list_head *list_pos, *list_pos_tmp;
spin_lock_bh(&socket_client->lock);
/* for all packets in the queue ... */
list_for_each_safe(list_pos, list_pos_tmp, &socket_client->queue_list) {
socket_packet = list_entry(list_pos,
struct socket_packet, list);
list_del(list_pos);
kfree(socket_packet);
}
socket_client_hash[socket_client->index] = NULL;
spin_unlock_bh(&socket_client->lock);
kfree(socket_client);
dec_module_count();
return 0;
}
static ssize_t bat_socket_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct socket_client *socket_client = file->private_data;
struct socket_packet *socket_packet;
size_t packet_len;
int error;
if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0))
return -EAGAIN;
if ((!buf) || (count < sizeof(struct icmp_packet)))
return -EINVAL;
if (!access_ok(VERIFY_WRITE, buf, count))
return -EFAULT;
error = wait_event_interruptible(socket_client->queue_wait,
socket_client->queue_len);
if (error)
return error;
spin_lock_bh(&socket_client->lock);
socket_packet = list_first_entry(&socket_client->queue_list,
struct socket_packet, list);
list_del(&socket_packet->list);
socket_client->queue_len--;
spin_unlock_bh(&socket_client->lock);
error = __copy_to_user(buf, &socket_packet->icmp_packet,
socket_packet->icmp_len);
packet_len = socket_packet->icmp_len;
kfree(socket_packet);
if (error)
return -EFAULT;
return packet_len;
}
static ssize_t bat_socket_write(struct file *file, const char __user *buff,
size_t len, loff_t *off)
{
struct socket_client *socket_client = file->private_data;
struct bat_priv *bat_priv = socket_client->bat_priv;
struct hard_iface *primary_if = NULL;
struct sk_buff *skb;
struct icmp_packet_rr *icmp_packet;
struct orig_node *orig_node = NULL;
struct neigh_node *neigh_node = NULL;
size_t packet_len = sizeof(struct icmp_packet);
if (len < sizeof(struct icmp_packet)) {
bat_dbg(DBG_BATMAN, bat_priv,
"Error - can't send packet from char device: "
"invalid packet size\n");
return -EINVAL;
}
primary_if = primary_if_get_selected(bat_priv);
if (!primary_if) {
len = -EFAULT;
goto out;
}
if (len >= sizeof(struct icmp_packet_rr))
packet_len = sizeof(struct icmp_packet_rr);
skb = dev_alloc_skb(packet_len + sizeof(struct ethhdr));
if (!skb) {
len = -ENOMEM;
goto out;
}
skb_reserve(skb, sizeof(struct ethhdr));
icmp_packet = (struct icmp_packet_rr *)skb_put(skb, packet_len);
if (!access_ok(VERIFY_READ, buff, packet_len)) {
len = -EFAULT;
goto free_skb;
}
if (__copy_from_user(icmp_packet, buff, packet_len)) {
len = -EFAULT;
goto free_skb;
}
if (icmp_packet->packet_type != BAT_ICMP) {
bat_dbg(DBG_BATMAN, bat_priv,
"Error - can't send packet from char device: "
"got bogus packet type (expected: BAT_ICMP)\n");
len = -EINVAL;
goto free_skb;
}
if (icmp_packet->msg_type != ECHO_REQUEST) {
bat_dbg(DBG_BATMAN, bat_priv,
"Error - can't send packet from char device: "
"got bogus message type (expected: ECHO_REQUEST)\n");
len = -EINVAL;
goto free_skb;
}
icmp_packet->uid = socket_client->index;
if (icmp_packet->version != COMPAT_VERSION) {
icmp_packet->msg_type = PARAMETER_PROBLEM;
icmp_packet->ttl = COMPAT_VERSION;
bat_socket_add_packet(socket_client, icmp_packet, packet_len);
goto free_skb;
}
if (atomic_read(&bat_priv->mesh_state) != MESH_ACTIVE)
goto dst_unreach;
orig_node = orig_hash_find(bat_priv, icmp_packet->dst);
if (!orig_node)
goto dst_unreach;
neigh_node = orig_node_get_router(orig_node);
if (!neigh_node)
goto dst_unreach;
if (!neigh_node->if_incoming)
goto dst_unreach;
if (neigh_node->if_incoming->if_status != IF_ACTIVE)
goto dst_unreach;
memcpy(icmp_packet->orig,
primary_if->net_dev->dev_addr, ETH_ALEN);
if (packet_len == sizeof(struct icmp_packet_rr))
memcpy(icmp_packet->rr,
neigh_node->if_incoming->net_dev->dev_addr, ETH_ALEN);
send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr);
goto out;
dst_unreach:
icmp_packet->msg_type = DESTINATION_UNREACHABLE;
bat_socket_add_packet(socket_client, icmp_packet, packet_len);
free_skb:
kfree_skb(skb);
out:
if (primary_if)
hardif_free_ref(primary_if);
if (neigh_node)
neigh_node_free_ref(neigh_node);
if (orig_node)
orig_node_free_ref(orig_node);
return len;
}
static unsigned int bat_socket_poll(struct file *file, poll_table *wait)
{
struct socket_client *socket_client = file->private_data;
poll_wait(file, &socket_client->queue_wait, wait);
if (socket_client->queue_len > 0)
return POLLIN | POLLRDNORM;
return 0;
}
static const struct file_operations fops = {
.owner = THIS_MODULE,
.open = bat_socket_open,
.release = bat_socket_release,
.read = bat_socket_read,
.write = bat_socket_write,
.poll = bat_socket_poll,
.llseek = no_llseek,
};
int bat_socket_setup(struct bat_priv *bat_priv)
{
struct dentry *d;
if (!bat_priv->debug_dir)
goto err;
d = debugfs_create_file(ICMP_SOCKET, S_IFREG | S_IWUSR | S_IRUSR,
bat_priv->debug_dir, bat_priv, &fops);
if (d)
goto err;
return 0;
err:
return 1;
}
static void bat_socket_add_packet(struct socket_client *socket_client,
struct icmp_packet_rr *icmp_packet,
size_t icmp_len)
{
struct socket_packet *socket_packet;
socket_packet = kmalloc(sizeof(struct socket_packet), GFP_ATOMIC);
if (!socket_packet)
return;
INIT_LIST_HEAD(&socket_packet->list);
memcpy(&socket_packet->icmp_packet, icmp_packet, icmp_len);
socket_packet->icmp_len = icmp_len;
spin_lock_bh(&socket_client->lock);
/* while waiting for the lock the socket_client could have been
* deleted */
if (!socket_client_hash[icmp_packet->uid]) {
spin_unlock_bh(&socket_client->lock);
kfree(socket_packet);
return;
}
list_add_tail(&socket_packet->list, &socket_client->queue_list);
socket_client->queue_len++;
if (socket_client->queue_len > 100) {
socket_packet = list_first_entry(&socket_client->queue_list,
struct socket_packet, list);
list_del(&socket_packet->list);
kfree(socket_packet);
socket_client->queue_len--;
}
spin_unlock_bh(&socket_client->lock);
wake_up(&socket_client->queue_wait);
}
void bat_socket_receive_packet(struct icmp_packet_rr *icmp_packet,
size_t icmp_len)
{
struct socket_client *hash = socket_client_hash[icmp_packet->uid];
if (hash)
bat_socket_add_packet(hash, icmp_packet, icmp_len);
}
Computing file changes ...