Revision 75a6f82a0d10ef8f13cd8fe7212911a0252ab99e authored by Al Viro on 08 July 2015, 01:42:38 UTC, committed by Al Viro on 12 July 2015, 15:27:04 UTC
	Normally opening a file, unlinking it and then closing will have
the inode freed upon close() (provided that it's not otherwise busy and
has no remaining links, of course).  However, there's one case where that
does *not* happen.  Namely, if you open it by fhandle with cold dcache,
then unlink() and close().

	In normal case you get d_delete() in unlink(2) notice that dentry
is busy and unhash it; on the final dput() it will be forcibly evicted from
dcache, triggering iput() and inode removal.  In this case, though, we end
up with *two* dentries - disconnected (created by open-by-fhandle) and
regular one (used by unlink()).  The latter will have its reference to inode
dropped just fine, but the former will not - it's considered hashed (it
is on the ->s_anon list), so it will stay around until the memory pressure
will finally do it in.  As the result, we have the final iput() delayed
indefinitely.  It's trivial to reproduce -

void flush_dcache(void)
{
        system("mount -o remount,rw /");
}

static char buf[20 * 1024 * 1024];

main()
{
        int fd;
        union {
                struct file_handle f;
                char buf[MAX_HANDLE_SZ];
        } x;
        int m;

        x.f.handle_bytes = sizeof(x);
        chdir("/root");
        mkdir("foo", 0700);
        fd = open("foo/bar", O_CREAT | O_RDWR, 0600);
        close(fd);
        name_to_handle_at(AT_FDCWD, "foo/bar", &x.f, &m, 0);
        flush_dcache();
        fd = open_by_handle_at(AT_FDCWD, &x.f, O_RDWR);
        unlink("foo/bar");
        write(fd, buf, sizeof(buf));
        system("df .");			/* 20Mb eaten */
        close(fd);
        system("df .");			/* should've freed those 20Mb */
        flush_dcache();
        system("df .");			/* should be the same as #2 */
}

will spit out something like
Filesystem     1K-blocks   Used Available Use% Mounted on
/dev/root         322023 303843      1131 100% /
Filesystem     1K-blocks   Used Available Use% Mounted on
/dev/root         322023 303843      1131 100% /
Filesystem     1K-blocks   Used Available Use% Mounted on
/dev/root         322023 283282     21692  93% /
- inode gets freed only when dentry is finally evicted (here we trigger
than by remount; normally it would've happened in response to memory
pressure hell knows when).

Cc: stable@vger.kernel.org # v2.6.38+; earlier ones need s/kill_it/unhash_it/
Acked-by: J. Bruce Fields <bfields@fieldses.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 9391dd0
Raw File
xt_HL.c
/*
 * TTL modification target for IP tables
 * (C) 2000,2005 by Harald Welte <laforge@netfilter.org>
 *
 * Hop Limit modification target for ip6tables
 * Maciej Soltysiak <solt@dns.toxicfilms.tv>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/checksum.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ipt_TTL.h>
#include <linux/netfilter_ipv6/ip6t_HL.h>

MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target");
MODULE_LICENSE("GPL");

static unsigned int
ttl_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
	struct iphdr *iph;
	const struct ipt_TTL_info *info = par->targinfo;
	int new_ttl;

	if (!skb_make_writable(skb, skb->len))
		return NF_DROP;

	iph = ip_hdr(skb);

	switch (info->mode) {
	case IPT_TTL_SET:
		new_ttl = info->ttl;
		break;
	case IPT_TTL_INC:
		new_ttl = iph->ttl + info->ttl;
		if (new_ttl > 255)
			new_ttl = 255;
		break;
	case IPT_TTL_DEC:
		new_ttl = iph->ttl - info->ttl;
		if (new_ttl < 0)
			new_ttl = 0;
		break;
	default:
		new_ttl = iph->ttl;
		break;
	}

	if (new_ttl != iph->ttl) {
		csum_replace2(&iph->check, htons(iph->ttl << 8),
					   htons(new_ttl << 8));
		iph->ttl = new_ttl;
	}

	return XT_CONTINUE;
}

static unsigned int
hl_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
	struct ipv6hdr *ip6h;
	const struct ip6t_HL_info *info = par->targinfo;
	int new_hl;

	if (!skb_make_writable(skb, skb->len))
		return NF_DROP;

	ip6h = ipv6_hdr(skb);

	switch (info->mode) {
	case IP6T_HL_SET:
		new_hl = info->hop_limit;
		break;
	case IP6T_HL_INC:
		new_hl = ip6h->hop_limit + info->hop_limit;
		if (new_hl > 255)
			new_hl = 255;
		break;
	case IP6T_HL_DEC:
		new_hl = ip6h->hop_limit - info->hop_limit;
		if (new_hl < 0)
			new_hl = 0;
		break;
	default:
		new_hl = ip6h->hop_limit;
		break;
	}

	ip6h->hop_limit = new_hl;

	return XT_CONTINUE;
}

static int ttl_tg_check(const struct xt_tgchk_param *par)
{
	const struct ipt_TTL_info *info = par->targinfo;

	if (info->mode > IPT_TTL_MAXMODE) {
		pr_info("TTL: invalid or unknown mode %u\n", info->mode);
		return -EINVAL;
	}
	if (info->mode != IPT_TTL_SET && info->ttl == 0)
		return -EINVAL;
	return 0;
}

static int hl_tg6_check(const struct xt_tgchk_param *par)
{
	const struct ip6t_HL_info *info = par->targinfo;

	if (info->mode > IP6T_HL_MAXMODE) {
		pr_info("invalid or unknown mode %u\n", info->mode);
		return -EINVAL;
	}
	if (info->mode != IP6T_HL_SET && info->hop_limit == 0) {
		pr_info("increment/decrement does not "
			"make sense with value 0\n");
		return -EINVAL;
	}
	return 0;
}

static struct xt_target hl_tg_reg[] __read_mostly = {
	{
		.name       = "TTL",
		.revision   = 0,
		.family     = NFPROTO_IPV4,
		.target     = ttl_tg,
		.targetsize = sizeof(struct ipt_TTL_info),
		.table      = "mangle",
		.checkentry = ttl_tg_check,
		.me         = THIS_MODULE,
	},
	{
		.name       = "HL",
		.revision   = 0,
		.family     = NFPROTO_IPV6,
		.target     = hl_tg6,
		.targetsize = sizeof(struct ip6t_HL_info),
		.table      = "mangle",
		.checkentry = hl_tg6_check,
		.me         = THIS_MODULE,
	},
};

static int __init hl_tg_init(void)
{
	return xt_register_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg));
}

static void __exit hl_tg_exit(void)
{
	xt_unregister_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg));
}

module_init(hl_tg_init);
module_exit(hl_tg_exit);
MODULE_ALIAS("ipt_TTL");
MODULE_ALIAS("ip6t_HL");
back to top