Revision dbe409e4f5e5075bd9ff7f8dd5c627abf3ee38c1 authored by Mike Kravetz on 11 December 2015, 21:40:52 UTC, committed by Linus Torvalds on 12 December 2015, 18:15:34 UTC
Dmitry Vyukov reported the following memory leak

unreferenced object 0xffff88002eaafd88 (size 32):
  comm "a.out", pid 5063, jiffies 4295774645 (age 15.810s)
  hex dump (first 32 bytes):
    28 e9 4e 63 00 88 ff ff 28 e9 4e 63 00 88 ff ff  (.Nc....(.Nc....
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
     kmalloc include/linux/slab.h:458
     region_chg+0x2d4/0x6b0 mm/hugetlb.c:398
     __vma_reservation_common+0x2c3/0x390 mm/hugetlb.c:1791
     vma_needs_reservation mm/hugetlb.c:1813
     alloc_huge_page+0x19e/0xc70 mm/hugetlb.c:1845
     hugetlb_no_page mm/hugetlb.c:3543
     hugetlb_fault+0x7a1/0x1250 mm/hugetlb.c:3717
     follow_hugetlb_page+0x339/0xc70 mm/hugetlb.c:3880
     __get_user_pages+0x542/0xf30 mm/gup.c:497
     populate_vma_page_range+0xde/0x110 mm/gup.c:919
     __mm_populate+0x1c7/0x310 mm/gup.c:969
     do_mlock+0x291/0x360 mm/mlock.c:637
     SYSC_mlock2 mm/mlock.c:658
     SyS_mlock2+0x4b/0x70 mm/mlock.c:648

Dmitry identified a potential memory leak in the routine region_chg,
where a region descriptor is not free'ed on an error path.

However, the root cause for the above memory leak resides in region_del.
In this specific case, a "placeholder" entry is created in region_chg.
The associated page allocation fails, and the placeholder entry is left
in the reserve map.  This is "by design" as the entry should be deleted
when the map is released.  The bug is in the region_del routine which is
used to delete entries within a specific range (and when the map is
released).  region_del did not handle the case where a placeholder entry
exactly matched the start of the range range to be deleted.  In this
case, the entry would not be deleted and leaked.  The fix is to take
these special placeholder entries into account in region_del.

The region_chg error path leak is also fixed.

Fixes: feba16e25a57 ("mm/hugetlb: add region_del() to delete a specific range of entries")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: <stable@vger.kernel.org>	[4.3+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 0d777df
Raw File
amigaffs.c
/*
 *  linux/fs/affs/amigaffs.c
 *
 *  (c) 1996  Hans-Joachim Widmaier - Rewritten
 *
 *  (C) 1993  Ray Burr - Amiga FFS filesystem.
 *
 *  Please send bug reports to: hjw@zvw.de
 */

#include "affs.h"

/*
 * Functions for accessing Amiga-FFS structures.
 */


/* Insert a header block bh into the directory dir
 * caller must hold AFFS_DIR->i_hash_lock!
 */

int
affs_insert_hash(struct inode *dir, struct buffer_head *bh)
{
	struct super_block *sb = dir->i_sb;
	struct buffer_head *dir_bh;
	u32 ino, hash_ino;
	int offset;

	ino = bh->b_blocknr;
	offset = affs_hash_name(sb, AFFS_TAIL(sb, bh)->name + 1, AFFS_TAIL(sb, bh)->name[0]);

	pr_debug("%s(dir=%lu, ino=%d)\n", __func__, dir->i_ino, ino);

	dir_bh = affs_bread(sb, dir->i_ino);
	if (!dir_bh)
		return -EIO;

	hash_ino = be32_to_cpu(AFFS_HEAD(dir_bh)->table[offset]);
	while (hash_ino) {
		affs_brelse(dir_bh);
		dir_bh = affs_bread(sb, hash_ino);
		if (!dir_bh)
			return -EIO;
		hash_ino = be32_to_cpu(AFFS_TAIL(sb, dir_bh)->hash_chain);
	}
	AFFS_TAIL(sb, bh)->parent = cpu_to_be32(dir->i_ino);
	AFFS_TAIL(sb, bh)->hash_chain = 0;
	affs_fix_checksum(sb, bh);

	if (dir->i_ino == dir_bh->b_blocknr)
		AFFS_HEAD(dir_bh)->table[offset] = cpu_to_be32(ino);
	else
		AFFS_TAIL(sb, dir_bh)->hash_chain = cpu_to_be32(ino);

	affs_adjust_checksum(dir_bh, ino);
	mark_buffer_dirty_inode(dir_bh, dir);
	affs_brelse(dir_bh);

	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
	dir->i_version++;
	mark_inode_dirty(dir);

	return 0;
}

/* Remove a header block from its directory.
 * caller must hold AFFS_DIR->i_hash_lock!
 */

int
affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
{
	struct super_block *sb;
	struct buffer_head *bh;
	u32 rem_ino, hash_ino;
	__be32 ino;
	int offset, retval;

	sb = dir->i_sb;
	rem_ino = rem_bh->b_blocknr;
	offset = affs_hash_name(sb, AFFS_TAIL(sb, rem_bh)->name+1, AFFS_TAIL(sb, rem_bh)->name[0]);
	pr_debug("%s(dir=%lu, ino=%d, hashval=%d)\n", __func__, dir->i_ino,
		 rem_ino, offset);

	bh = affs_bread(sb, dir->i_ino);
	if (!bh)
		return -EIO;

	retval = -ENOENT;
	hash_ino = be32_to_cpu(AFFS_HEAD(bh)->table[offset]);
	while (hash_ino) {
		if (hash_ino == rem_ino) {
			ino = AFFS_TAIL(sb, rem_bh)->hash_chain;
			if (dir->i_ino == bh->b_blocknr)
				AFFS_HEAD(bh)->table[offset] = ino;
			else
				AFFS_TAIL(sb, bh)->hash_chain = ino;
			affs_adjust_checksum(bh, be32_to_cpu(ino) - hash_ino);
			mark_buffer_dirty_inode(bh, dir);
			AFFS_TAIL(sb, rem_bh)->parent = 0;
			retval = 0;
			break;
		}
		affs_brelse(bh);
		bh = affs_bread(sb, hash_ino);
		if (!bh)
			return -EIO;
		hash_ino = be32_to_cpu(AFFS_TAIL(sb, bh)->hash_chain);
	}

	affs_brelse(bh);

	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
	dir->i_version++;
	mark_inode_dirty(dir);

	return retval;
}

static void
affs_fix_dcache(struct inode *inode, u32 entry_ino)
{
	struct dentry *dentry;
	spin_lock(&inode->i_lock);
	hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
		if (entry_ino == (u32)(long)dentry->d_fsdata) {
			dentry->d_fsdata = (void *)inode->i_ino;
			break;
		}
	}
	spin_unlock(&inode->i_lock);
}


/* Remove header from link chain */

static int
affs_remove_link(struct dentry *dentry)
{
	struct inode *dir, *inode = d_inode(dentry);
	struct super_block *sb = inode->i_sb;
	struct buffer_head *bh, *link_bh = NULL;
	u32 link_ino, ino;
	int retval;

	pr_debug("%s(key=%ld)\n", __func__, inode->i_ino);
	retval = -EIO;
	bh = affs_bread(sb, inode->i_ino);
	if (!bh)
		goto done;

	link_ino = (u32)(long)dentry->d_fsdata;
	if (inode->i_ino == link_ino) {
		/* we can't remove the head of the link, as its blocknr is still used as ino,
		 * so we remove the block of the first link instead.
		 */ 
		link_ino = be32_to_cpu(AFFS_TAIL(sb, bh)->link_chain);
		link_bh = affs_bread(sb, link_ino);
		if (!link_bh)
			goto done;

		dir = affs_iget(sb, be32_to_cpu(AFFS_TAIL(sb, link_bh)->parent));
		if (IS_ERR(dir)) {
			retval = PTR_ERR(dir);
			goto done;
		}

		affs_lock_dir(dir);
		/*
		 * if there's a dentry for that block, make it
		 * refer to inode itself.
		 */
		affs_fix_dcache(inode, link_ino);
		retval = affs_remove_hash(dir, link_bh);
		if (retval) {
			affs_unlock_dir(dir);
			goto done;
		}
		mark_buffer_dirty_inode(link_bh, inode);

		memcpy(AFFS_TAIL(sb, bh)->name, AFFS_TAIL(sb, link_bh)->name, 32);
		retval = affs_insert_hash(dir, bh);
		if (retval) {
			affs_unlock_dir(dir);
			goto done;
		}
		mark_buffer_dirty_inode(bh, inode);

		affs_unlock_dir(dir);
		iput(dir);
	} else {
		link_bh = affs_bread(sb, link_ino);
		if (!link_bh)
			goto done;
	}

	while ((ino = be32_to_cpu(AFFS_TAIL(sb, bh)->link_chain)) != 0) {
		if (ino == link_ino) {
			__be32 ino2 = AFFS_TAIL(sb, link_bh)->link_chain;
			AFFS_TAIL(sb, bh)->link_chain = ino2;
			affs_adjust_checksum(bh, be32_to_cpu(ino2) - link_ino);
			mark_buffer_dirty_inode(bh, inode);
			retval = 0;
			/* Fix the link count, if bh is a normal header block without links */
			switch (be32_to_cpu(AFFS_TAIL(sb, bh)->stype)) {
			case ST_LINKDIR:
			case ST_LINKFILE:
				break;
			default:
				if (!AFFS_TAIL(sb, bh)->link_chain)
					set_nlink(inode, 1);
			}
			affs_free_block(sb, link_ino);
			goto done;
		}
		affs_brelse(bh);
		bh = affs_bread(sb, ino);
		if (!bh)
			goto done;
	}
	retval = -ENOENT;
done:
	affs_brelse(link_bh);
	affs_brelse(bh);
	return retval;
}


static int
affs_empty_dir(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	struct buffer_head *bh;
	int retval, size;

	retval = -EIO;
	bh = affs_bread(sb, inode->i_ino);
	if (!bh)
		goto done;

	retval = -ENOTEMPTY;
	for (size = AFFS_SB(sb)->s_hashsize - 1; size >= 0; size--)
		if (AFFS_HEAD(bh)->table[size])
			goto not_empty;
	retval = 0;
not_empty:
	affs_brelse(bh);
done:
	return retval;
}


/* Remove a filesystem object. If the object to be removed has
 * links to it, one of the links must be changed to inherit
 * the file or directory. As above, any inode will do.
 * The buffer will not be freed. If the header is a link, the
 * block will be marked as free.
 * This function returns a negative error number in case of
 * an error, else 0 if the inode is to be deleted or 1 if not.
 */

int
affs_remove_header(struct dentry *dentry)
{
	struct super_block *sb;
	struct inode *inode, *dir;
	struct buffer_head *bh = NULL;
	int retval;

	dir = d_inode(dentry->d_parent);
	sb = dir->i_sb;

	retval = -ENOENT;
	inode = d_inode(dentry);
	if (!inode)
		goto done;

	pr_debug("%s(key=%ld)\n", __func__, inode->i_ino);
	retval = -EIO;
	bh = affs_bread(sb, (u32)(long)dentry->d_fsdata);
	if (!bh)
		goto done;

	affs_lock_link(inode);
	affs_lock_dir(dir);
	switch (be32_to_cpu(AFFS_TAIL(sb, bh)->stype)) {
	case ST_USERDIR:
		/* if we ever want to support links to dirs
		 * i_hash_lock of the inode must only be
		 * taken after some checks
		 */
		affs_lock_dir(inode);
		retval = affs_empty_dir(inode);
		affs_unlock_dir(inode);
		if (retval)
			goto done_unlock;
		break;
	default:
		break;
	}

	retval = affs_remove_hash(dir, bh);
	if (retval)
		goto done_unlock;
	mark_buffer_dirty_inode(bh, inode);

	affs_unlock_dir(dir);

	if (inode->i_nlink > 1)
		retval = affs_remove_link(dentry);
	else
		clear_nlink(inode);
	affs_unlock_link(inode);
	inode->i_ctime = CURRENT_TIME_SEC;
	mark_inode_dirty(inode);

done:
	affs_brelse(bh);
	return retval;

done_unlock:
	affs_unlock_dir(dir);
	affs_unlock_link(inode);
	goto done;
}

/* Checksum a block, do various consistency checks and optionally return
   the blocks type number.  DATA points to the block.  If their pointers
   are non-null, *PTYPE and *STYPE are set to the primary and secondary
   block types respectively, *HASHSIZE is set to the size of the hashtable
   (which lets us calculate the block size).
   Returns non-zero if the block is not consistent. */

u32
affs_checksum_block(struct super_block *sb, struct buffer_head *bh)
{
	__be32 *ptr = (__be32 *)bh->b_data;
	u32 sum;
	int bsize;

	sum = 0;
	for (bsize = sb->s_blocksize / sizeof(__be32); bsize > 0; bsize--)
		sum += be32_to_cpu(*ptr++);
	return sum;
}

/*
 * Calculate the checksum of a disk block and store it
 * at the indicated position.
 */

void
affs_fix_checksum(struct super_block *sb, struct buffer_head *bh)
{
	int cnt = sb->s_blocksize / sizeof(__be32);
	__be32 *ptr = (__be32 *)bh->b_data;
	u32 checksum;
	__be32 *checksumptr;

	checksumptr = ptr + 5;
	*checksumptr = 0;
	for (checksum = 0; cnt > 0; ptr++, cnt--)
		checksum += be32_to_cpu(*ptr);
	*checksumptr = cpu_to_be32(-checksum);
}

void
secs_to_datestamp(time_t secs, struct affs_date *ds)
{
	u32	 days;
	u32	 minute;

	secs -= sys_tz.tz_minuteswest * 60 + ((8 * 365 + 2) * 24 * 60 * 60);
	if (secs < 0)
		secs = 0;
	days    = secs / 86400;
	secs   -= days * 86400;
	minute  = secs / 60;
	secs   -= minute * 60;

	ds->days = cpu_to_be32(days);
	ds->mins = cpu_to_be32(minute);
	ds->ticks = cpu_to_be32(secs * 50);
}

umode_t
prot_to_mode(u32 prot)
{
	umode_t mode = 0;

	if (!(prot & FIBF_NOWRITE))
		mode |= S_IWUSR;
	if (!(prot & FIBF_NOREAD))
		mode |= S_IRUSR;
	if (!(prot & FIBF_NOEXECUTE))
		mode |= S_IXUSR;
	if (prot & FIBF_GRP_WRITE)
		mode |= S_IWGRP;
	if (prot & FIBF_GRP_READ)
		mode |= S_IRGRP;
	if (prot & FIBF_GRP_EXECUTE)
		mode |= S_IXGRP;
	if (prot & FIBF_OTR_WRITE)
		mode |= S_IWOTH;
	if (prot & FIBF_OTR_READ)
		mode |= S_IROTH;
	if (prot & FIBF_OTR_EXECUTE)
		mode |= S_IXOTH;

	return mode;
}

void
mode_to_prot(struct inode *inode)
{
	u32 prot = AFFS_I(inode)->i_protect;
	umode_t mode = inode->i_mode;

	if (!(mode & S_IXUSR))
		prot |= FIBF_NOEXECUTE;
	if (!(mode & S_IRUSR))
		prot |= FIBF_NOREAD;
	if (!(mode & S_IWUSR))
		prot |= FIBF_NOWRITE;
	if (mode & S_IXGRP)
		prot |= FIBF_GRP_EXECUTE;
	if (mode & S_IRGRP)
		prot |= FIBF_GRP_READ;
	if (mode & S_IWGRP)
		prot |= FIBF_GRP_WRITE;
	if (mode & S_IXOTH)
		prot |= FIBF_OTR_EXECUTE;
	if (mode & S_IROTH)
		prot |= FIBF_OTR_READ;
	if (mode & S_IWOTH)
		prot |= FIBF_OTR_WRITE;

	AFFS_I(inode)->i_protect = prot;
}

void
affs_error(struct super_block *sb, const char *function, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	pr_crit("error (device %s): %s(): %pV\n", sb->s_id, function, &vaf);
	if (!(sb->s_flags & MS_RDONLY))
		pr_warn("Remounting filesystem read-only\n");
	sb->s_flags |= MS_RDONLY;
	va_end(args);
}

void
affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	pr_warn("(device %s): %s(): %pV\n", sb->s_id, function, &vaf);
	va_end(args);
}

bool
affs_nofilenametruncate(const struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	return affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_NO_TRUNCATE);
}

/* Check if the name is valid for a affs object. */

int
affs_check_name(const unsigned char *name, int len, bool notruncate)
{
	int	 i;

	if (len > AFFSNAMEMAX) {
		if (notruncate)
			return -ENAMETOOLONG;
		len = AFFSNAMEMAX;
	}
	for (i = 0; i < len; i++) {
		if (name[i] < ' ' || name[i] == ':'
		    || (name[i] > 0x7e && name[i] < 0xa0))
			return -EINVAL;
	}

	return 0;
}

/* This function copies name to bstr, with at most 30
 * characters length. The bstr will be prepended by
 * a length byte.
 * NOTE: The name will must be already checked by
 *       affs_check_name()!
 */

int
affs_copy_name(unsigned char *bstr, struct dentry *dentry)
{
	u32 len = min(dentry->d_name.len, AFFSNAMEMAX);

	*bstr++ = len;
	memcpy(bstr, dentry->d_name.name, len);
	return len;
}
back to top