Revision 924d8696751c4b9e58263bc82efdafcf875596a6 authored by James Morse on 16 August 2016, 09:46:38 UTC, committed by Rafael J. Wysocki on 16 August 2016, 11:16:36 UTC
rtree_next_node() walks the linked list of leaf nodes to find the next
block of pages in the struct memory_bitmap. If it walks off the end of
the list of nodes, it walks the list of memory zones to find the next
region of memory. If it walks off the end of the list of zones, it
returns false.

This leaves the struct bm_position's node and zone pointers pointing
at their respective struct list_heads in struct mem_zone_bm_rtree.

memory_bm_find_bit() uses struct bm_position's node and zone pointers
to avoid walking lists and trees if the next bit appears in the same
node/zone. It handles these values being stale.

Swap rtree_next_node()s 'step then test' to 'test-next then step',
this means if we reach the end of memory we return false and leave
the node and zone pointers as they were.

This fixes a panic on resume using AMD Seattle with 64K pages:
[    6.868732] Freezing user space processes ... (elapsed 0.000 seconds) done.
[    6.875753] Double checking all user space processes after OOM killer disable... (elapsed 0.000 seconds)
[    6.896453] PM: Using 3 thread(s) for decompression.
[    6.896453] PM: Loading and decompressing image data (5339 pages)...
[    7.318890] PM: Image loading progress:   0%
[    7.323395] Unable to handle kernel paging request at virtual address 00800040
[    7.330611] pgd = ffff000008df0000
[    7.334003] [00800040] *pgd=00000083fffe0003, *pud=00000083fffe0003, *pmd=00000083fffd0003, *pte=0000000000000000
[    7.344266] Internal error: Oops: 96000005 [#1] PREEMPT SMP
[    7.349825] Modules linked in:
[    7.352871] CPU: 2 PID: 1 Comm: swapper/0 Tainted: G        W I     4.8.0-rc1 #4737
[    7.360512] Hardware name: AMD Overdrive/Supercharger/Default string, BIOS ROD1002C 04/08/2016
[    7.369109] task: ffff8003c0220000 task.stack: ffff8003c0280000
[    7.375020] PC is at set_bit+0x18/0x30
[    7.378758] LR is at memory_bm_set_bit+0x24/0x30
[    7.383362] pc : [<ffff00000835bbc8>] lr : [<ffff0000080faf18>] pstate: 60000045
[    7.390743] sp : ffff8003c0283b00
[    7.473551]
[    7.475031] Process swapper/0 (pid: 1, stack limit = 0xffff8003c0280020)
[    7.481718] Stack: (0xffff8003c0283b00 to 0xffff8003c0284000)
[    7.800075] Call trace:
[    7.887097] [<ffff00000835bbc8>] set_bit+0x18/0x30
[    7.891876] [<ffff0000080fb038>] duplicate_memory_bitmap.constprop.38+0x54/0x70
[    7.899172] [<ffff0000080fcc40>] snapshot_write_next+0x22c/0x47c
[    7.905166] [<ffff0000080fe1b4>] load_image_lzo+0x754/0xa88
[    7.910725] [<ffff0000080ff0a8>] swsusp_read+0x144/0x230
[    7.916025] [<ffff0000080fa338>] load_image_and_restore+0x58/0x90
[    7.922105] [<ffff0000080fa660>] software_resume+0x2f0/0x338
[    7.927752] [<ffff000008083350>] do_one_initcall+0x38/0x11c
[    7.933314] [<ffff000008b40cc0>] kernel_init_freeable+0x14c/0x1ec
[    7.939395] [<ffff0000087ce564>] kernel_init+0x10/0xfc
[    7.944520] [<ffff000008082e90>] ret_from_fork+0x10/0x40
[    7.949820] Code: d2800022 8b400c21 f9800031 9ac32043 (c85f7c22)
[    7.955909] ---[ end trace 0024a5986e6ff323 ]---
[    7.960529] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b

Here struct mem_zone_bm_rtree's start_pfn has been returned instead of
struct rtree_node's addr as the node/zone pointers are corrupt after
we walked off the end of the lists during mark_unsafe_pages().

This behaviour was exposed by commit 6dbecfd345a6 ("PM / hibernate:
Simplify mark_unsafe_pages()"), which caused mark_unsafe_pages() to call
duplicate_memory_bitmap(), which uses memory_bm_find_bit() after walking
off the end of the memory bitmap.

Fixes: 3a20cb177961 (PM / Hibernate: Implement position keeping in radix tree)
Signed-off-by: James Morse <james.morse@arm.com>
[ rjw: Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
1 parent 5d87f49
Raw File
inode.c
/*
 *  inode.c - securityfs
 *
 *  Copyright (C) 2005 Greg Kroah-Hartman <gregkh@suse.de>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License version
 *	2 as published by the Free Software Foundation.
 *
 *  Based on fs/debugfs/inode.c which had the following copyright notice:
 *    Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
 *    Copyright (C) 2004 IBM Inc.
 */

/* #define DEBUG */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <linux/magic.h>

static struct vfsmount *mount;
static int mount_count;

static int fill_super(struct super_block *sb, void *data, int silent)
{
	static struct tree_descr files[] = {{""}};

	return simple_fill_super(sb, SECURITYFS_MAGIC, files);
}

static struct dentry *get_sb(struct file_system_type *fs_type,
		  int flags, const char *dev_name,
		  void *data)
{
	return mount_single(fs_type, flags, data, fill_super);
}

static struct file_system_type fs_type = {
	.owner =	THIS_MODULE,
	.name =		"securityfs",
	.mount =	get_sb,
	.kill_sb =	kill_litter_super,
};

/**
 * securityfs_create_file - create a file in the securityfs filesystem
 *
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the securityfs filesystem.
 * @data: a pointer to something that the caller will want to get to later
 *        on.  The inode.i_private pointer will point to this value on
 *        the open() call.
 * @fops: a pointer to a struct file_operations that should be used for
 *        this file.
 *
 * This is the basic "create a file" function for securityfs.  It allows for a
 * wide range of flexibility in creating a file, or a directory (if you
 * want to create a directory, the securityfs_create_dir() function is
 * recommended to be used instead).
 *
 * This function returns a pointer to a dentry if it succeeds.  This
 * pointer must be passed to the securityfs_remove() function when the file is
 * to be removed (no automatic cleanup happens if your module is unloaded,
 * you are responsible here).  If an error occurs, the function will return
 * the error value (via ERR_PTR).
 *
 * If securityfs is not enabled in the kernel, the value %-ENODEV is
 * returned.
 */
struct dentry *securityfs_create_file(const char *name, umode_t mode,
				   struct dentry *parent, void *data,
				   const struct file_operations *fops)
{
	struct dentry *dentry;
	int is_dir = S_ISDIR(mode);
	struct inode *dir, *inode;
	int error;

	if (!is_dir) {
		BUG_ON(!fops);
		mode = (mode & S_IALLUGO) | S_IFREG;
	}

	pr_debug("securityfs: creating file '%s'\n",name);

	error = simple_pin_fs(&fs_type, &mount, &mount_count);
	if (error)
		return ERR_PTR(error);

	if (!parent)
		parent = mount->mnt_root;

	dir = d_inode(parent);

	inode_lock(dir);
	dentry = lookup_one_len(name, parent, strlen(name));
	if (IS_ERR(dentry))
		goto out;

	if (d_really_is_positive(dentry)) {
		error = -EEXIST;
		goto out1;
	}

	inode = new_inode(dir->i_sb);
	if (!inode) {
		error = -ENOMEM;
		goto out1;
	}

	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_private = data;
	if (is_dir) {
		inode->i_op = &simple_dir_inode_operations;
		inode->i_fop = &simple_dir_operations;
		inc_nlink(inode);
		inc_nlink(dir);
	} else {
		inode->i_fop = fops;
	}
	d_instantiate(dentry, inode);
	dget(dentry);
	inode_unlock(dir);
	return dentry;

out1:
	dput(dentry);
	dentry = ERR_PTR(error);
out:
	inode_unlock(dir);
	simple_release_fs(&mount, &mount_count);
	return dentry;
}
EXPORT_SYMBOL_GPL(securityfs_create_file);

/**
 * securityfs_create_dir - create a directory in the securityfs filesystem
 *
 * @name: a pointer to a string containing the name of the directory to
 *        create.
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          directory will be created in the root of the securityfs filesystem.
 *
 * This function creates a directory in securityfs with the given @name.
 *
 * This function returns a pointer to a dentry if it succeeds.  This
 * pointer must be passed to the securityfs_remove() function when the file is
 * to be removed (no automatic cleanup happens if your module is unloaded,
 * you are responsible here).  If an error occurs, %NULL will be returned.
 *
 * If securityfs is not enabled in the kernel, the value %-ENODEV is
 * returned.  It is not wise to check for this value, but rather, check for
 * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
 * code.
 */
struct dentry *securityfs_create_dir(const char *name, struct dentry *parent)
{
	return securityfs_create_file(name,
				      S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
				      parent, NULL, NULL);
}
EXPORT_SYMBOL_GPL(securityfs_create_dir);

/**
 * securityfs_remove - removes a file or directory from the securityfs filesystem
 *
 * @dentry: a pointer to a the dentry of the file or directory to be removed.
 *
 * This function removes a file or directory in securityfs that was previously
 * created with a call to another securityfs function (like
 * securityfs_create_file() or variants thereof.)
 *
 * This function is required to be called in order for the file to be
 * removed. No automatic cleanup of files will happen when a module is
 * removed; you are responsible here.
 */
void securityfs_remove(struct dentry *dentry)
{
	struct dentry *parent;

	if (!dentry || IS_ERR(dentry))
		return;

	parent = dentry->d_parent;
	if (!parent || d_really_is_negative(parent))
		return;

	inode_lock(d_inode(parent));
	if (simple_positive(dentry)) {
		if (d_is_dir(dentry))
			simple_rmdir(d_inode(parent), dentry);
		else
			simple_unlink(d_inode(parent), dentry);
		dput(dentry);
	}
	inode_unlock(d_inode(parent));
	simple_release_fs(&mount, &mount_count);
}
EXPORT_SYMBOL_GPL(securityfs_remove);

static int __init securityfs_init(void)
{
	int retval;

	retval = sysfs_create_mount_point(kernel_kobj, "security");
	if (retval)
		return retval;

	retval = register_filesystem(&fs_type);
	if (retval)
		sysfs_remove_mount_point(kernel_kobj, "security");
	return retval;
}

core_initcall(securityfs_init);
MODULE_LICENSE("GPL");

back to top