Revision 011e4b02f1da156ac7fea28a9da878f3c23af739 authored by Srivatsa S. Bhat on 27 May 2014, 10:55:34 UTC, committed by Benjamin Herrenschmidt on 28 May 2014, 03:24:26 UTC
If we try to perform a kexec when the machine is in ST (Single-Threaded) mode
(ppc64_cpu --smt=off), the kexec operation doesn't succeed properly, and we
get the following messages during boot:

[    0.089866] POWER8 performance monitor hardware support registered
[    0.089985] power8-pmu: PMAO restore workaround active.
[    5.095419] Processor 1 is stuck.
[   10.097933] Processor 2 is stuck.
[   15.100480] Processor 3 is stuck.
[   20.102982] Processor 4 is stuck.
[   25.105489] Processor 5 is stuck.
[   30.108005] Processor 6 is stuck.
[   35.110518] Processor 7 is stuck.
[   40.113369] Processor 9 is stuck.
[   45.115879] Processor 10 is stuck.
[   50.118389] Processor 11 is stuck.
[   55.120904] Processor 12 is stuck.
[   60.123425] Processor 13 is stuck.
[   65.125970] Processor 14 is stuck.
[   70.128495] Processor 15 is stuck.
[   75.131316] Processor 17 is stuck.

Note that only the sibling threads are stuck, while the primary threads (0, 8,
16 etc) boot just fine. Looking closer at the previous step of kexec, we observe
that kexec tries to wakeup (bring online) the sibling threads of all the cores,
before performing kexec:

[ 9464.131231] Starting new kernel
[ 9464.148507] kexec: Waking offline cpu 1.
[ 9464.148552] kexec: Waking offline cpu 2.
[ 9464.148600] kexec: Waking offline cpu 3.
[ 9464.148636] kexec: Waking offline cpu 4.
[ 9464.148671] kexec: Waking offline cpu 5.
[ 9464.148708] kexec: Waking offline cpu 6.
[ 9464.148743] kexec: Waking offline cpu 7.
[ 9464.148779] kexec: Waking offline cpu 9.
[ 9464.148815] kexec: Waking offline cpu 10.
[ 9464.148851] kexec: Waking offline cpu 11.
[ 9464.148887] kexec: Waking offline cpu 12.
[ 9464.148922] kexec: Waking offline cpu 13.
[ 9464.148958] kexec: Waking offline cpu 14.
[ 9464.148994] kexec: Waking offline cpu 15.
[ 9464.149030] kexec: Waking offline cpu 17.

Instrumenting this piece of code revealed that the cpu_up() operation actually
fails with -EBUSY. Thus, only the primary threads of all the cores are online
during kexec, and hence this is a sure-shot receipe for disaster, as explained
in commit e8e5c2155b (powerpc/kexec: Fix orphaned offline CPUs across kexec),
as well as in the comment above wake_offline_cpus().

It turns out that cpu_up() was returning -EBUSY because the variable
'cpu_hotplug_disabled' was set to 1; and this disabling of CPU hotplug was done
by migrate_to_reboot_cpu() inside kernel_kexec().

Now, migrate_to_reboot_cpu() was originally written with the assumption that
any further code will not need to perform CPU hotplug, since we are anyway in
the reboot path. However, kexec is clearly not such a case, since we depend on
onlining CPUs, atleast on powerpc.

So re-enable cpu-hotplug after returning from migrate_to_reboot_cpu() in the
kexec path, to fix this regression in kexec on powerpc.

Also, wrap the cpu_up() in powerpc kexec code within a WARN_ON(), so that we
can catch such issues more easily in the future.

Fixes: c97102ba963 (kexec: migrate to reboot cpu)
Cc: stable@vger.kernel.org
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
1 parent 7998eb3
Raw File
af_alg.c
/*
 * af_alg: User-space algorithm interface
 *
 * This file provides the user-space API for algorithms.
 *
 * Copyright (c) 2010 Herbert Xu <herbert@gondor.apana.org.au>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */

#include <linux/atomic.h>
#include <crypto/if_alg.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/net.h>
#include <linux/rwsem.h>

struct alg_type_list {
	const struct af_alg_type *type;
	struct list_head list;
};

static atomic_long_t alg_memory_allocated;

static struct proto alg_proto = {
	.name			= "ALG",
	.owner			= THIS_MODULE,
	.memory_allocated	= &alg_memory_allocated,
	.obj_size		= sizeof(struct alg_sock),
};

static LIST_HEAD(alg_types);
static DECLARE_RWSEM(alg_types_sem);

static const struct af_alg_type *alg_get_type(const char *name)
{
	const struct af_alg_type *type = ERR_PTR(-ENOENT);
	struct alg_type_list *node;

	down_read(&alg_types_sem);
	list_for_each_entry(node, &alg_types, list) {
		if (strcmp(node->type->name, name))
			continue;

		if (try_module_get(node->type->owner))
			type = node->type;
		break;
	}
	up_read(&alg_types_sem);

	return type;
}

int af_alg_register_type(const struct af_alg_type *type)
{
	struct alg_type_list *node;
	int err = -EEXIST;

	down_write(&alg_types_sem);
	list_for_each_entry(node, &alg_types, list) {
		if (!strcmp(node->type->name, type->name))
			goto unlock;
	}

	node = kmalloc(sizeof(*node), GFP_KERNEL);
	err = -ENOMEM;
	if (!node)
		goto unlock;

	type->ops->owner = THIS_MODULE;
	node->type = type;
	list_add(&node->list, &alg_types);
	err = 0;

unlock:
	up_write(&alg_types_sem);

	return err;
}
EXPORT_SYMBOL_GPL(af_alg_register_type);

int af_alg_unregister_type(const struct af_alg_type *type)
{
	struct alg_type_list *node;
	int err = -ENOENT;

	down_write(&alg_types_sem);
	list_for_each_entry(node, &alg_types, list) {
		if (strcmp(node->type->name, type->name))
			continue;

		list_del(&node->list);
		kfree(node);
		err = 0;
		break;
	}
	up_write(&alg_types_sem);

	return err;
}
EXPORT_SYMBOL_GPL(af_alg_unregister_type);

static void alg_do_release(const struct af_alg_type *type, void *private)
{
	if (!type)
		return;

	type->release(private);
	module_put(type->owner);
}

int af_alg_release(struct socket *sock)
{
	if (sock->sk)
		sock_put(sock->sk);
	return 0;
}
EXPORT_SYMBOL_GPL(af_alg_release);

static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct alg_sock *ask = alg_sk(sk);
	struct sockaddr_alg *sa = (void *)uaddr;
	const struct af_alg_type *type;
	void *private;

	if (sock->state == SS_CONNECTED)
		return -EINVAL;

	if (addr_len != sizeof(*sa))
		return -EINVAL;

	sa->salg_type[sizeof(sa->salg_type) - 1] = 0;
	sa->salg_name[sizeof(sa->salg_name) - 1] = 0;

	type = alg_get_type(sa->salg_type);
	if (IS_ERR(type) && PTR_ERR(type) == -ENOENT) {
		request_module("algif-%s", sa->salg_type);
		type = alg_get_type(sa->salg_type);
	}

	if (IS_ERR(type))
		return PTR_ERR(type);

	private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask);
	if (IS_ERR(private)) {
		module_put(type->owner);
		return PTR_ERR(private);
	}

	lock_sock(sk);

	swap(ask->type, type);
	swap(ask->private, private);

	release_sock(sk);

	alg_do_release(type, private);

	return 0;
}

static int alg_setkey(struct sock *sk, char __user *ukey,
		      unsigned int keylen)
{
	struct alg_sock *ask = alg_sk(sk);
	const struct af_alg_type *type = ask->type;
	u8 *key;
	int err;

	key = sock_kmalloc(sk, keylen, GFP_KERNEL);
	if (!key)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(key, ukey, keylen))
		goto out;

	err = type->setkey(ask->private, key, keylen);

out:
	sock_kfree_s(sk, key, keylen);

	return err;
}

static int alg_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct alg_sock *ask = alg_sk(sk);
	const struct af_alg_type *type;
	int err = -ENOPROTOOPT;

	lock_sock(sk);
	type = ask->type;

	if (level != SOL_ALG || !type)
		goto unlock;

	switch (optname) {
	case ALG_SET_KEY:
		if (sock->state == SS_CONNECTED)
			goto unlock;
		if (!type->setkey)
			goto unlock;

		err = alg_setkey(sk, optval, optlen);
	}

unlock:
	release_sock(sk);

	return err;
}

int af_alg_accept(struct sock *sk, struct socket *newsock)
{
	struct alg_sock *ask = alg_sk(sk);
	const struct af_alg_type *type;
	struct sock *sk2;
	int err;

	lock_sock(sk);
	type = ask->type;

	err = -EINVAL;
	if (!type)
		goto unlock;

	sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto);
	err = -ENOMEM;
	if (!sk2)
		goto unlock;

	sock_init_data(newsock, sk2);
	sock_graft(sk2, newsock);

	err = type->accept(ask->private, sk2);
	if (err) {
		sk_free(sk2);
		goto unlock;
	}

	sk2->sk_family = PF_ALG;

	sock_hold(sk);
	alg_sk(sk2)->parent = sk;
	alg_sk(sk2)->type = type;

	newsock->ops = type->ops;
	newsock->state = SS_CONNECTED;

	err = 0;

unlock:
	release_sock(sk);

	return err;
}
EXPORT_SYMBOL_GPL(af_alg_accept);

static int alg_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return af_alg_accept(sock->sk, newsock);
}

static const struct proto_ops alg_proto_ops = {
	.family		=	PF_ALG,
	.owner		=	THIS_MODULE,

	.connect	=	sock_no_connect,
	.socketpair	=	sock_no_socketpair,
	.getname	=	sock_no_getname,
	.ioctl		=	sock_no_ioctl,
	.listen		=	sock_no_listen,
	.shutdown	=	sock_no_shutdown,
	.getsockopt	=	sock_no_getsockopt,
	.mmap		=	sock_no_mmap,
	.sendpage	=	sock_no_sendpage,
	.sendmsg	=	sock_no_sendmsg,
	.recvmsg	=	sock_no_recvmsg,
	.poll		=	sock_no_poll,

	.bind		=	alg_bind,
	.release	=	af_alg_release,
	.setsockopt	=	alg_setsockopt,
	.accept		=	alg_accept,
};

static void alg_sock_destruct(struct sock *sk)
{
	struct alg_sock *ask = alg_sk(sk);

	alg_do_release(ask->type, ask->private);
}

static int alg_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct sock *sk;
	int err;

	if (sock->type != SOCK_SEQPACKET)
		return -ESOCKTNOSUPPORT;
	if (protocol != 0)
		return -EPROTONOSUPPORT;

	err = -ENOMEM;
	sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto);
	if (!sk)
		goto out;

	sock->ops = &alg_proto_ops;
	sock_init_data(sock, sk);

	sk->sk_family = PF_ALG;
	sk->sk_destruct = alg_sock_destruct;

	return 0;
out:
	return err;
}

static const struct net_proto_family alg_family = {
	.family	=	PF_ALG,
	.create	=	alg_create,
	.owner	=	THIS_MODULE,
};

int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len,
		   int write)
{
	unsigned long from = (unsigned long)addr;
	unsigned long npages;
	unsigned off;
	int err;
	int i;

	err = -EFAULT;
	if (!access_ok(write ? VERIFY_READ : VERIFY_WRITE, addr, len))
		goto out;

	off = from & ~PAGE_MASK;
	npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (npages > ALG_MAX_PAGES)
		npages = ALG_MAX_PAGES;

	err = get_user_pages_fast(from, npages, write, sgl->pages);
	if (err < 0)
		goto out;

	npages = err;
	err = -EINVAL;
	if (WARN_ON(npages == 0))
		goto out;

	err = 0;

	sg_init_table(sgl->sg, npages);

	for (i = 0; i < npages; i++) {
		int plen = min_t(int, len, PAGE_SIZE - off);

		sg_set_page(sgl->sg + i, sgl->pages[i], plen, off);

		off = 0;
		len -= plen;
		err += plen;
	}

out:
	return err;
}
EXPORT_SYMBOL_GPL(af_alg_make_sg);

void af_alg_free_sg(struct af_alg_sgl *sgl)
{
	int i;

	i = 0;
	do {
		put_page(sgl->pages[i]);
	} while (!sg_is_last(sgl->sg + (i++)));
}
EXPORT_SYMBOL_GPL(af_alg_free_sg);

int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con)
{
	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;
		if (cmsg->cmsg_level != SOL_ALG)
			continue;

		switch(cmsg->cmsg_type) {
		case ALG_SET_IV:
			if (cmsg->cmsg_len < CMSG_LEN(sizeof(*con->iv)))
				return -EINVAL;
			con->iv = (void *)CMSG_DATA(cmsg);
			if (cmsg->cmsg_len < CMSG_LEN(con->iv->ivlen +
						      sizeof(*con->iv)))
				return -EINVAL;
			break;

		case ALG_SET_OP:
			if (cmsg->cmsg_len < CMSG_LEN(sizeof(u32)))
				return -EINVAL;
			con->op = *(u32 *)CMSG_DATA(cmsg);
			break;

		default:
			return -EINVAL;
		}
	}

	return 0;
}
EXPORT_SYMBOL_GPL(af_alg_cmsg_send);

int af_alg_wait_for_completion(int err, struct af_alg_completion *completion)
{
	switch (err) {
	case -EINPROGRESS:
	case -EBUSY:
		wait_for_completion(&completion->completion);
		reinit_completion(&completion->completion);
		err = completion->err;
		break;
	};

	return err;
}
EXPORT_SYMBOL_GPL(af_alg_wait_for_completion);

void af_alg_complete(struct crypto_async_request *req, int err)
{
	struct af_alg_completion *completion = req->data;

	completion->err = err;
	complete(&completion->completion);
}
EXPORT_SYMBOL_GPL(af_alg_complete);

static int __init af_alg_init(void)
{
	int err = proto_register(&alg_proto, 0);

	if (err)
		goto out;

	err = sock_register(&alg_family);
	if (err != 0)
		goto out_unregister_proto;

out:
	return err;

out_unregister_proto:
	proto_unregister(&alg_proto);
	goto out;
}

static void __exit af_alg_exit(void)
{
	sock_unregister(PF_ALG);
	proto_unregister(&alg_proto);
}

module_init(af_alg_init);
module_exit(af_alg_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(AF_ALG);
back to top