Revision 595d153dd1022392083ac93a1550382cbee127e0 authored by Michael Ellerman on 26 May 2020, 06:18:08 UTC, committed by Michael Ellerman on 26 May 2020, 07:32:37 UTC
Commit 702f09805222 ("powerpc/64s/exception: Remove lite interrupt
return") changed the interrupt return path to not restore non-volatile
registers by default, and explicitly restore them in paths where it is
required.

But it missed that the facility unavailable exception can sometimes
modify user registers, ie. when it does emulation of move from DSCR.

This is seen as a failure of the dscr_sysfs_thread_test:
  test: dscr_sysfs_thread_test
  [cpu 0] User DSCR should be 1 but is 0
  failure: dscr_sysfs_thread_test

So restore non-volatile GPRs after facility unavailable exceptions.

Currently the hypervisor facility unavailable exception is also wired
up to call facility_unavailable_exception().

In practice we should never take a hypervisor facility unavailable
exception for the DSCR. On older bare metal systems we set HFSCR_DSCR
unconditionally in __init_HFSCR, or on newer systems it should be
enabled via the "data-stream-control-register" device tree CPU
feature.

Even if it's not, since commit f3c99f97a3cd ("KVM: PPC: Book3S HV:
Don't access HFSCR, LPIDR or LPCR when running nested"), the KVM code
has unconditionally set HFSCR_DSCR when running guests.

So we should only get a hypervisor facility unavailable for the DSCR
if skiboot has disabled the "data-stream-control-register" feature,
and we are somehow in guest context but not via KVM.

Given all that, it should be unnecessary to add a restore of
non-volatile GPRs after the hypervisor facility exception, because we
never expect to hit that path. But equally we may as well add the
restore, because we never expect to hit that path, and if we ever did,
at least we would correctly restore the registers to their post
emulation state.

In future we can split the non-HV and HV facility unavailable handling
so that there is no emulation in the HV handler, and then remove the
restore for the HV case.

Fixes: 702f09805222 ("powerpc/64s/exception: Remove lite interrupt return")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200526061808.2472279-1-mpe@ellerman.id.au
1 parent 8659a0e
Raw File
perf_event.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Performance event support for s390x
 *
 *  Copyright IBM Corp. 2012, 2013
 *  Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 */
#define KMSG_COMPONENT	"perf"
#define pr_fmt(fmt)	KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/perf_event.h>
#include <linux/kvm_host.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/sysfs.h>
#include <asm/irq.h>
#include <asm/cpu_mf.h>
#include <asm/lowcore.h>
#include <asm/processor.h>
#include <asm/sysinfo.h>
#include <asm/unwind.h>

const char *perf_pmu_name(void)
{
	if (cpum_cf_avail() || cpum_sf_avail())
		return "CPU-Measurement Facilities (CPU-MF)";
	return "pmu";
}
EXPORT_SYMBOL(perf_pmu_name);

int perf_num_counters(void)
{
	int num = 0;

	if (cpum_cf_avail())
		num += PERF_CPUM_CF_MAX_CTR;
	if (cpum_sf_avail())
		num += PERF_CPUM_SF_MAX_CTR;

	return num;
}
EXPORT_SYMBOL(perf_num_counters);

static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs)
{
	struct stack_frame *stack = (struct stack_frame *) regs->gprs[15];

	if (!stack)
		return NULL;

	return (struct kvm_s390_sie_block *) stack->empty1[0];
}

static bool is_in_guest(struct pt_regs *regs)
{
	if (user_mode(regs))
		return false;
#if IS_ENABLED(CONFIG_KVM)
	return instruction_pointer(regs) == (unsigned long) &sie_exit;
#else
	return false;
#endif
}

static unsigned long guest_is_user_mode(struct pt_regs *regs)
{
	return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE;
}

static unsigned long instruction_pointer_guest(struct pt_regs *regs)
{
	return sie_block(regs)->gpsw.addr;
}

unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
	return is_in_guest(regs) ? instruction_pointer_guest(regs)
				 : instruction_pointer(regs);
}

static unsigned long perf_misc_guest_flags(struct pt_regs *regs)
{
	return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
					: PERF_RECORD_MISC_GUEST_KERNEL;
}

static unsigned long perf_misc_flags_sf(struct pt_regs *regs)
{
	struct perf_sf_sde_regs *sde_regs;
	unsigned long flags;

	sde_regs = (struct perf_sf_sde_regs *) &regs->int_parm_long;
	if (sde_regs->in_guest)
		flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER
					: PERF_RECORD_MISC_GUEST_KERNEL;
	else
		flags = user_mode(regs) ? PERF_RECORD_MISC_USER
					: PERF_RECORD_MISC_KERNEL;
	return flags;
}

unsigned long perf_misc_flags(struct pt_regs *regs)
{
	/* Check if the cpum_sf PMU has created the pt_regs structure.
	 * In this case, perf misc flags can be easily extracted.  Otherwise,
	 * do regular checks on the pt_regs content.
	 */
	if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA)
		if (!regs->gprs[15])
			return perf_misc_flags_sf(regs);

	if (is_in_guest(regs))
		return perf_misc_guest_flags(regs);

	return user_mode(regs) ? PERF_RECORD_MISC_USER
			       : PERF_RECORD_MISC_KERNEL;
}

static void print_debug_cf(void)
{
	struct cpumf_ctr_info cf_info;
	int cpu = smp_processor_id();

	memset(&cf_info, 0, sizeof(cf_info));
	if (!qctri(&cf_info))
		pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n",
			cpu, cf_info.cfvn, cf_info.csvn,
			cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl);
}

static void print_debug_sf(void)
{
	struct hws_qsi_info_block si;
	int cpu = smp_processor_id();

	memset(&si, 0, sizeof(si));
	if (qsi(&si))
		return;

	pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n",
		cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate,
		si.cpu_speed);

	if (si.as)
		pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i"
			" bsdes=%i tear=%016lx dear=%016lx\n", cpu,
			si.as, si.es, si.cs, si.bsdes, si.tear, si.dear);
	if (si.ad)
		pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i"
			" dsdes=%i tear=%016lx dear=%016lx\n", cpu,
			si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear);
}

void perf_event_print_debug(void)
{
	unsigned long flags;

	local_irq_save(flags);
	if (cpum_cf_avail())
		print_debug_cf();
	if (cpum_sf_avail())
		print_debug_sf();
	local_irq_restore(flags);
}

/* Service level infrastructure */
static void sl_print_counter(struct seq_file *m)
{
	struct cpumf_ctr_info ci;

	memset(&ci, 0, sizeof(ci));
	if (qctri(&ci))
		return;

	seq_printf(m, "CPU-MF: Counter facility: version=%u.%u "
		   "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl);
}

static void sl_print_sampling(struct seq_file *m)
{
	struct hws_qsi_info_block si;

	memset(&si, 0, sizeof(si));
	if (qsi(&si))
		return;

	if (!si.as && !si.ad)
		return;

	seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu"
		   " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate,
		   si.cpu_speed);
	if (si.as)
		seq_printf(m, "CPU-MF: Sampling facility: mode=basic"
			   " sample_size=%u\n", si.bsdes);
	if (si.ad)
		seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic"
			   " sample_size=%u\n", si.dsdes);
}

static void service_level_perf_print(struct seq_file *m,
				     struct service_level *sl)
{
	if (cpum_cf_avail())
		sl_print_counter(m);
	if (cpum_sf_avail())
		sl_print_sampling(m);
}

static struct service_level service_level_perf = {
	.seq_print = service_level_perf_print,
};

static int __init service_level_perf_register(void)
{
	return register_service_level(&service_level_perf);
}
arch_initcall(service_level_perf_register);

void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
			   struct pt_regs *regs)
{
	struct unwind_state state;
	unsigned long addr;

	unwind_for_each_frame(&state, current, regs, 0) {
		addr = unwind_get_return_address(&state);
		if (!addr || perf_callchain_store(entry, addr))
			return;
	}
}

/* Perf definitions for PMU event attributes in sysfs */
ssize_t cpumf_events_sysfs_show(struct device *dev,
				struct device_attribute *attr, char *page)
{
	struct perf_pmu_events_attr *pmu_attr;

	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
	return sprintf(page, "event=0x%04llx\n", pmu_attr->id);
}
back to top