Revision 2d63ba3e41db3ceb0d23924ed2879b910276e24c authored by Linus Torvalds on 16 August 2019, 16:13:16 UTC, committed by Linus Torvalds on 16 August 2019, 16:13:16 UTC
Pull power management fixes from Rafael Wysocki:
 "These add a check to avoid recent suspend-to-idle power regression on
  systems with NVMe drives where the PCIe ASPM policy is "performance"
  (or when the kernel is built without ASPM support), fix an issue
  related to frequency limits in the schedutil cpufreq governor and fix
  a mistake related to the PM QoS usage in the cpufreq core introduced
  recently.

  Specifics:

   - Disable NVMe power optimization related to suspend-to-idle added
     recently on systems where PCIe ASPM is not able to put PCIe links
     into low-power states to prevent excess power from being drawn by
     the system while suspended (Rafael Wysocki).

   - Make the schedutil governor handle frequency limits changes
     properly in all cases (Viresh Kumar).

   - Prevent the cpufreq core from treating positive values returned by
     dev_pm_qos_update_request() as errors (Viresh Kumar)"

* tag 'pm-5.3-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  nvme-pci: Allow PCI bus-level PM to be used if ASPM is disabled
  PCI/ASPM: Add pcie_aspm_enabled()
  cpufreq: schedutil: Don't skip freq update when limits change
  cpufreq: dev_pm_qos_update_request() can return 1 on success
2 parent s 9da5bb2 + a3ee247
Raw File
xor.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * xor.c : Multiple Devices driver for Linux
 *
 * Copyright (C) 1996, 1997, 1998, 1999, 2000,
 * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
 *
 * Dispatch optimized RAID-5 checksumming functions.
 */

#define BH_TRACE 0
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/raid/xor.h>
#include <linux/jiffies.h>
#include <linux/preempt.h>
#include <asm/xor.h>

#ifndef XOR_SELECT_TEMPLATE
#define XOR_SELECT_TEMPLATE(x) (x)
#endif

/* The xor routines to use.  */
static struct xor_block_template *active_template;

void
xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
{
	unsigned long *p1, *p2, *p3, *p4;

	p1 = (unsigned long *) srcs[0];
	if (src_count == 1) {
		active_template->do_2(bytes, dest, p1);
		return;
	}

	p2 = (unsigned long *) srcs[1];
	if (src_count == 2) {
		active_template->do_3(bytes, dest, p1, p2);
		return;
	}

	p3 = (unsigned long *) srcs[2];
	if (src_count == 3) {
		active_template->do_4(bytes, dest, p1, p2, p3);
		return;
	}

	p4 = (unsigned long *) srcs[3];
	active_template->do_5(bytes, dest, p1, p2, p3, p4);
}
EXPORT_SYMBOL(xor_blocks);

/* Set of all registered templates.  */
static struct xor_block_template *__initdata template_list;

#define BENCH_SIZE (PAGE_SIZE)

static void __init
do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
{
	int speed;
	unsigned long now, j;
	int i, count, max;

	tmpl->next = template_list;
	template_list = tmpl;

	preempt_disable();

	/*
	 * Count the number of XORs done during a whole jiffy, and use
	 * this to calculate the speed of checksumming.  We use a 2-page
	 * allocation to have guaranteed color L1-cache layout.
	 */
	max = 0;
	for (i = 0; i < 5; i++) {
		j = jiffies;
		count = 0;
		while ((now = jiffies) == j)
			cpu_relax();
		while (time_before(jiffies, now + 1)) {
			mb(); /* prevent loop optimzation */
			tmpl->do_2(BENCH_SIZE, b1, b2);
			mb();
			count++;
			mb();
		}
		if (count > max)
			max = count;
	}

	preempt_enable();

	speed = max * (HZ * BENCH_SIZE / 1024);
	tmpl->speed = speed;

	printk(KERN_INFO "   %-10s: %5d.%03d MB/sec\n", tmpl->name,
	       speed / 1000, speed % 1000);
}

static int __init
calibrate_xor_blocks(void)
{
	void *b1, *b2;
	struct xor_block_template *f, *fastest;

	fastest = XOR_SELECT_TEMPLATE(NULL);

	if (fastest) {
		printk(KERN_INFO "xor: automatically using best "
				 "checksumming function   %-10s\n",
		       fastest->name);
		goto out;
	}

	b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
	if (!b1) {
		printk(KERN_WARNING "xor: Yikes!  No memory available.\n");
		return -ENOMEM;
	}
	b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;

	/*
	 * If this arch/cpu has a short-circuited selection, don't loop through
	 * all the possible functions, just test the best one
	 */

#define xor_speed(templ)	do_xor_speed((templ), b1, b2)

	printk(KERN_INFO "xor: measuring software checksum speed\n");
	XOR_TRY_TEMPLATES;
	fastest = template_list;
	for (f = fastest; f; f = f->next)
		if (f->speed > fastest->speed)
			fastest = f;

	printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
	       fastest->name, fastest->speed / 1000, fastest->speed % 1000);

#undef xor_speed

	free_pages((unsigned long)b1, 2);
out:
	active_template = fastest;
	return 0;
}

static __exit void xor_exit(void) { }

MODULE_LICENSE("GPL");

/* when built-in xor.o must initialize before drivers/md/md.o */
core_initcall(calibrate_xor_blocks);
module_exit(xor_exit);
back to top