Revision 0ee7c3e25d8c28845fceb4dd1c3cb5f50b9c45a9 authored by Linus Torvalds on 31 August 2021, 18:13:35 UTC, committed by Linus Torvalds on 31 August 2021, 18:13:35 UTC
Pull iomap updates from Darrick Wong:
 "The most notable externally visible change for this cycle is the
  addition of support for reads to inline tail fragments of files, which
  was requested by the erofs developers; and a correction for a kernel
  memory corruption bug if the sysadmin tries to activate a swapfile
  with more pages than the swapfile header suggests.

  We also now report writeback completion errors to the file mapping
  correctly, instead of munging all errors into EIO.

  Internally, the bulk of the changes are Christoph's patchset to reduce
  the indirect function call count by a third to a half by converting
  iomap iteration from a loop pattern to a generator/consumer pattern.
  As an added bonus, fsdax no longer open-codes iomap apply loops.

  Summary:

   - Simplify the bio_end_page usage in the buffered IO code.

   - Support reading inline data at nonzero offsets for erofs.

   - Fix some typos and bad grammar.

   - Convert kmap_atomic usage in the inline data read path.

   - Add some extra inline data input checking.

   - Fix a memory corruption bug stemming from iomap_swapfile_activate
     trying to activate more pages than mm was expecting.

   - Pass errnos through the page writeback code so that writeback
     errors are reported correctly instead of being munged to EIO.

   - Replace iomap_apply with a open-coded iterator loops to reduce the
     number of indirect calls by a third to a half.

   - Refactor the fsdax code to use iomap iterators instead of the
     open-coded iomap_apply code that it had before.

   - Format file range iomap tracepoint data in hexadecimal and
     standardize the names used in the pretty-print string"

* tag 'iomap-5.15-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (41 commits)
  iomap: standardize tracepoint formatting and storage
  mm/swap: consider max pages in iomap_swapfile_add_extent
  iomap: move loop control code to iter.c
  iomap: constify iomap_iter_srcmap
  fsdax: switch the fault handlers to use iomap_iter
  fsdax: factor out a dax_fault_actor() helper
  fsdax: factor out helpers to simplify the dax fault code
  iomap: rework unshare flag
  iomap: pass an iomap_iter to various buffered I/O helpers
  iomap: remove iomap_apply
  fsdax: switch dax_iomap_rw to use iomap_iter
  iomap: switch iomap_swapfile_activate to use iomap_iter
  iomap: switch iomap_seek_data to use iomap_iter
  iomap: switch iomap_seek_hole to use iomap_iter
  iomap: switch iomap_bmap to use iomap_iter
  iomap: switch iomap_fiemap to use iomap_iter
  iomap: switch __iomap_dio_rw to use iomap_iter
  iomap: switch iomap_page_mkwrite to use iomap_iter
  iomap: switch iomap_zero_range to use iomap_iter
  iomap: switch iomap_file_unshare to use iomap_iter
  ...
2 parent s 916d636 + 03b8df8
Raw File
percpu-stats.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * mm/percpu-debug.c
 *
 * Copyright (C) 2017		Facebook Inc.
 * Copyright (C) 2017		Dennis Zhou <dennis@kernel.org>
 *
 * Prints statistics about the percpu allocator and backing chunks.
 */
#include <linux/debugfs.h>
#include <linux/list.h>
#include <linux/percpu.h>
#include <linux/seq_file.h>
#include <linux/sort.h>
#include <linux/vmalloc.h>

#include "percpu-internal.h"

#define P(X, Y) \
	seq_printf(m, "  %-20s: %12lld\n", X, (long long int)Y)

struct percpu_stats pcpu_stats;
struct pcpu_alloc_info pcpu_stats_ai;

static int cmpint(const void *a, const void *b)
{
	return *(int *)a - *(int *)b;
}

/*
 * Iterates over all chunks to find the max nr_alloc entries.
 */
static int find_max_nr_alloc(void)
{
	struct pcpu_chunk *chunk;
	int slot, max_nr_alloc;

	max_nr_alloc = 0;
	for (slot = 0; slot < pcpu_nr_slots; slot++)
		list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list)
			max_nr_alloc = max(max_nr_alloc, chunk->nr_alloc);

	return max_nr_alloc;
}

/*
 * Prints out chunk state. Fragmentation is considered between
 * the beginning of the chunk to the last allocation.
 *
 * All statistics are in bytes unless stated otherwise.
 */
static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk,
			    int *buffer)
{
	struct pcpu_block_md *chunk_md = &chunk->chunk_md;
	int i, last_alloc, as_len, start, end;
	int *alloc_sizes, *p;
	/* statistics */
	int sum_frag = 0, max_frag = 0;
	int cur_min_alloc = 0, cur_med_alloc = 0, cur_max_alloc = 0;

	alloc_sizes = buffer;

	/*
	 * find_last_bit returns the start value if nothing found.
	 * Therefore, we must determine if it is a failure of find_last_bit
	 * and set the appropriate value.
	 */
	last_alloc = find_last_bit(chunk->alloc_map,
				   pcpu_chunk_map_bits(chunk) -
				   chunk->end_offset / PCPU_MIN_ALLOC_SIZE - 1);
	last_alloc = test_bit(last_alloc, chunk->alloc_map) ?
		     last_alloc + 1 : 0;

	as_len = 0;
	start = chunk->start_offset / PCPU_MIN_ALLOC_SIZE;

	/*
	 * If a bit is set in the allocation map, the bound_map identifies
	 * where the allocation ends.  If the allocation is not set, the
	 * bound_map does not identify free areas as it is only kept accurate
	 * on allocation, not free.
	 *
	 * Positive values are allocations and negative values are free
	 * fragments.
	 */
	while (start < last_alloc) {
		if (test_bit(start, chunk->alloc_map)) {
			end = find_next_bit(chunk->bound_map, last_alloc,
					    start + 1);
			alloc_sizes[as_len] = 1;
		} else {
			end = find_next_bit(chunk->alloc_map, last_alloc,
					    start + 1);
			alloc_sizes[as_len] = -1;
		}

		alloc_sizes[as_len++] *= (end - start) * PCPU_MIN_ALLOC_SIZE;

		start = end;
	}

	/*
	 * The negative values are free fragments and thus sorting gives the
	 * free fragments at the beginning in largest first order.
	 */
	if (as_len > 0) {
		sort(alloc_sizes, as_len, sizeof(int), cmpint, NULL);

		/* iterate through the unallocated fragments */
		for (i = 0, p = alloc_sizes; *p < 0 && i < as_len; i++, p++) {
			sum_frag -= *p;
			max_frag = max(max_frag, -1 * (*p));
		}

		cur_min_alloc = alloc_sizes[i];
		cur_med_alloc = alloc_sizes[(i + as_len - 1) / 2];
		cur_max_alloc = alloc_sizes[as_len - 1];
	}

	P("nr_alloc", chunk->nr_alloc);
	P("max_alloc_size", chunk->max_alloc_size);
	P("empty_pop_pages", chunk->nr_empty_pop_pages);
	P("first_bit", chunk_md->first_free);
	P("free_bytes", chunk->free_bytes);
	P("contig_bytes", chunk_md->contig_hint * PCPU_MIN_ALLOC_SIZE);
	P("sum_frag", sum_frag);
	P("max_frag", max_frag);
	P("cur_min_alloc", cur_min_alloc);
	P("cur_med_alloc", cur_med_alloc);
	P("cur_max_alloc", cur_max_alloc);
	seq_putc(m, '\n');
}

static int percpu_stats_show(struct seq_file *m, void *v)
{
	struct pcpu_chunk *chunk;
	int slot, max_nr_alloc;
	int *buffer;

alloc_buffer:
	spin_lock_irq(&pcpu_lock);
	max_nr_alloc = find_max_nr_alloc();
	spin_unlock_irq(&pcpu_lock);

	/* there can be at most this many free and allocated fragments */
	buffer = vmalloc(array_size(sizeof(int), (2 * max_nr_alloc + 1)));
	if (!buffer)
		return -ENOMEM;

	spin_lock_irq(&pcpu_lock);

	/* if the buffer allocated earlier is too small */
	if (max_nr_alloc < find_max_nr_alloc()) {
		spin_unlock_irq(&pcpu_lock);
		vfree(buffer);
		goto alloc_buffer;
	}

#define PL(X)								\
	seq_printf(m, "  %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X)

	seq_printf(m,
			"Percpu Memory Statistics\n"
			"Allocation Info:\n"
			"----------------------------------------\n");
	PL(unit_size);
	PL(static_size);
	PL(reserved_size);
	PL(dyn_size);
	PL(atom_size);
	PL(alloc_size);
	seq_putc(m, '\n');

#undef PL

#define PU(X) \
	seq_printf(m, "  %-20s: %12llu\n", #X, (unsigned long long)pcpu_stats.X)

	seq_printf(m,
			"Global Stats:\n"
			"----------------------------------------\n");
	PU(nr_alloc);
	PU(nr_dealloc);
	PU(nr_cur_alloc);
	PU(nr_max_alloc);
	PU(nr_chunks);
	PU(nr_max_chunks);
	PU(min_alloc_size);
	PU(max_alloc_size);
	P("empty_pop_pages", pcpu_nr_empty_pop_pages);
	seq_putc(m, '\n');

#undef PU

	seq_printf(m,
			"Per Chunk Stats:\n"
			"----------------------------------------\n");

	if (pcpu_reserved_chunk) {
		seq_puts(m, "Chunk: <- Reserved Chunk\n");
		chunk_map_stats(m, pcpu_reserved_chunk, buffer);
	}

	for (slot = 0; slot < pcpu_nr_slots; slot++) {
		list_for_each_entry(chunk, &pcpu_chunk_lists[slot], list) {
			if (chunk == pcpu_first_chunk)
				seq_puts(m, "Chunk: <- First Chunk\n");
			else if (slot == pcpu_to_depopulate_slot)
				seq_puts(m, "Chunk (to_depopulate)\n");
			else if (slot == pcpu_sidelined_slot)
				seq_puts(m, "Chunk (sidelined):\n");
			else
				seq_puts(m, "Chunk:\n");
			chunk_map_stats(m, chunk, buffer);
		}
	}

	spin_unlock_irq(&pcpu_lock);

	vfree(buffer);

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(percpu_stats);

static int __init init_percpu_stats_debugfs(void)
{
	debugfs_create_file("percpu_stats", 0444, NULL, NULL,
			&percpu_stats_fops);

	return 0;
}

late_initcall(init_percpu_stats_debugfs);
back to top