Revision d2d4edbebe07ddb77980656abe7b9bc7a9e0cdf7 authored by Masami Hiramatsu on 11 January 2017, 05:59:38 UTC, committed by Arnaldo Carvalho de Melo on 16 January 2017, 18:14:06 UTC
Fix showing the correct locations for events on modules by relocating
the given address up front instead of retrying after a failure.

This happens when the module text is big enough to extend beyond
sh_addr, because the original code retried with the given address +
sh_addr only after it failed to find a CU DIE at the given address.

Any address smaller than sh_addr always fails the first lookup and is
retried with the correct, relocated address, but an address bigger
than sh_addr gets a CU DIE sitting at the given, unadjusted address,
so the wrong DIE is returned.
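
The shape of the fix can be sketched as follows. This is a rough model,
not the actual probe-finder code: struct cu_die and cu_die_at() are
hypothetical names made up for illustration.

  struct cu_die;                                  /* opaque, sketch only */
  extern struct cu_die *cu_die_at(unsigned long addr);   /* hypothetical */

  struct cu_die *find_cu_die_old(unsigned long addr, unsigned long sh_addr)
  {
          struct cu_die *die = cu_die_at(addr);

          /*
           * Retrying only on failure is the bug: if addr >= sh_addr,
           * the first lookup can succeed and return a DIE for the
           * wrong, unrelocated location.
           */
          if (!die)
                  die = cu_die_at(addr + sh_addr);
          return die;
  }

  struct cu_die *find_cu_die_new(unsigned long addr, unsigned long sh_addr)
  {
          /* Always relocate the given address before the lookup. */
          return cu_die_at(addr + sh_addr);
  }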

In my environment (x86-64), the sh_addr of the ".text" section is 0x10030.
Since i915 is a huge kernel module, we can see this issue as below.

  $ grep "[Tt] .*\[i915\]" /proc/kallsyms | sort | head -n1
  ffffffffc0270000 t i915_switcheroo_can_switch	[i915]

ffffffffc0270000 + 0x10030 = ffffffffc0280030, so we'll check symbols
on both sides of this boundary.

  $ grep "[Tt] .*\[i915\]" /proc/kallsyms | grep -B1 ^ffffffffc028\
  | head -n 2
  ffffffffc027ff80 t haswell_init_clock_gating	[i915]
  ffffffffc0280110 t valleyview_init_clock_gating	[i915]

So set up probes on both functions and see what happens.

  $ sudo ./perf probe -m i915 -a haswell_init_clock_gating \
        -a valleyview_init_clock_gating
  Added new events:
    probe:haswell_init_clock_gating (on haswell_init_clock_gating in i915)
    probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915)

  You can now use it in all perf tools, such as:

  	perf record -e probe:valleyview_init_clock_gating -aR sleep 1

  $ sudo ./perf probe -l
    probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915)
    probe:valleyview_init_clock_gating (on i915_vga_set_decode:4@gpu/drm/i915/i915_drv.c in i915)

As you can see, haswell_init_clock_gating is correctly shown,
but valleyview_init_clock_gating is not.

With this patch, both events are shown correctly.

  $ sudo ./perf probe -l
    probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915)
    probe:valleyview_init_clock_gating (on valleyview_init_clock_gating@gpu/drm/i915/intel_pm.c in i915)

Committer notes:

In my case:

  # perf probe -m i915 -a haswell_init_clock_gating -a valleyview_init_clock_gating
  Added new events:
    probe:haswell_init_clock_gating (on haswell_init_clock_gating in i915)
    probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915)

  You can now use it in all perf tools, such as:

	  perf record -e probe:valleyview_init_clock_gating -aR sleep 1

  # perf probe -l
    probe:haswell_init_clock_gating (on i915_getparam+432@gpu/drm/i915/i915_drv.c in i915)
    probe:valleyview_init_clock_gating (on __i915_printk+240@gpu/drm/i915/i915_drv.c in i915)
  #

  # readelf -SW /lib/modules/4.9.0+/build/vmlinux | egrep -w '.text|Name'
   [Nr] Name   Type      Address          Off    Size   ES Flg Lk Inf Al
   [ 1] .text  PROGBITS  ffffffff81000000 200000 822fd3 00  AX  0   0 4096
  #

  So both are b0rked. Now, with the fix:

  # perf probe -m i915 -a haswell_init_clock_gating -a valleyview_init_clock_gating
  Added new events:
    probe:haswell_init_clock_gating (on haswell_init_clock_gating in i915)
    probe:valleyview_init_clock_gating (on valleyview_init_clock_gating in i915)

  You can now use it in all perf tools, such as:

	perf record -e probe:valleyview_init_clock_gating -aR sleep 1

  # perf probe -l
    probe:haswell_init_clock_gating (on haswell_init_clock_gating@gpu/drm/i915/intel_pm.c in i915)
    probe:valleyview_init_clock_gating (on valleyview_init_clock_gating@gpu/drm/i915/intel_pm.c in i915)
  #

Both look correct.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/148411436777.9978.1440275861947194930.stgit@devbox
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
vmacache.c
/*
 * Copyright (C) 2014 Davidlohr Bueso.
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmacache.h>

/*
 * Flush vma caches for threads that share a given mm.
 *
 * The operation is safe because the caller holds the mmap_sem
 * exclusively and other threads accessing the vma cache will
 * have mmap_sem held at least for read, so no extra locking
 * is required to maintain the vma cache.
 */
void vmacache_flush_all(struct mm_struct *mm)
{
	struct task_struct *g, *p;

	count_vm_vmacache_event(VMACACHE_FULL_FLUSHES);

	/*
	 * Single threaded tasks need not iterate the entire
	 * list of processes. We can also avoid the flushing,
	 * since the mm's seqnum was already increased and we
	 * don't have to worry about other threads' seqnums.
	 * Current's flush will occur upon the next lookup.
	 */
	if (atomic_read(&mm->mm_users) == 1)
		return;

	rcu_read_lock();
	for_each_process_thread(g, p) {
		/*
		 * Only flush the vmacache pointers, as the mm
		 * seqnum is already set and current's will be
		 * set upon invalidation when the next lookup
		 * is done.
		 */
		if (mm == p->mm)
			vmacache_flush(p);
	}
	rcu_read_unlock();
}

/*
 * This task may be accessing a foreign mm via (for example)
 * get_user_pages()->find_vma().  The vmacache is task-local and this
 * task's vmacache pertains to a different mm (ie, its own).  There is
 * nothing we can do here.
 *
 * Also handle the case where a kernel thread has adopted this mm via use_mm().
 * That kernel thread's vmacache is not applicable to this mm.
 */
static inline bool vmacache_valid_mm(struct mm_struct *mm)
{
	return current->mm == mm && !(current->flags & PF_KTHREAD);
}

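/*
 * Cache @newvma in the per-task slot selected by hashing @addr, so
 * that a later vmacache_find() on a nearby address can hit without
 * walking the full vma tree.
 */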
void vmacache_update(unsigned long addr, struct vm_area_struct *newvma)
{
	if (vmacache_valid_mm(newvma->vm_mm))
		current->vmacache[VMACACHE_HASH(addr)] = newvma;
}

static bool vmacache_valid(struct mm_struct *mm)
{
	struct task_struct *curr;

	if (!vmacache_valid_mm(mm))
		return false;

	curr = current;
	if (mm->vmacache_seqnum != curr->vmacache_seqnum) {
		/*
		 * First attempt will always be invalid, initialize
		 * the new cache for this task here.
		 */
		curr->vmacache_seqnum = mm->vmacache_seqnum;
		vmacache_flush(curr);
		return false;
	}
	return true;
}

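/*
 * Look up the vma covering @addr in the per-task cache. Note that
 * all slots are scanned, not only the hashed one. Returns NULL on a
 * miss or when the cache is not valid for @mm, in which case the
 * caller falls back to the full vma lookup.
 */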
struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
{
	int i;

	count_vm_vmacache_event(VMACACHE_FIND_CALLS);

	if (!vmacache_valid(mm))
		return NULL;

	for (i = 0; i < VMACACHE_SIZE; i++) {
		struct vm_area_struct *vma = current->vmacache[i];

		if (!vma)
			continue;
		if (WARN_ON_ONCE(vma->vm_mm != mm))
			break;
		if (vma->vm_start <= addr && vma->vm_end > addr) {
			count_vm_vmacache_event(VMACACHE_FIND_HITS);
			return vma;
		}
	}

	return NULL;
}

#ifndef CONFIG_MMU
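/*
 * nommu variant: regions are looked up by their exact [start, end)
 * bounds, so match on equality instead of address containment.
 */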
struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	int i;

	count_vm_vmacache_event(VMACACHE_FIND_CALLS);

	if (!vmacache_valid(mm))
		return NULL;

	for (i = 0; i < VMACACHE_SIZE; i++) {
		struct vm_area_struct *vma = current->vmacache[i];

		if (vma && vma->vm_start == start && vma->vm_end == end) {
			count_vm_vmacache_event(VMACACHE_FIND_HITS);
			return vma;
		}
	}

	return NULL;
}
#endif
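
As an aside, the seqnum-based invalidation above can be modelled in a
few lines of user-space C. This is a simplified sketch (one task, one
mm, no locking; the VMACACHE_* values mirror the kernel's, everything
else is made up for illustration):

  #include <stdio.h>

  #define VMACACHE_SIZE        4
  #define VMACACHE_MASK        (VMACACHE_SIZE - 1)
  #define PAGE_SHIFT           12
  #define VMACACHE_HASH(addr)  (((addr) >> PAGE_SHIFT) & VMACACHE_MASK)

  struct vma { unsigned long start, end; };

  static struct vma *cache[VMACACHE_SIZE];
  static unsigned int task_seqnum, mm_seqnum;

  /* Mirrors vmacache_update(): store in the hashed slot. */
  static void update(unsigned long addr, struct vma *v)
  {
          cache[VMACACHE_HASH(addr)] = v;
  }

  /* Mirrors vmacache_find(): validate the seqnum, then scan all slots. */
  static struct vma *find(unsigned long addr)
  {
          int i;

          if (task_seqnum != mm_seqnum) {         /* stale: flush and miss */
                  task_seqnum = mm_seqnum;
                  for (i = 0; i < VMACACHE_SIZE; i++)
                          cache[i] = NULL;
                  return NULL;
          }
          for (i = 0; i < VMACACHE_SIZE; i++)
                  if (cache[i] && cache[i]->start <= addr &&
                      cache[i]->end > addr)
                          return cache[i];
          return NULL;
  }

  int main(void)
  {
          struct vma v = { 0x1000, 0x5000 };

          update(0x1234, &v);
          printf("hit: %d\n", find(0x1234) != NULL);  /* 1 */
          mm_seqnum++;                /* e.g. an unmap bumped the seqnum */
          printf("hit: %d\n", find(0x1234) != NULL);  /* 0: invalidated */
          return 0;
  }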