https://github.com/torvalds/linux
Revision 7b2b55da1db10a5525460633ae4b6fb0be060c41 authored by Jason Xing on 25 August 2019, 00:54:53 UTC, committed by Linus Torvalds on 25 August 2019, 02:48:42 UTC
Only when calling the poll syscall the first time can user receive
POLLPRI correctly.  After that, user always fails to acquire the event
signal.

Reproduce case:
 1. Get the monitor code in Documentation/accounting/psi.txt
 2. Run it, and wait for the event triggered.
 3. Kill and restart the process.

The question is why we can end up with poll_scheduled = 1 but the work
not running (which would reset it to 0).  And the answer is because the
scheduling side sees group->poll_kworker under RCU protection and then
schedules it, but here we cancel the work and destroy the worker.  The
cancel needs to pair with resetting the poll_scheduled flag.

Link: http://lkml.kernel.org/r/1566357985-97781-1-git-send-email-joseph.qi@linux.alibaba.com
Signed-off-by: Jason Xing <kerneljasonxing@linux.alibaba.com>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Caspar Zhang <caspar@linux.alibaba.com>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent bb65f89
Raw File
Tip revision: 7b2b55da1db10a5525460633ae4b6fb0be060c41 authored by Jason Xing on 25 August 2019, 00:54:53 UTC
psi: get poll_work to run when calling poll syscall next time
Tip revision: 7b2b55d
fs_pin.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include "internal.h"
#include "mount.h"

static DEFINE_SPINLOCK(pin_lock);

void pin_remove(struct fs_pin *pin)
{
	spin_lock(&pin_lock);
	hlist_del_init(&pin->m_list);
	hlist_del_init(&pin->s_list);
	spin_unlock(&pin_lock);
	spin_lock_irq(&pin->wait.lock);
	pin->done = 1;
	wake_up_locked(&pin->wait);
	spin_unlock_irq(&pin->wait.lock);
}

void pin_insert(struct fs_pin *pin, struct vfsmount *m)
{
	spin_lock(&pin_lock);
	hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins);
	hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins);
	spin_unlock(&pin_lock);
}

void pin_kill(struct fs_pin *p)
{
	wait_queue_entry_t wait;

	if (!p) {
		rcu_read_unlock();
		return;
	}
	init_wait(&wait);
	spin_lock_irq(&p->wait.lock);
	if (likely(!p->done)) {
		p->done = -1;
		spin_unlock_irq(&p->wait.lock);
		rcu_read_unlock();
		p->kill(p);
		return;
	}
	if (p->done > 0) {
		spin_unlock_irq(&p->wait.lock);
		rcu_read_unlock();
		return;
	}
	__add_wait_queue(&p->wait, &wait);
	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&p->wait.lock);
		rcu_read_unlock();
		schedule();
		rcu_read_lock();
		if (likely(list_empty(&wait.entry)))
			break;
		/* OK, we know p couldn't have been freed yet */
		spin_lock_irq(&p->wait.lock);
		if (p->done > 0) {
			spin_unlock_irq(&p->wait.lock);
			break;
		}
	}
	rcu_read_unlock();
}

void mnt_pin_kill(struct mount *m)
{
	while (1) {
		struct hlist_node *p;
		rcu_read_lock();
		p = READ_ONCE(m->mnt_pins.first);
		if (!p) {
			rcu_read_unlock();
			break;
		}
		pin_kill(hlist_entry(p, struct fs_pin, m_list));
	}
}

void group_pin_kill(struct hlist_head *p)
{
	while (1) {
		struct hlist_node *q;
		rcu_read_lock();
		q = READ_ONCE(p->first);
		if (!q) {
			rcu_read_unlock();
			break;
		}
		pin_kill(hlist_entry(q, struct fs_pin, s_list));
	}
}
back to top