Revision f6ba488073fe8159851fe398cc3c5ee383bb4c7a authored by Vladimir Davydov on 18 August 2017, 22:16:08 UTC, committed by Linus Torvalds on 18 August 2017, 22:32:01 UTC
To avoid a possible deadlock, sysfs_slab_remove() schedules an asynchronous work to delete sysfs entries corresponding to the kmem cache. To ensure the cache isn't freed before the work function is called, it takes a reference to the cache kobject. The reference is supposed to be released by the work function. However, the work function (sysfs_slab_remove_workfn()) does nothing in case the cache sysfs entry has already been deleted, leaking the kobject and the corresponding cache. This may happen on a per memcg cache destruction, because sysfs entries of a per memcg cache are deleted on memcg offline if the cache is empty (see __kmemcg_cache_deactivate()). The kmemleak report looks like this: unreferenced object 0xffff9f798a79f540 (size 32): comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.554s) hex dump (first 32 bytes): 6b 6d 61 6c 6c 6f 63 2d 31 36 28 31 35 39 39 3a kmalloc-16(1599: 6e 65 77 72 6f 6f 74 29 00 23 6b c0 ff ff ff ff newroot).#k..... backtrace: kmemleak_alloc+0x4a/0xa0 __kmalloc_track_caller+0x148/0x2c0 kvasprintf+0x66/0xd0 kasprintf+0x49/0x70 memcg_create_kmem_cache+0xe6/0x160 memcg_kmem_cache_create_func+0x20/0x110 process_one_work+0x205/0x5d0 worker_thread+0x4e/0x3a0 kthread+0x109/0x140 ret_from_fork+0x2a/0x40 unreferenced object 0xffff9f79b6136840 (size 416): comm "kworker/1:4", pid 15416, jiffies 4307432429 (age 28687.573s) hex dump (first 32 bytes): 40 fb 80 c2 3e 33 00 00 00 00 00 40 00 00 00 00 @...>3.....@.... 00 00 00 00 00 00 00 00 10 00 00 00 10 00 00 00 ................ backtrace: kmemleak_alloc+0x4a/0xa0 kmem_cache_alloc+0x128/0x280 create_cache+0x3b/0x1e0 memcg_create_kmem_cache+0x118/0x160 memcg_kmem_cache_create_func+0x20/0x110 process_one_work+0x205/0x5d0 worker_thread+0x4e/0x3a0 kthread+0x109/0x140 ret_from_fork+0x2a/0x40 Fix the leak by adding the missing call to kobject_put() to sysfs_slab_remove_workfn(). Link: http://lkml.kernel.org/r/20170812181134.25027-1-vdavydov.dev@gmail.com Fixes: 3b7b314053d02 ("slub: make sysfs file removal asynchronous") Signed-off-by: Vladimir Davydov <vdavydov.dev@gmail.com> Reported-by: Andrei Vagin <avagin@gmail.com> Tested-by: Andrei Vagin <avagin@gmail.com> Acked-by: Tejun Heo <tj@kernel.org> Acked-by: David Rientjes <rientjes@google.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: <stable@vger.kernel.org> [4.12.x] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 3010f87
irq_poll.c
/*
* Functions related to interrupt-poll handling in the block layer. This
* is similar to NAPI for network devices.
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/irq_poll.h>
#include <linux/delay.h>
static unsigned int irq_poll_budget __read_mostly = 256;
static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
/**
* irq_poll_sched - Schedule a run of the iopoll handler
* @iop: The parent iopoll structure
*
* Description:
* Add this irq_poll structure to the pending poll list and trigger the
* raise of the blk iopoll softirq.
**/
void irq_poll_sched(struct irq_poll *iop)
{
unsigned long flags;
if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
return;
if (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
return;
local_irq_save(flags);
list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
__raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
local_irq_restore(flags);
}
EXPORT_SYMBOL(irq_poll_sched);
/**
* __irq_poll_complete - Mark this @iop as un-polled again
* @iop: The parent iopoll structure
*
* Description:
* See irq_poll_complete(). This function must be called with interrupts
* disabled.
**/
static void __irq_poll_complete(struct irq_poll *iop)
{
list_del(&iop->list);
smp_mb__before_atomic();
clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
}
/**
* irq_poll_complete - Mark this @iop as un-polled again
* @iop: The parent iopoll structure
*
* Description:
* If a driver consumes less than the assigned budget in its run of the
* iopoll handler, it'll end the polled mode by calling this function. The
* iopoll handler will not be invoked again before irq_poll_sched()
* is called.
**/
void irq_poll_complete(struct irq_poll *iop)
{
unsigned long flags;
local_irq_save(flags);
__irq_poll_complete(iop);
local_irq_restore(flags);
}
EXPORT_SYMBOL(irq_poll_complete);
static void __latent_entropy irq_poll_softirq(struct softirq_action *h)
{
struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
int rearm = 0, budget = irq_poll_budget;
unsigned long start_time = jiffies;
local_irq_disable();
while (!list_empty(list)) {
struct irq_poll *iop;
int work, weight;
/*
* If softirq window is exhausted then punt.
*/
if (budget <= 0 || time_after(jiffies, start_time)) {
rearm = 1;
break;
}
local_irq_enable();
/* Even though interrupts have been re-enabled, this
* access is safe because interrupts can only add new
* entries to the tail of this list, and only ->poll()
* calls can remove this head entry from the list.
*/
iop = list_entry(list->next, struct irq_poll, list);
weight = iop->weight;
work = 0;
if (test_bit(IRQ_POLL_F_SCHED, &iop->state))
work = iop->poll(iop, weight);
budget -= work;
local_irq_disable();
/*
* Drivers must not modify the iopoll state, if they
* consume their assigned weight (or more, some drivers can't
* easily just stop processing, they have to complete an
* entire mask of commands).In such cases this code
* still "owns" the iopoll instance and therefore can
* move the instance around on the list at-will.
*/
if (work >= weight) {
if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
__irq_poll_complete(iop);
else
list_move_tail(&iop->list, list);
}
}
if (rearm)
__raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
local_irq_enable();
}
/**
* irq_poll_disable - Disable iopoll on this @iop
* @iop: The parent iopoll structure
*
* Description:
* Disable io polling and wait for any pending callbacks to have completed.
**/
void irq_poll_disable(struct irq_poll *iop)
{
set_bit(IRQ_POLL_F_DISABLE, &iop->state);
while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
msleep(1);
clear_bit(IRQ_POLL_F_DISABLE, &iop->state);
}
EXPORT_SYMBOL(irq_poll_disable);
/**
* irq_poll_enable - Enable iopoll on this @iop
* @iop: The parent iopoll structure
*
* Description:
* Enable iopoll on this @iop. Note that the handler run will not be
* scheduled, it will only mark it as active.
**/
void irq_poll_enable(struct irq_poll *iop)
{
BUG_ON(!test_bit(IRQ_POLL_F_SCHED, &iop->state));
smp_mb__before_atomic();
clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
}
EXPORT_SYMBOL(irq_poll_enable);
/**
* irq_poll_init - Initialize this @iop
* @iop: The parent iopoll structure
* @weight: The default weight (or command completion budget)
* @poll_fn: The handler to invoke
*
* Description:
* Initialize and enable this irq_poll structure.
**/
void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn)
{
memset(iop, 0, sizeof(*iop));
INIT_LIST_HEAD(&iop->list);
iop->weight = weight;
iop->poll = poll_fn;
}
EXPORT_SYMBOL(irq_poll_init);
static int irq_poll_cpu_dead(unsigned int cpu)
{
/*
* If a CPU goes away, splice its entries to the current CPU
* and trigger a run of the softirq
*/
local_irq_disable();
list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
this_cpu_ptr(&blk_cpu_iopoll));
__raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
local_irq_enable();
return 0;
}
static __init int irq_poll_setup(void)
{
int i;
for_each_possible_cpu(i)
INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
cpuhp_setup_state_nocalls(CPUHP_IRQ_POLL_DEAD, "irq_poll:dead", NULL,
irq_poll_cpu_dead);
return 0;
}
subsys_initcall(irq_poll_setup);
Computing file changes ...