Revision 8c0bec2151a47906bf779c6715a10ce04453ab77 authored by Jiaying Zhang on 31 August 2011, 15:50:51 UTC, committed by Theodore Ts'o on 31 August 2011, 15:50:51 UTC
The i_mutex lock and flush_completed_IO() added by commit 2581fdc810 in ext4_evict_inode() causes lockdep complaining about potential deadlock in several places. In most/all of these LOCKDEP complaints it looks like it's a false positive, since many of the potential circular locking cases can't take place by the time the ext4_evict_inode() is called; but since at the very least it may mask real problems, we need to address this. This change removes the flush_completed_IO() and i_mutex lock in ext4_evict_inode(). Instead, we take a different approach to resolve the software lockup that commit 2581fdc810 intends to fix. Rather than having ext4-dio-unwritten thread wait for grabing the i_mutex lock of an inode, we use mutex_trylock() instead, and simply requeue the work item if we fail to grab the inode's i_mutex lock. This should speed up work queue processing in general and also prevents the following deadlock scenario: During page fault, shrink_icache_memory is called that in turn evicts another inode B. Inode B has some pending io_end work so it calls ext4_ioend_wait() that waits for inode B's i_ioend_count to become zero. However, inode B's ioend work was queued behind some of inode A's ioend work on the same cpu's ext4-dio-unwritten workqueue. As the ext4-dio-unwritten thread on that cpu is processing inode A's ioend work, it tries to grab inode A's i_mutex lock. Since the i_mutex lock of inode A is still hold before the page fault happened, we enter a deadlock. Signed-off-by: Jiaying Zhang <jiayingz@google.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
1 parent fcb8ce5
min_addr.c
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/security.h>
#include <linux/sysctl.h>
/* amount of vm to protect from userspace access by both DAC and the LSM*/
unsigned long mmap_min_addr;
/* amount of vm to protect from userspace using CAP_SYS_RAWIO (DAC) */
unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR;
/* amount of vm to protect from userspace using the LSM = CONFIG_LSM_MMAP_MIN_ADDR */
/*
* Update mmap_min_addr = max(dac_mmap_min_addr, CONFIG_LSM_MMAP_MIN_ADDR)
*/
static void update_mmap_min_addr(void)
{
#ifdef CONFIG_LSM_MMAP_MIN_ADDR
if (dac_mmap_min_addr > CONFIG_LSM_MMAP_MIN_ADDR)
mmap_min_addr = dac_mmap_min_addr;
else
mmap_min_addr = CONFIG_LSM_MMAP_MIN_ADDR;
#else
mmap_min_addr = dac_mmap_min_addr;
#endif
}
/*
* sysctl handler which just sets dac_mmap_min_addr = the new value and then
* calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly
*/
int mmap_min_addr_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int ret;
if (write && !capable(CAP_SYS_RAWIO))
return -EPERM;
ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
update_mmap_min_addr();
return ret;
}
static int __init init_mmap_min_addr(void)
{
update_mmap_min_addr();
return 0;
}
pure_initcall(init_mmap_min_addr);
![swh spinner](/static/img/swh-spinner.gif)
Computing file changes ...