Revision a5ae50dea9111db63d30d700766dd5509602f7ad authored by Filipe Manana on 20 February 2020, 13:29:49 UTC, committed by David Sterba on 21 February 2020, 15:21:19 UTC
While logging the prealloc extents of an inode during a fast fsync we call btrfs_truncate_inode_items(), through btrfs_log_prealloc_extents(), while holding a read lock on a leaf of the inode's root (not the log root, the fs/subvol root), and then that function locks the file range in the inode's iotree. This can lead to a deadlock when: * the fsync is ranged * the file has prealloc extents beyond eof * writeback for a range different from the fsync range starts during the fsync * the size of the file is not sector size aligned Because when finishing an ordered extent we lock first a file range and then try to COW the fs/subvol tree to insert an extent item. The following diagram shows how the deadlock can happen. CPU 1 CPU 2 btrfs_sync_file() --> for range [0, 1MiB) --> inode has a size of 1MiB and has 1 prealloc extent beyond the i_size, starting at offset 4MiB flushes all delalloc for the range [0MiB, 1MiB) and waits for the respective ordered extents to complete --> before task at CPU 1 locks the inode, a write into file range [1MiB, 2MiB + 1KiB) is made --> i_size is updated to 2MiB + 1KiB --> writeback is started for that range, [1MiB, 2MiB + 4KiB) --> end offset rounded up to be sector size aligned btrfs_log_dentry_safe() btrfs_log_inode_parent() btrfs_log_inode() btrfs_log_changed_extents() btrfs_log_prealloc_extents() --> does a search on the inode's root --> holds a read lock on leaf X btrfs_finish_ordered_io() --> locks range [1MiB, 2MiB + 4KiB) --> end offset rounded up to be sector size aligned --> tries to cow leaf X, through insert_reserved_file_extent() --> already locked by the task at CPU 1 btrfs_truncate_inode_items() --> gets an i_size of 2MiB + 1KiB, which is not sector size aligned --> tries to lock file range [2MiB, (u64)-1) --> the start range is rounded down from 2MiB + 1K to 2MiB to be sector size aligned --> but the subrange [2MiB, 2MiB + 4KiB) is already locked by task at CPU 2 which is waiting to get a write lock on leaf X for which we 
are holding a read lock *** deadlock *** This results in a stack trace like the following, triggered by test case generic/561 from fstests: [ 2779.973608] INFO: task kworker/u8:6:247 blocked for more than 120 seconds. [ 2779.979536] Not tainted 5.6.0-rc2-btrfs-next-53 #1 [ 2779.984503] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 2779.990136] kworker/u8:6 D 0 247 2 0x80004000 [ 2779.990457] Workqueue: btrfs-endio-write btrfs_work_helper [btrfs] [ 2779.990466] Call Trace: [ 2779.990491] ? __schedule+0x384/0xa30 [ 2779.990521] schedule+0x33/0xe0 [ 2779.990616] btrfs_tree_read_lock+0x19e/0x2e0 [btrfs] [ 2779.990632] ? remove_wait_queue+0x60/0x60 [ 2779.990730] btrfs_read_lock_root_node+0x2f/0x40 [btrfs] [ 2779.990782] btrfs_search_slot+0x510/0x1000 [btrfs] [ 2779.990869] btrfs_lookup_file_extent+0x4a/0x70 [btrfs] [ 2779.990944] __btrfs_drop_extents+0x161/0x1060 [btrfs] [ 2779.990987] ? mark_held_locks+0x6d/0xc0 [ 2779.990994] ? __slab_alloc.isra.49+0x99/0x100 [ 2779.991060] ? insert_reserved_file_extent.constprop.19+0x64/0x300 [btrfs] [ 2779.991145] insert_reserved_file_extent.constprop.19+0x97/0x300 [btrfs] [ 2779.991222] ? start_transaction+0xdd/0x5c0 [btrfs] [ 2779.991291] btrfs_finish_ordered_io+0x4f4/0x840 [btrfs] [ 2779.991405] btrfs_work_helper+0xaa/0x720 [btrfs] [ 2779.991432] process_one_work+0x26d/0x6a0 [ 2779.991460] worker_thread+0x4f/0x3e0 [ 2779.991481] ? process_one_work+0x6a0/0x6a0 [ 2779.991489] kthread+0x103/0x140 [ 2779.991499] ? kthread_create_worker_on_cpu+0x70/0x70 [ 2779.991515] ret_from_fork+0x3a/0x50 (...) [ 2780.026211] INFO: task fsstress:17375 blocked for more than 120 seconds. [ 2780.027480] Not tainted 5.6.0-rc2-btrfs-next-53 #1 [ 2780.028482] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 2780.030035] fsstress D 0 17375 17373 0x00004000 [ 2780.030038] Call Trace: [ 2780.030044] ? 
__schedule+0x384/0xa30 [ 2780.030052] schedule+0x33/0xe0 [ 2780.030075] lock_extent_bits+0x20c/0x320 [btrfs] [ 2780.030094] ? btrfs_truncate_inode_items+0xf4/0x1150 [btrfs] [ 2780.030098] ? rcu_read_lock_sched_held+0x59/0xa0 [ 2780.030102] ? remove_wait_queue+0x60/0x60 [ 2780.030122] btrfs_truncate_inode_items+0x133/0x1150 [btrfs] [ 2780.030151] ? btrfs_set_path_blocking+0xb2/0x160 [btrfs] [ 2780.030165] ? btrfs_search_slot+0x379/0x1000 [btrfs] [ 2780.030195] btrfs_log_changed_extents.isra.8+0x841/0x93e [btrfs] [ 2780.030202] ? do_raw_spin_unlock+0x49/0xc0 [ 2780.030215] ? btrfs_get_num_csums+0x10/0x10 [btrfs] [ 2780.030239] btrfs_log_inode+0xf83/0x1124 [btrfs] [ 2780.030251] ? __mutex_unlock_slowpath+0x45/0x2a0 [ 2780.030275] btrfs_log_inode_parent+0x2a0/0xe40 [btrfs] [ 2780.030282] ? dget_parent+0xa1/0x370 [ 2780.030309] btrfs_log_dentry_safe+0x4a/0x70 [btrfs] [ 2780.030329] btrfs_sync_file+0x3f3/0x490 [btrfs] [ 2780.030339] do_fsync+0x38/0x60 [ 2780.030343] __x64_sys_fdatasync+0x13/0x20 [ 2780.030345] do_syscall_64+0x5c/0x280 [ 2780.030348] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 2780.030356] RIP: 0033:0x7f2d80f6d5f0 [ 2780.030361] Code: Bad RIP value. [ 2780.030362] RSP: 002b:00007ffdba3c8548 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 2780.030364] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f2d80f6d5f0 [ 2780.030365] RDX: 00007ffdba3c84b0 RSI: 00007ffdba3c84b0 RDI: 0000000000000003 [ 2780.030367] RBP: 000000000000004a R08: 0000000000000001 R09: 00007ffdba3c855c [ 2780.030368] R10: 0000000000000078 R11: 0000000000000246 R12: 00000000000001f4 [ 2780.030369] R13: 0000000051eb851f R14: 00007ffdba3c85f0 R15: 0000557a49220d90 So fix this by making btrfs_truncate_inode_items() not lock the range in the inode's iotree when the target root is a log root, since it's not needed to lock the range for log roots as the protection from the inode's lock and log_mutex are all that's needed. 
Fixes: 28553fa992cb28 ("Btrfs: fix race between shrinking truncate and fiemap")
CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
1 parent e75fd33
File | Mode | Size |
---|---|---|
842 | ||
crypto | ||
dim | ||
fonts | ||
kunit | ||
livepatch | ||
lz4 | ||
lzo | ||
math | ||
mpi | ||
raid6 | ||
reed_solomon | ||
vdso | ||
xz | ||
zlib_deflate | ||
zlib_inflate | ||
zstd | ||
.gitignore | -rw-r--r-- | 98 bytes |
Kconfig | -rw-r--r-- | 14.7 KB |
Kconfig.debug | -rw-r--r-- | 71.2 KB |
Kconfig.kasan | -rw-r--r-- | 5.9 KB |
Kconfig.kgdb | -rw-r--r-- | 4.1 KB |
Kconfig.ubsan | -rw-r--r-- | 1.7 KB |
Makefile | -rw-r--r-- | 9.6 KB |
argv_split.c | -rw-r--r-- | 2.1 KB |
ashldi3.c | -rw-r--r-- | 541 bytes |
ashrdi3.c | -rw-r--r-- | 565 bytes |
asn1_decoder.c | -rw-r--r-- | 13.2 KB |
assoc_array.c | -rw-r--r-- | 51.8 KB |
atomic64.c | -rw-r--r-- | 4.5 KB |
atomic64_test.c | -rw-r--r-- | 6.4 KB |
audit.c | -rw-r--r-- | 1.8 KB |
bcd.c | -rw-r--r-- | 297 bytes |
bch.c | -rw-r--r-- | 36.1 KB |
bitmap.c | -rw-r--r-- | 36.3 KB |
bitrev.c | -rw-r--r-- | 1.9 KB |
bsearch.c | -rw-r--r-- | 1.4 KB |
btree.c | -rw-r--r-- | 19.2 KB |
bucket_locks.c | -rw-r--r-- | 1.4 KB |
bug.c | -rw-r--r-- | 5.8 KB |
build_OID_registry | -rwxr-xr-x | 4.5 KB |
bust_spinlocks.c | -rw-r--r-- | 676 bytes |
check_signature.c | -rw-r--r-- | 635 bytes |
checksum.c | -rw-r--r-- | 4.8 KB |
clz_ctz.c | -rw-r--r-- | 1.2 KB |
clz_tab.c | -rw-r--r-- | 891 bytes |
cmdline.c | -rw-r--r-- | 5.1 KB |
cmpdi2.c | -rw-r--r-- | 501 bytes |
compat_audit.c | -rw-r--r-- | 832 bytes |
cpu_rmap.c | -rw-r--r-- | 7.6 KB |
cpumask.c | -rw-r--r-- | 6.0 KB |
crc-ccitt.c | -rw-r--r-- | 5.6 KB |
crc-itu-t.c | -rw-r--r-- | 2.7 KB |
crc-t10dif.c | -rw-r--r-- | 2.9 KB |
crc16.c | -rw-r--r-- | 2.7 KB |
crc32.c | -rw-r--r-- | 9.3 KB |
crc32defs.h | -rw-r--r-- | 1.6 KB |
crc32test.c | -rw-r--r-- | 37.5 KB |
crc4.c | -rw-r--r-- | 1003 bytes |
crc64.c | -rw-r--r-- | 1.7 KB |
crc7.c | -rw-r--r-- | 2.5 KB |
crc8.c | -rw-r--r-- | 2.4 KB |
ctype.c | -rw-r--r-- | 1.4 KB |
debug_info.c | -rw-r--r-- | 777 bytes |
debug_locks.c | -rw-r--r-- | 1.2 KB |
debugobjects.c | -rw-r--r-- | 34.2 KB |
dec_and_lock.c | -rw-r--r-- | 1.2 KB |
decompress.c | -rw-r--r-- | 1.7 KB |
decompress_bunzip2.c | -rw-r--r-- | 23.5 KB |
decompress_inflate.c | -rw-r--r-- | 4.5 KB |
decompress_unlz4.c | -rw-r--r-- | 4.0 KB |
decompress_unlzma.c | -rw-r--r-- | 15.8 KB |
decompress_unlzo.c | -rw-r--r-- | 6.4 KB |
decompress_unxz.c | -rw-r--r-- | 10.9 KB |
devres.c | -rw-r--r-- | 12.3 KB |
digsig.c | -rw-r--r-- | 5.5 KB |
dump_stack.c | -rw-r--r-- | 3.2 KB |
dynamic_debug.c | -rw-r--r-- | 26.1 KB |
dynamic_queue_limits.c | -rw-r--r-- | 4.3 KB |
earlycpio.c | -rw-r--r-- | 3.6 KB |
errname.c | -rw-r--r-- | 3.6 KB |
error-inject.c | -rw-r--r-- | 5.4 KB |
errseq.c | -rw-r--r-- | 6.6 KB |
extable.c | -rw-r--r-- | 3.0 KB |
fault-inject.c | -rw-r--r-- | 5.8 KB |
fdt.c | -rw-r--r-- | 69 bytes |
fdt_empty_tree.c | -rw-r--r-- | 80 bytes |
fdt_ro.c | -rw-r--r-- | 72 bytes |
fdt_rw.c | -rw-r--r-- | 72 bytes |
fdt_strerror.c | -rw-r--r-- | 78 bytes |
fdt_sw.c | -rw-r--r-- | 72 bytes |
fdt_wip.c | -rw-r--r-- | 73 bytes |
find_bit.c | -rw-r--r-- | 5.4 KB |
find_bit_benchmark.c | -rw-r--r-- | 3.9 KB |
flex_proportions.c | -rw-r--r-- | 6.9 KB |
gen_crc32table.c | -rw-r--r-- | 3.3 KB |
gen_crc64table.c | -rw-r--r-- | 1.4 KB |
genalloc.c | -rw-r--r-- | 26.1 KB |
generic-radix-tree.c | -rw-r--r-- | 5.3 KB |
glob.c | -rw-r--r-- | 3.5 KB |
globtest.c | -rw-r--r-- | 4.2 KB |
hexdump.c | -rw-r--r-- | 7.3 KB |
hweight.c | -rw-r--r-- | 1.9 KB |
idr.c | -rw-r--r-- | 17.3 KB |
inflate.c | -rw-r--r-- | 38.7 KB |
interval_tree.c | -rw-r--r-- | 540 bytes |
interval_tree_test.c | -rw-r--r-- | 3.4 KB |
iomap.c | -rw-r--r-- | 9.1 KB |
iomap_copy.c | -rw-r--r-- | 2.2 KB |
iommu-helper.c | -rw-r--r-- | 755 bytes |
ioremap.c | -rw-r--r-- | 6.1 KB |
iov_iter.c | -rw-r--r-- | 42.0 KB |
irq_poll.c | -rw-r--r-- | 5.4 KB |
irq_regs.c | -rw-r--r-- | 394 bytes |
is_single_threaded.c | -rw-r--r-- | 1.2 KB |
kasprintf.c | -rw-r--r-- | 1.4 KB |
kfifo.c | -rw-r--r-- | 12.1 KB |
klist.c | -rw-r--r-- | 10.4 KB |
kobject.c | -rw-r--r-- | 27.9 KB |
kobject_uevent.c | -rw-r--r-- | 18.8 KB |
kstrtox.c | -rw-r--r-- | 10.5 KB |
kstrtox.h | -rw-r--r-- | 293 bytes |
libcrc32c.c | -rw-r--r-- | 2.0 KB |
list-test.c | -rw-r--r-- | 17.4 KB |
list_debug.c | -rw-r--r-- | 1.8 KB |
list_sort.c | -rw-r--r-- | 8.4 KB |
llist.c | -rw-r--r-- | 2.5 KB |
locking-selftest-hardirq.h | -rw-r--r-- | 246 bytes |
locking-selftest-mutex.h | -rw-r--r-- | 159 bytes |
locking-selftest-rlock-hardirq.h | -rw-r--r-- | 74 bytes |
locking-selftest-rlock-softirq.h | -rw-r--r-- | 74 bytes |
locking-selftest-rlock.h | -rw-r--r-- | 197 bytes |
locking-selftest-rsem.h | -rw-r--r-- | 202 bytes |
locking-selftest-rtmutex.h | -rw-r--r-- | 162 bytes |
locking-selftest-softirq.h | -rw-r--r-- | 246 bytes |
locking-selftest-spin-hardirq.h | -rw-r--r-- | 73 bytes |
locking-selftest-spin-softirq.h | -rw-r--r-- | 73 bytes |
locking-selftest-spin.h | -rw-r--r-- | 157 bytes |
locking-selftest-wlock-hardirq.h | -rw-r--r-- | 74 bytes |
locking-selftest-wlock-softirq.h | -rw-r--r-- | 74 bytes |
locking-selftest-wlock.h | -rw-r--r-- | 197 bytes |
locking-selftest-wsem.h | -rw-r--r-- | 202 bytes |
locking-selftest.c | -rw-r--r-- | 43.7 KB |
lockref.c | -rw-r--r-- | 4.5 KB |
logic_pio.c | -rw-r--r-- | 8.4 KB |
lru_cache.c | -rw-r--r-- | 18.8 KB |
lshrdi3.c | -rw-r--r-- | 559 bytes |
memcat_p.c | -rw-r--r-- | 753 bytes |
memory-notifier-error-inject.c | -rw-r--r-- | 1.1 KB |
memregion.c | -rw-r--r-- | 400 bytes |
memweight.c | -rw-r--r-- | 1.0 KB |
muldi3.c | -rw-r--r-- | 1.7 KB |
net_utils.c | -rw-r--r-- | 640 bytes |
netdev-notifier-error-inject.c | -rw-r--r-- | 1.5 KB |
nlattr.c | -rw-r--r-- | 22.6 KB |
nmi_backtrace.c | -rw-r--r-- | 3.0 KB |
nodemask.c | -rw-r--r-- | 653 bytes |
notifier-error-inject.c | -rw-r--r-- | 2.5 KB |
notifier-error-inject.h | -rw-r--r-- | 653 bytes |
objagg.c | -rw-r--r-- | 28.3 KB |
of-reconfig-notifier-error-inject.c | -rw-r--r-- | 1.3 KB |
oid_registry.c | -rw-r--r-- | 3.7 KB |
once.c | -rw-r--r-- | 1.4 KB |
packing.c | -rw-r--r-- | 6.5 KB |
parman.c | -rw-r--r-- | 10.6 KB |
parser.c | -rw-r--r-- | 8.1 KB |
pci_iomap.c | -rw-r--r-- | 4.2 KB |
percpu-refcount.c | -rw-r--r-- | 13.5 KB |
percpu_counter.c | -rw-r--r-- | 5.8 KB |
percpu_test.c | -rw-r--r-- | 3.2 KB |
plist.c | -rw-r--r-- | 5.9 KB |
pm-notifier-error-inject.c | -rw-r--r-- | 1.2 KB |
radix-tree.c | -rw-r--r-- | 43.3 KB |
random32.c | -rw-r--r-- | 12.8 KB |
ratelimit.c | -rw-r--r-- | 1.6 KB |
rbtree.c | -rw-r--r-- | 17.1 KB |
rbtree_test.c | -rw-r--r-- | 9.4 KB |
refcount.c | -rw-r--r-- | 4.8 KB |
rhashtable.c | -rw-r--r-- | 29.4 KB |
sbitmap.c | -rw-r--r-- | 16.3 KB |
scatterlist.c | -rw-r--r-- | 25.2 KB |
seq_buf.c | -rw-r--r-- | 9.9 KB |
sg_pool.c | -rw-r--r-- | 4.2 KB |
sg_split.c | -rw-r--r-- | 5.0 KB |
sha1.c | -rw-r--r-- | 6.1 KB |
show_mem.c | -rw-r--r-- | 1.1 KB |
siphash.c | -rw-r--r-- | 12.0 KB |
smp_processor_id.c | -rw-r--r-- | 1.4 KB |
sort.c | -rw-r--r-- | 8.4 KB |
stackdepot.c | -rw-r--r-- | 8.6 KB |
stmp_device.c | -rw-r--r-- | 1.9 KB |
string.c | -rw-r--r-- | 23.5 KB |
string_helpers.c | -rw-r--r-- | 13.9 KB |
strncpy_from_user.c | -rw-r--r-- | 3.2 KB |
strnlen_user.c | -rw-r--r-- | 3.3 KB |
syscall.c | -rw-r--r-- | 2.5 KB |
test-kstrtox.c | -rw-r--r-- | 17.3 KB |
test-string_helpers.c | -rw-r--r-- | 10.3 KB |
test_bitfield.c | -rw-r--r-- | 4.3 KB |
test_bitmap.c | -rw-r--r-- | 13.7 KB |
test_blackhole_dev.c | -rw-r--r-- | 2.5 KB |
test_bpf.c | -rw-r--r-- | 161.1 KB |
test_debug_virtual.c | -rw-r--r-- | 981 bytes |
test_firmware.c | -rw-r--r-- | 22.9 KB |
test_hash.c | -rw-r--r-- | 6.3 KB |
test_hexdump.c | -rw-r--r-- | 6.3 KB |
test_ida.c | -rw-r--r-- | 4.3 KB |
test_kasan.c | -rw-r--r-- | 16.8 KB |
test_kmod.c | -rw-r--r-- | 30.0 KB |
test_list_sort.c | -rw-r--r-- | 3.3 KB |
test_memcat_p.c | -rw-r--r-- | 2.2 KB |
test_meminit.c | -rw-r--r-- | 9.7 KB |
test_module.c | -rw-r--r-- | 794 bytes |
test_objagg.c | -rw-r--r-- | 24.6 KB |
test_overflow.c | -rw-r--r-- | 22.3 KB |
test_parman.c | -rw-r--r-- | 11.2 KB |
test_printf.c | -rw-r--r-- | 16.0 KB |
test_rhashtable.c | -rw-r--r-- | 20.0 KB |
test_siphash.c | -rw-r--r-- | 7.5 KB |
test_sort.c | -rw-r--r-- | 870 bytes |
test_stackinit.c | -rw-r--r-- | 10.9 KB |
test_static_key_base.c | -rw-r--r-- | 1.6 KB |
test_static_keys.c | -rw-r--r-- | 5.6 KB |
test_string.c | -rw-r--r-- | 3.8 KB |
test_strscpy.c | -rw-r--r-- | 4.0 KB |
test_sysctl.c | -rw-r--r-- | 3.7 KB |
test_ubsan.c | -rw-r--r-- | 2.4 KB |
test_user_copy.c | -rw-r--r-- | 9.1 KB |
test_uuid.c | -rw-r--r-- | 3.4 KB |
test_vmalloc.c | -rw-r--r-- | 10.6 KB |
test_xarray.c | -rw-r--r-- | 42.0 KB |
textsearch.c | -rw-r--r-- | 9.3 KB |
timerqueue.c | -rw-r--r-- | 2.5 KB |
ts_bm.c | -rw-r--r-- | 5.1 KB |
ts_fsm.c | -rw-r--r-- | 10.4 KB |
ts_kmp.c | -rw-r--r-- | 4.1 KB |
ubsan.c | -rw-r--r-- | 10.0 KB |
ubsan.h | -rw-r--r-- | 1.6 KB |
ucmpdi2.c | -rw-r--r-- | 568 bytes |
ucs2_string.c | -rw-r--r-- | 2.5 KB |
usercopy.c | -rw-r--r-- | 2.0 KB |
uuid.c | -rw-r--r-- | 2.6 KB |
vsprintf.c | -rw-r--r-- | 80.9 KB |
win_minmax.c | -rw-r--r-- | 3.4 KB |
xarray.c | -rw-r--r-- | 52.8 KB |
xxhash.c | -rw-r--r-- | 12.7 KB |
Computing file changes ...