Revision 628185cfddf1dfb701c4efe2cfd72cf5b09f5702 authored by Daniel Borkmann on 21 December 2016, 17:04:11 UTC, committed by David S. Miller on 26 December 2016, 16:24:10 UTC
Shahar reported a soft lockup in tc_classify(), where we run into an endless loop when walking the classifier chain due to tp->next == tp which is a state we should never run into. The issue only seems to trigger under load in the tc control path. What happens is that in tc_ctl_tfilter(), thread A allocates a new tp, initializes it, sets tp_created to 1, and calls into tp->ops->change() with it. In that classifier callback we had to unlock/lock the rtnl mutex and returned with -EAGAIN. One reason why we need to drop there is, for example, that we need to request an action module to be loaded. This happens via tcf_exts_validate() -> tcf_action_init/_1() meaning after we loaded and found the requested action, we need to redo the whole request so we don't race against others. While we had to unlock rtnl in that time, thread B's request was processed next on that CPU. Thread B added a new tp instance successfully to the classifier chain. When thread A returned grabbing the rtnl mutex again, propagating -EAGAIN and destroying its tp instance which never got linked, we goto replay and redo A's request. This time when walking the classifier chain in tc_ctl_tfilter() for checking for existing tp instances we had a priority match and found the tp instance that was created and linked by thread B. Now calling again into tp->ops->change() with that tp was successful and returned without error. tp_created was never cleared in the second round, thus kernel thinks that we need to link it into the classifier chain (once again). tp and *back point to the same object due to the match we had earlier on. Thus for thread B's already public tp, we reset tp->next to tp itself and link it into the chain, which eventually causes the mentioned endless loop in tc_classify() once a packet hits the data path. Fix is to clear tp_created at the beginning of each request, also when we replay it. On the paths that can cause -EAGAIN we already destroy the original tp instance we had and on replay we really need to start from scratch. It seems that this issue was first introduced in commit 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup"). Fixes: 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup") Reported-by: Shahar Klein <shahark@mellanox.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Cc: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: Eric Dumazet <edumazet@google.com> Tested-by: Shahar Klein <shahark@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent eb9def6
Kbuild
#
# Kbuild for top-level directory of the kernel
# This file takes care of the following:
# 1) Generate bounds.h
# 2) Generate timeconst.h
# 3) Generate asm-offsets.h (may need bounds.h and timeconst.h)
# 4) Check for missing system calls
# 5) Generate constants.py (may need bounds.h)
# Default sed regexp - multiline due to syntax constraints
define sed-y
"/^->/{s:->#\(.*\):/* \1 */:; \
s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \
s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
s:->::; p;}"
endef
# Use filechk to avoid rebuilds when a header changes, but the resulting file
# does not
define filechk_offsets
(set -e; \
echo "#ifndef $2"; \
echo "#define $2"; \
echo "/*"; \
echo " * DO NOT MODIFY."; \
echo " *"; \
echo " * This file was generated by Kbuild"; \
echo " */"; \
echo ""; \
sed -ne $(sed-y); \
echo ""; \
echo "#endif" )
endef
#####
# 1) Generate bounds.h
bounds-file := include/generated/bounds.h
always := $(bounds-file)
targets := kernel/bounds.s
# We use internal kbuild rules to avoid the "is up to date" message from make
kernel/bounds.s: kernel/bounds.c FORCE
$(Q)mkdir -p $(dir $@)
$(call if_changed_dep,cc_s_c)
$(obj)/$(bounds-file): kernel/bounds.s FORCE
$(call filechk,offsets,__LINUX_BOUNDS_H__)
#####
# 2) Generate timeconst.h
timeconst-file := include/generated/timeconst.h
targets += $(timeconst-file)
quiet_cmd_gentimeconst = GEN $@
define cmd_gentimeconst
(echo $(CONFIG_HZ) | bc -q $< ) > $@
endef
define filechk_gentimeconst
(echo $(CONFIG_HZ) | bc -q $< )
endef
$(obj)/$(timeconst-file): kernel/time/timeconst.bc FORCE
$(call filechk,gentimeconst)
#####
# 3) Generate asm-offsets.h
#
offsets-file := include/generated/asm-offsets.h
always += $(offsets-file)
targets += arch/$(SRCARCH)/kernel/asm-offsets.s
# We use internal kbuild rules to avoid the "is up to date" message from make
arch/$(SRCARCH)/kernel/asm-offsets.s: arch/$(SRCARCH)/kernel/asm-offsets.c \
$(obj)/$(timeconst-file) $(obj)/$(bounds-file) FORCE
$(Q)mkdir -p $(dir $@)
$(call if_changed_dep,cc_s_c)
$(obj)/$(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE
$(call filechk,offsets,__ASM_OFFSETS_H__)
#####
# 4) Check for missing system calls
#
always += missing-syscalls
targets += missing-syscalls
quiet_cmd_syscalls = CALL $<
cmd_syscalls = $(CONFIG_SHELL) $< $(CC) $(c_flags) $(missing_syscalls_flags)
missing-syscalls: scripts/checksyscalls.sh $(offsets-file) FORCE
$(call cmd,syscalls)
#####
# 5) Generate constants for Python GDB integration
#
extra-$(CONFIG_GDB_SCRIPTS) += build_constants_py
build_constants_py: $(obj)/$(timeconst-file) $(obj)/$(bounds-file)
@$(MAKE) $(build)=scripts/gdb/linux $@
# Keep these three files during make clean
no-clean-files := $(bounds-file) $(offsets-file) $(timeconst-file)
Computing file changes ...