Revision a8950e49bd241dc9ad90c24e92e23e95dbe9f018 authored by Romain Perier on 19 April 2016, 10:17:32 UTC, committed by Ley Foon Tan on 27 April 2016, 08:35:55 UTC
Depending on the size of the area to be memset'ed, the nios2 memset implementation either uses a naive loop (for buffers smaller or equal than 8 bytes) or a more optimized implementation (for buffers larger than 8 bytes). This implementation does 4-byte stores rather than 1-byte stores to speed up memset. However, we discovered that on our nios2 platform, memset() was not properly setting the buffer to the expected value. A memset of 0xff would not set the entire buffer to 0xff, but to: 0xff 0x00 0xff 0x00 0xff 0x00 0xff 0x00 ... Which is obviously incorrect. Our investigation has revealed that the problem lies in the incorrect constraints used in the inline assembly. The following piece of assembly, from the nios2 memset implementation, is supposed to create a 4-byte value that repeats 4 times the 1-byte pattern passed as memset argument: /* fill8 %3, %5 (c & 0xff) */ " slli %4, %5, 8\n" " or %4, %4, %5\n" " slli %3, %4, 16\n" " or %3, %3, %4\n" However, depending on the compiler and optimization level, this code might be compiled as: 34: 280a923a slli r5,r5,8 38: 294ab03a or r5,r5,r5 3c: 2808943a slli r4,r5,16 40: 2148b03a or r4,r4,r5 This is wrong because r5 gets used both for %5 and %4, which leads to the final pattern stored in r4 to be 0xff00ff00 rather than the expected 0xffffffff. %4 is defined with the "=r" constraint, i.e as an output operand. However, as explained in http://www.ethernut.de/en/documents/arm-inline-asm.html, this does not prevent gcc from using the same register for an output operand (%4) and input operand (%5). By using the constraint modifier '&', we indicate that the register should be used for output only. With this change, we get the following assembly output: 34: 2810923a slli r8,r5,8 38: 4150b03a or r8,r8,r5 3c: 400e943a slli r7,r8,16 40: 3a0eb03a or r7,r7,r8 Which correctly produces the 0xffffffff pattern when 0xff is passed as the memset() pattern. It is worth mentioning the observed consequence of this bug: we were hitting the kernel BUG() in mm/bootmem.c:__free() that verifies when marking a page as free that it was previously marked as occupied (i.e that the bit was set to 1). The entire bootmem bitmap is set to 0xff bit via a memset() during the bootmem initialization. The bootmem_free() call right after the initialization was finding some bits to be set to 0, which didn't make sense since the bitmap has just been memset'ed to 0xff. Except that due to the bug explained above, the bitmap was in fact initialized to 0xff00ff00. Thanks to Marek Vasut for his help and feedback. Signed-off-by: Romain Perier <romain.perier@free-electrons.com> Acked-by: Marek Vasut <marex@denx.de> Acked-by: Ley Foon Tan <lftan@altera.com>
1 parent 02da2d7
Kbuild
#
# Kbuild for top-level directory of the kernel
# This file takes care of the following:
# 1) Generate bounds.h
# 2) Generate timeconst.h
# 3) Generate asm-offsets.h (may need bounds.h and timeconst.h)
# 4) Check for missing system calls
# Default sed regexp - multiline due to syntax constraints
define sed-y
"/^->/{s:->#\(.*\):/* \1 */:; \
s:^->\([^ ]*\) [\$$#]*\([-0-9]*\) \(.*\):#define \1 \2 /* \3 */:; \
s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
s:->::; p;}"
endef
# Use filechk to avoid rebuilds when a header changes, but the resulting file
# does not
define filechk_offsets
(set -e; \
echo "#ifndef $2"; \
echo "#define $2"; \
echo "/*"; \
echo " * DO NOT MODIFY."; \
echo " *"; \
echo " * This file was generated by Kbuild"; \
echo " */"; \
echo ""; \
sed -ne $(sed-y); \
echo ""; \
echo "#endif" )
endef
#####
# 1) Generate bounds.h
bounds-file := include/generated/bounds.h
always := $(bounds-file)
targets := kernel/bounds.s
# We use internal kbuild rules to avoid the "is up to date" message from make
kernel/bounds.s: kernel/bounds.c FORCE
$(Q)mkdir -p $(dir $@)
$(call if_changed_dep,cc_s_c)
$(obj)/$(bounds-file): kernel/bounds.s FORCE
$(call filechk,offsets,__LINUX_BOUNDS_H__)
#####
# 2) Generate timeconst.h
timeconst-file := include/generated/timeconst.h
targets += $(timeconst-file)
quiet_cmd_gentimeconst = GEN $@
define cmd_gentimeconst
(echo $(CONFIG_HZ) | bc -q $< ) > $@
endef
define filechk_gentimeconst
(echo $(CONFIG_HZ) | bc -q $< )
endef
$(obj)/$(timeconst-file): kernel/time/timeconst.bc FORCE
$(call filechk,gentimeconst)
#####
# 3) Generate asm-offsets.h
#
offsets-file := include/generated/asm-offsets.h
always += $(offsets-file)
targets += arch/$(SRCARCH)/kernel/asm-offsets.s
# We use internal kbuild rules to avoid the "is up to date" message from make
arch/$(SRCARCH)/kernel/asm-offsets.s: arch/$(SRCARCH)/kernel/asm-offsets.c \
$(obj)/$(timeconst-file) $(obj)/$(bounds-file) FORCE
$(Q)mkdir -p $(dir $@)
$(call if_changed_dep,cc_s_c)
$(obj)/$(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE
$(call filechk,offsets,__ASM_OFFSETS_H__)
#####
# 4) Check for missing system calls
#
always += missing-syscalls
targets += missing-syscalls
quiet_cmd_syscalls = CALL $<
cmd_syscalls = $(CONFIG_SHELL) $< $(CC) $(c_flags) $(missing_syscalls_flags)
missing-syscalls: scripts/checksyscalls.sh $(offsets-file) FORCE
$(call cmd,syscalls)
# Keep these three files during make clean
no-clean-files := $(bounds-file) $(offsets-file) $(timeconst-file)
Computing file changes ...