Revision 4df910620bebb5cfe234af16ac8f6474b60215fd authored by Feng Tang on 25 November 2020, 05:22:21 UTC, committed by Linus Torvalds on 26 November 2020, 17:35:49 UTC
0day reported a -22.7% regression for the will-it-scale page_fault2 case [1] on a 4-socket, 144-CPU platform, and bisected it to Waiman's optimization (commit bd0b230fe1) of saving one 'struct page_counter' of space in 'struct mem_cgroup'. Initially we thought it was due to the cache alignment change introduced by the patch, but further debugging shows that it is because some hot data members ('vmstats_local', 'vmstats_percpu', 'vmstats') sit in 2 adjacent cache lines (the 2N and 2N+1 cache lines), and when adjacent cache line prefetch is enabled, this triggers an "extended level" of cache false sharing across the 2 adjacent cache lines. So exchange the 2 member blocks, while mostly keeping the original cache alignment, which can restore and even enhance the performance, and saves 64 bytes of space for 'struct mem_cgroup' (from 2880 to 2816, with 0day's default RHEL-8.3 kernel config). [1] https://lore.kernel.org/lkml/20201102091543.GM31092@shao2-debian/ Fixes: bd0b230fe145 ("mm/memcg: unify swap and memsw page counters") Reported-by: kernel test robot <rong.a.chen@intel.com> Signed-off-by: Feng Tang <feng.tang@intel.com> Acked-by: Waiman Long <longman@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent fa02fcd
File | Mode | Size |
---|---|---|
bpfilter | ||
netfilter | ||
Kconfig | -rw-r--r-- | 25.9 KB |
Makefile | -rw-r--r-- | 2.8 KB |
af_inet.c | -rw-r--r-- | 51.4 KB |
ah4.c | -rw-r--r-- | 13.6 KB |
arp.c | -rw-r--r-- | 35.8 KB |
bpf_tcp_ca.c | -rw-r--r-- | 6.4 KB |
cipso_ipv4.c | -rw-r--r-- | 61.3 KB |
datagram.c | -rw-r--r-- | 3.1 KB |
devinet.c | -rw-r--r-- | 68.6 KB |
esp4.c | -rw-r--r-- | 27.6 KB |
esp4_offload.c | -rw-r--r-- | 8.5 KB |
fib_frontend.c | -rw-r--r-- | 38.6 KB |
fib_lookup.h | -rw-r--r-- | 1.6 KB |
fib_notifier.c | -rw-r--r-- | 1.6 KB |
fib_rules.c | -rw-r--r-- | 9.8 KB |
fib_semantics.c | -rw-r--r-- | 52.2 KB |
fib_trie.c | -rw-r--r-- | 71.7 KB |
fou.c | -rw-r--r-- | 28.1 KB |
gre_demux.c | -rw-r--r-- | 4.8 KB |
gre_offload.c | -rw-r--r-- | 7.0 KB |
icmp.c | -rw-r--r-- | 32.7 KB |
igmp.c | -rw-r--r-- | 72.6 KB |
inet_connection_sock.c | -rw-r--r-- | 30.7 KB |
inet_diag.c | -rw-r--r-- | 35.3 KB |
inet_fragment.c | -rw-r--r-- | 14.4 KB |
inet_hashtables.c | -rw-r--r-- | 22.2 KB |
inet_timewait_sock.c | -rw-r--r-- | 8.9 KB |
inetpeer.c | -rw-r--r-- | 8.5 KB |
ip_forward.c | -rw-r--r-- | 4.1 KB |
ip_fragment.c | -rw-r--r-- | 17.6 KB |
ip_gre.c | -rw-r--r-- | 45.4 KB |
ip_input.c | -rw-r--r-- | 18.3 KB |
ip_options.c | -rw-r--r-- | 14.8 KB |
ip_output.c | -rw-r--r-- | 43.0 KB |
ip_sockglue.c | -rw-r--r-- | 40.9 KB |
ip_tunnel.c | -rw-r--r-- | 29.6 KB |
ip_tunnel_core.c | -rw-r--r-- | 29.3 KB |
ip_vti.c | -rw-r--r-- | 17.0 KB |
ipcomp.c | -rw-r--r-- | 4.4 KB |
ipconfig.c | -rw-r--r-- | 42.3 KB |
ipip.c | -rw-r--r-- | 18.4 KB |
ipmr.c | -rw-r--r-- | 75.8 KB |
ipmr_base.c | -rw-r--r-- | 10.2 KB |
metrics.c | -rw-r--r-- | 2.2 KB |
netfilter.c | -rw-r--r-- | 2.5 KB |
netlink.c | -rw-r--r-- | 737 bytes |
nexthop.c | -rw-r--r-- | 44.8 KB |
ping.c | -rw-r--r-- | 27.6 KB |
proc.c | -rw-r--r-- | 20.2 KB |
protocol.c | -rw-r--r-- | 2.1 KB |
raw.c | -rw-r--r-- | 26.1 KB |
raw_diag.c | -rw-r--r-- | 6.3 KB |
route.c | -rw-r--r-- | 89.5 KB |
syncookies.c | -rw-r--r-- | 12.6 KB |
sysctl_net_ipv4.c | -rw-r--r-- | 35.4 KB |
tcp.c | -rw-r--r-- | 109.2 KB |
tcp_bbr.c | -rw-r--r-- | 40.9 KB |
tcp_bic.c | -rw-r--r-- | 6.0 KB |
tcp_bpf.c | -rw-r--r-- | 14.6 KB |
tcp_cdg.c | -rw-r--r-- | 11.1 KB |
tcp_cong.c | -rw-r--r-- | 11.4 KB |
tcp_cubic.c | -rw-r--r-- | 15.5 KB |
tcp_dctcp.c | -rw-r--r-- | 7.2 KB |
tcp_dctcp.h | -rw-r--r-- | 1.0 KB |
tcp_diag.c | -rw-r--r-- | 5.5 KB |
tcp_fastopen.c | -rw-r--r-- | 16.5 KB |
tcp_highspeed.c | -rw-r--r-- | 4.9 KB |
tcp_htcp.c | -rw-r--r-- | 7.4 KB |
tcp_hybla.c | -rw-r--r-- | 4.9 KB |
tcp_illinois.c | -rw-r--r-- | 8.3 KB |
tcp_input.c | -rw-r--r-- | 195.9 KB |
tcp_ipv4.c | -rw-r--r-- | 78.1 KB |
tcp_lp.c | -rw-r--r-- | 8.8 KB |
tcp_metrics.c | -rw-r--r-- | 26.8 KB |
tcp_minisocks.c | -rw-r--r-- | 26.6 KB |
tcp_nv.c | -rw-r--r-- | 15.7 KB |
tcp_offload.c | -rw-r--r-- | 8.1 KB |
tcp_output.c | -rw-r--r-- | 118.6 KB |
tcp_rate.c | -rw-r--r-- | 7.8 KB |
tcp_recovery.c | -rw-r--r-- | 7.5 KB |
tcp_scalable.c | -rw-r--r-- | 1.4 KB |
tcp_timer.c | -rw-r--r-- | 22.5 KB |
tcp_ulp.c | -rw-r--r-- | 3.4 KB |
tcp_vegas.c | -rw-r--r-- | 9.7 KB |
tcp_vegas.h | -rw-r--r-- | 940 bytes |
tcp_veno.c | -rw-r--r-- | 5.8 KB |
tcp_westwood.c | -rw-r--r-- | 8.3 KB |
tcp_yeah.c | -rw-r--r-- | 6.6 KB |
tunnel4.c | -rw-r--r-- | 6.5 KB |
udp.c | -rw-r--r-- | 82.0 KB |
udp_bpf.c | -rw-r--r-- | 1.3 KB |
udp_diag.c | -rw-r--r-- | 7.2 KB |
udp_impl.h | -rw-r--r-- | 966 bytes |
udp_offload.c | -rw-r--r-- | 16.5 KB |
udp_tunnel_core.c | -rw-r--r-- | 5.6 KB |
udp_tunnel_nic.c | -rw-r--r-- | 24.2 KB |
udp_tunnel_stub.c | -rw-r--r-- | 199 bytes |
udplite.c | -rw-r--r-- | 2.9 KB |
xfrm4_input.c | -rw-r--r-- | 4.2 KB |
xfrm4_output.c | -rw-r--r-- | 1.1 KB |
xfrm4_policy.c | -rw-r--r-- | 5.9 KB |
xfrm4_protocol.c | -rw-r--r-- | 6.6 KB |
xfrm4_state.c | -rw-r--r-- | 469 bytes |
xfrm4_tunnel.c | -rw-r--r-- | 2.7 KB |
Computing file changes ...