Revision a82c25c366b0963d33ddf699196e6cf57f6d89b1 authored by Florian Westphal on 08 March 2022, 12:52:11 UTC, committed by Florian Westphal on 08 March 2022, 12:52:11 UTC
This reverts commit 878aed8db324bec64f3c3f956e64d5ae7375a5de.

This change breaks existing setups where conntrack is used with
asymmetric paths.

In these cases, the NAT transformation occurs on the syn-ack instead of
the syn:

1. SYN    x:12345 -> y -> 443 // sent by initiator, receiverd by responder
2. SYNACK y:443 -> x:12345 // First packet seen by conntrack, as sent by responder
3. tuple_force_port_remap() gets called, sees:
  'tcp from 443 to port 12345 NAT' -> pick a new source port, inititor receives
4. SYNACK y:$RANDOM -> x:12345   // connection is never established

While its possible to avoid the breakage with NOTRACK rules, a kernel
update should not break working setups.

An alternative to the revert is to augment conntrack to tag
mid-stream connections plus more code in the nat core to skip NAT
for such connections, however, this leads to more interaction/integration
between conntrack and NAT.

Therefore, revert, users will need to add explicit nat rules to avoid
port shadowing.

Link: https://lore.kernel.org/netfilter-devel/20220302105908.GA5852@breakpoint.cc/#R
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2051413
Signed-off-by: Florian Westphal <fw@strlen.de>
1 parent f8e9bd3
Raw File
init-mm.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/mm_types.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/cpumask.h>
#include <linux/mman.h>
#include <linux/pgtable.h>

#include <linux/atomic.h>
#include <linux/user_namespace.h>
#include <asm/mmu.h>

#ifndef INIT_MM_CONTEXT
#define INIT_MM_CONTEXT(name)
#endif

/*
 * For dynamically allocated mm_structs, there is a dynamically sized cpumask
 * at the end of the structure, the size of which depends on the maximum CPU
 * number the system can see. That way we allocate only as much memory for
 * mm_cpumask() as needed for the hundreds, or thousands of processes that
 * a system typically runs.
 *
 * Since there is only one init_mm in the entire system, keep it simple
 * and size this cpu_bitmask to NR_CPUS.
 */
struct mm_struct init_mm = {
	.mm_rb		= RB_ROOT,
	.pgd		= swapper_pg_dir,
	.mm_users	= ATOMIC_INIT(2),
	.mm_count	= ATOMIC_INIT(1),
	.write_protect_seq = SEQCNT_ZERO(init_mm.write_protect_seq),
	MMAP_LOCK_INITIALIZER(init_mm)
	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
	.arg_lock	=  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
	.user_ns	= &init_user_ns,
	.cpu_bitmap	= CPU_BITS_NONE,
	INIT_MM_CONTEXT(init_mm)
};

void setup_initial_init_mm(void *start_code, void *end_code,
			   void *end_data, void *brk)
{
	init_mm.start_code = (unsigned long)start_code;
	init_mm.end_code = (unsigned long)end_code;
	init_mm.end_data = (unsigned long)end_data;
	init_mm.brk = (unsigned long)brk;
}
back to top