Revision df9576def004d2cd5beedc00cb6e8901427634b9 authored by Yang Shi on 03 August 2019, 04:48:37 UTC, committed by Linus Torvalds on 03 August 2019, 14:02:00 UTC
When running ltp's oom test with kmemleak enabled, the below warning was
triggered because the kernel detects that __GFP_NOFAIL & ~__GFP_DIRECT_RECLAIM
is passed in:

  WARNING: CPU: 105 PID: 2138 at mm/page_alloc.c:4608 __alloc_pages_nodemask+0x1c31/0x1d50
  Modules linked in: loop dax_pmem dax_pmem_core ip_tables x_tables xfs virtio_net net_failover virtio_blk failover ata_generic virtio_pci virtio_ring virtio libata
  CPU: 105 PID: 2138 Comm: oom01 Not tainted 5.2.0-next-20190710+ #7
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014
  RIP: 0010:__alloc_pages_nodemask+0x1c31/0x1d50
  ...
   kmemleak_alloc+0x4e/0xb0
   kmem_cache_alloc+0x2a7/0x3e0
   mempool_alloc_slab+0x2d/0x40
   mempool_alloc+0x118/0x2b0
   bio_alloc_bioset+0x19d/0x350
   get_swap_bio+0x80/0x230
   __swap_writepage+0x5ff/0xb20

The mempool_alloc_slab() clears __GFP_DIRECT_RECLAIM, however kmemleak
has __GFP_NOFAIL set all the time due to d9570ee3bd1d4f2 ("kmemleak:
allow to coexist with fault injection").  But, it doesn't make any sense
to have __GFP_NOFAIL and ~__GFP_DIRECT_RECLAIM specified at the same
time.

According to the discussion on the mailing list, the commit should be
reverted as a short-term solution.  Catalin Marinas will follow up with
a better solution for the longer term.

The failure rate of kmemleak metadata allocation may increase in some
circumstances, but this is an expected side effect.

Link: http://lkml.kernel.org/r/1563299431-111710-1-git-send-email-yang.shi@linux.alibaba.com
Fixes: d9570ee3bd1d4f2 ("kmemleak: allow to coexist with fault injection")
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Suggested-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Qian Cai <cai@lca.pw>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 68d8681
Raw File
orangefs-cache.c
// SPDX-License-Identifier: GPL-2.0
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

#include "protocol.h"
#include "orangefs-kernel.h"

/*
 * Tags assigned to kernel upcall operations.  next_tag_value is set to
 * 100 by op_cache_initialize() and handed out by orangefs_new_tag();
 * both access it while holding next_tag_value_lock.
 */
static __u64 next_tag_value;
static DEFINE_SPINLOCK(next_tag_value_lock);

/* the orangefs memory caches */

/*
 * A cache for orangefs upcall/downcall operations.  Objects in it are
 * struct orangefs_kernel_op_s, allocated by op_alloc() and returned by
 * op_release().
 */
static struct kmem_cache *op_cache;

/*
 * Create the kmem cache that backs orangefs upcall/downcall operations
 * and set the tag counter to its starting value of 100.
 *
 * Returns 0 on success, or -ENOMEM if the cache could not be created.
 */
int op_cache_initialize(void)
{
	op_cache = kmem_cache_create("orangefs_op_cache",
				     sizeof(struct orangefs_kernel_op_s),
				     0,
				     ORANGEFS_CACHE_CREATE_FLAGS,
				     NULL);
	if (op_cache) {
		/* the tag counter is only ever touched under its spinlock */
		spin_lock(&next_tag_value_lock);
		next_tag_value = 100;
		spin_unlock(&next_tag_value_lock);
		return 0;
	}

	gossip_err("Cannot create orangefs_op_cache\n");
	return -ENOMEM;
}

/*
 * Destroy the kmem cache used for orangefs upcall/downcall operations.
 *
 * Always returns 0.
 */
int op_cache_finalize(void)
{
	kmem_cache_destroy(op_cache);

	return 0;
}

/*
 * Map the upcall type of an op to a printable name.
 *
 * Returns a pointer to a string literal, so the result must not be
 * modified or freed.  A NULL op, or an upcall type that is not listed
 * below, yields "OP_UNKNOWN?".
 */
char *get_opname_string(struct orangefs_kernel_op_s *new_op)
{
	__s32 op_type;

	if (!new_op)
		return "OP_UNKNOWN?";

	op_type = new_op->upcall.type;

	if (op_type == ORANGEFS_VFS_OP_FILE_IO)
		return "OP_FILE_IO";
	if (op_type == ORANGEFS_VFS_OP_LOOKUP)
		return "OP_LOOKUP";
	if (op_type == ORANGEFS_VFS_OP_CREATE)
		return "OP_CREATE";
	if (op_type == ORANGEFS_VFS_OP_GETATTR)
		return "OP_GETATTR";
	if (op_type == ORANGEFS_VFS_OP_REMOVE)
		return "OP_REMOVE";
	if (op_type == ORANGEFS_VFS_OP_MKDIR)
		return "OP_MKDIR";
	if (op_type == ORANGEFS_VFS_OP_READDIR)
		return "OP_READDIR";
	if (op_type == ORANGEFS_VFS_OP_READDIRPLUS)
		return "OP_READDIRPLUS";
	if (op_type == ORANGEFS_VFS_OP_SETATTR)
		return "OP_SETATTR";
	if (op_type == ORANGEFS_VFS_OP_SYMLINK)
		return "OP_SYMLINK";
	if (op_type == ORANGEFS_VFS_OP_RENAME)
		return "OP_RENAME";
	if (op_type == ORANGEFS_VFS_OP_STATFS)
		return "OP_STATFS";
	if (op_type == ORANGEFS_VFS_OP_TRUNCATE)
		return "OP_TRUNCATE";
	if (op_type == ORANGEFS_VFS_OP_RA_FLUSH)
		return "OP_RA_FLUSH";
	if (op_type == ORANGEFS_VFS_OP_FS_MOUNT)
		return "OP_FS_MOUNT";
	if (op_type == ORANGEFS_VFS_OP_FS_UMOUNT)
		return "OP_FS_UMOUNT";
	if (op_type == ORANGEFS_VFS_OP_GETXATTR)
		return "OP_GETXATTR";
	if (op_type == ORANGEFS_VFS_OP_SETXATTR)
		return "OP_SETXATTR";
	if (op_type == ORANGEFS_VFS_OP_LISTXATTR)
		return "OP_LISTXATTR";
	if (op_type == ORANGEFS_VFS_OP_REMOVEXATTR)
		return "OP_REMOVEXATTR";
	if (op_type == ORANGEFS_VFS_OP_PARAM)
		return "OP_PARAM";
	if (op_type == ORANGEFS_VFS_OP_PERF_COUNT)
		return "OP_PERF_COUNT";
	if (op_type == ORANGEFS_VFS_OP_CANCEL)
		return "OP_CANCEL";
	if (op_type == ORANGEFS_VFS_OP_FSYNC)
		return "OP_FSYNC";
	if (op_type == ORANGEFS_VFS_OP_FSKEY)
		return "OP_FSKEY";
	if (op_type == ORANGEFS_VFS_OP_FEATURES)
		return "OP_FEATURES";

	return "OP_UNKNOWN?";
}

/*
 * Stamp op with the current tag value and advance the shared counter.
 *
 * The counter is read and advanced while holding next_tag_value_lock.
 * If the increment wraps the counter around to 0, it is reset to 100.
 */
void orangefs_new_tag(struct orangefs_kernel_op_s *op)
{
	spin_lock(&next_tag_value_lock);
	op->tag = next_tag_value;
	if (++next_tag_value == 0)
		next_tag_value = 100;
	spin_unlock(&next_tag_value_lock);
}

/*
 * Allocate a zeroed op from op_cache and prepare it for an upcall.
 *
 * The new op gets an initialized list head, lock and completion, a
 * fresh tag from orangefs_new_tag(), the requested upcall type, a
 * downcall marked invalid with status -1, and the current task's
 * fsuid/fsgid translated into init_user_ns.
 *
 * Returns the new op, or NULL if the allocation failed (an error is
 * logged in that case).
 */
struct orangefs_kernel_op_s *op_alloc(__s32 type)
{
	struct orangefs_kernel_op_s *new_op;

	new_op = kmem_cache_zalloc(op_cache, GFP_KERNEL);
	if (!new_op) {
		gossip_err("op_alloc: kmem_cache_zalloc failed!\n");
		return NULL;
	}

	INIT_LIST_HEAD(&new_op->list);
	spin_lock_init(&new_op->lock);
	init_completion(&new_op->waitq);

	/* start from a known-invalid state before filling in the op */
	new_op->upcall.type = ORANGEFS_VFS_OP_INVALID;
	new_op->downcall.type = ORANGEFS_VFS_OP_INVALID;
	new_op->downcall.status = -1;
	new_op->op_state = OP_VFS_STATE_UNKNOWN;

	/* op specific tag and upcall type */
	orangefs_new_tag(new_op);
	new_op->upcall.type = type;
	new_op->attempts = 0;
	gossip_debug(GOSSIP_CACHE_DEBUG,
		     "Alloced OP (%p: %llu %s)\n",
		     new_op,
		     llu(new_op->tag),
		     get_opname_string(new_op));

	/* upcall credentials, expressed in the initial user namespace */
	new_op->upcall.uid = from_kuid(&init_user_ns, current_fsuid());
	new_op->upcall.gid = from_kgid(&init_user_ns, current_fsgid());

	return new_op;
}

/*
 * Return an op to op_cache.
 *
 * A NULL pointer is not freed; it is reported through gossip_err()
 * instead.
 */
void op_release(struct orangefs_kernel_op_s *orangefs_op)
{
	if (!orangefs_op) {
		gossip_err("NULL pointer in op_release\n");
		return;
	}

	gossip_debug(GOSSIP_CACHE_DEBUG,
		     "Releasing OP (%p: %llu)\n",
		     orangefs_op,
		     llu(orangefs_op->tag));
	kmem_cache_free(op_cache, orangefs_op);
}
back to top