Revision 6f6acb00514c10be35529402f36ad7a288f08c2e authored by Michal Hocko on 22 May 2014, 18:54:19 UTC, committed by Linus Torvalds on 23 May 2014, 16:37:29 UTC
Commit 284f39afeaa4 ("mm: memcg: push !mm handling out to page cache
charge function") explicitly checks for page cache charges without any
mm context (from kernel thread context[1]).

This seemed to be the only possible case where memory could be charged
without mm context so commit 03583f1a631c ("memcg: remove unnecessary
!mm check from try_get_mem_cgroup_from_mm()") removed the mm check from
get_mem_cgroup_from_mm().  This however caused another NULL ptr
dereference during early boot when loopback kernel thread splices to
tmpfs as reported by Stephan Kulow:

  BUG: unable to handle kernel NULL pointer dereference at 0000000000000360
  IP: get_mem_cgroup_from_mm.isra.42+0x2b/0x60
  Oops: 0000 [#1] SMP
  Modules linked in: btrfs dm_multipath dm_mod scsi_dh multipath raid10 raid456 async_raid6_recov async_memcpy async_pq raid6_pq async_xor xor async_tx raid1 raid0 md_mod parport_pc parport nls_utf8 isofs usb_storage iscsi_ibft iscsi_boot_sysfs arc4 ecb fan thermal nfs lockd fscache nls_iso8859_1 nls_cp437 sg st hid_generic usbhid af_packet sunrpc sr_mod cdrom ata_generic uhci_hcd virtio_net virtio_blk ehci_hcd usbcore ata_piix floppy processor button usb_common virtio_pci virtio_ring virtio edd squashfs loop ppa]
  CPU: 0 PID: 97 Comm: loop1 Not tainted 3.15.0-rc5-5-default #1
  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  Call Trace:
    __mem_cgroup_try_charge_swapin+0x40/0xe0
    mem_cgroup_charge_file+0x8b/0xd0
    shmem_getpage_gfp+0x66b/0x7b0
    shmem_file_splice_read+0x18f/0x430
    splice_direct_to_actor+0xa2/0x1c0
    do_lo_receive+0x5a/0x60 [loop]
    loop_thread+0x298/0x720 [loop]
    kthread+0xc6/0xe0
    ret_from_fork+0x7c/0xb0

Also Branimir Maksimovic reported the following oops which is tiggered
for the swapcache charge path from the accounting code for kernel threads:

  CPU: 1 PID: 160 Comm: kworker/u8:5 Tainted: P           OE 3.15.0-rc5-core2-custom #159
  Hardware name: System manufacturer System Product Name/MAXIMUSV GENE, BIOS 1903 08/19/2013
  task: ffff880404e349b0 ti: ffff88040486a000 task.ti: ffff88040486a000
  RIP: get_mem_cgroup_from_mm.isra.42+0x2b/0x60
  Call Trace:
    __mem_cgroup_try_charge_swapin+0x45/0xf0
    mem_cgroup_charge_file+0x9c/0xe0
    shmem_getpage_gfp+0x62c/0x770
    shmem_write_begin+0x38/0x40
    generic_perform_write+0xc5/0x1c0
    __generic_file_aio_write+0x1d1/0x3f0
    generic_file_aio_write+0x4f/0xc0
    do_sync_write+0x5a/0x90
    do_acct_process+0x4b1/0x550
    acct_process+0x6d/0xa0
    do_exit+0x827/0xa70
    kthread+0xc3/0xf0

This patch fixes the issue by reintroducing mm check into
get_mem_cgroup_from_mm.  We could do the same trick in
__mem_cgroup_try_charge_swapin as we do for the regular page cache path
but it is not worth troubles.  The check is not that expensive and it is
better to have get_mem_cgroup_from_mm more robust.

[1] - http://marc.info/?l=linux-mm&m=139463617808941&w=2

Fixes: 03583f1a631c ("memcg: remove unnecessary !mm check from try_get_mem_cgroup_from_mm()")
Reported-and-tested-by: Stephan Kulow <coolo@suse.com>
Reported-by: Branimir Maksimovic <branimir.maksimovic@gmail.com>
Signed-off-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 55231e5
Raw File
zlib.c
/*
 * Cryptographic API.
 *
 * Zlib algorithm
 *
 * Copyright 2008 Sony Corporation
 *
 * Based on deflate.c, which is
 * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 * FIXME: deflate transforms will require up to a total of about 436k of kernel
 * memory on i386 (390k for compression, the rest for decompression), as the
 * current zlib kernel code uses a worst case pre-allocation system by default.
 * This needs to be fixed so that the amount of memory required is properly
 * related to the winbits and memlevel parameters.
 */

#define pr_fmt(fmt)	"%s: " fmt, __func__

#include <linux/init.h>
#include <linux/module.h>
#include <linux/zlib.h>
#include <linux/vmalloc.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/net.h>

#include <crypto/internal/compress.h>

#include <net/netlink.h>


struct zlib_ctx {
	struct z_stream_s comp_stream;
	struct z_stream_s decomp_stream;
	int decomp_windowBits;
};


static void zlib_comp_exit(struct zlib_ctx *ctx)
{
	struct z_stream_s *stream = &ctx->comp_stream;

	if (stream->workspace) {
		zlib_deflateEnd(stream);
		vfree(stream->workspace);
		stream->workspace = NULL;
	}
}

static void zlib_decomp_exit(struct zlib_ctx *ctx)
{
	struct z_stream_s *stream = &ctx->decomp_stream;

	if (stream->workspace) {
		zlib_inflateEnd(stream);
		vfree(stream->workspace);
		stream->workspace = NULL;
	}
}

static int zlib_init(struct crypto_tfm *tfm)
{
	return 0;
}

static void zlib_exit(struct crypto_tfm *tfm)
{
	struct zlib_ctx *ctx = crypto_tfm_ctx(tfm);

	zlib_comp_exit(ctx);
	zlib_decomp_exit(ctx);
}


static int zlib_compress_setup(struct crypto_pcomp *tfm, void *params,
			       unsigned int len)
{
	struct zlib_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
	struct z_stream_s *stream = &ctx->comp_stream;
	struct nlattr *tb[ZLIB_COMP_MAX + 1];
	int window_bits, mem_level;
	size_t workspacesize;
	int ret;

	ret = nla_parse(tb, ZLIB_COMP_MAX, params, len, NULL);
	if (ret)
		return ret;

	zlib_comp_exit(ctx);

	window_bits = tb[ZLIB_COMP_WINDOWBITS]
					? nla_get_u32(tb[ZLIB_COMP_WINDOWBITS])
					: MAX_WBITS;
	mem_level = tb[ZLIB_COMP_MEMLEVEL]
					? nla_get_u32(tb[ZLIB_COMP_MEMLEVEL])
					: DEF_MEM_LEVEL;

	workspacesize = zlib_deflate_workspacesize(window_bits, mem_level);
	stream->workspace = vzalloc(workspacesize);
	if (!stream->workspace)
		return -ENOMEM;

	ret = zlib_deflateInit2(stream,
				tb[ZLIB_COMP_LEVEL]
					? nla_get_u32(tb[ZLIB_COMP_LEVEL])
					: Z_DEFAULT_COMPRESSION,
				tb[ZLIB_COMP_METHOD]
					? nla_get_u32(tb[ZLIB_COMP_METHOD])
					: Z_DEFLATED,
				window_bits,
				mem_level,
				tb[ZLIB_COMP_STRATEGY]
					? nla_get_u32(tb[ZLIB_COMP_STRATEGY])
					: Z_DEFAULT_STRATEGY);
	if (ret != Z_OK) {
		vfree(stream->workspace);
		stream->workspace = NULL;
		return -EINVAL;
	}

	return 0;
}

static int zlib_compress_init(struct crypto_pcomp *tfm)
{
	int ret;
	struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
	struct z_stream_s *stream = &dctx->comp_stream;

	ret = zlib_deflateReset(stream);
	if (ret != Z_OK)
		return -EINVAL;

	return 0;
}

static int zlib_compress_update(struct crypto_pcomp *tfm,
				struct comp_request *req)
{
	int ret;
	struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
	struct z_stream_s *stream = &dctx->comp_stream;

	pr_debug("avail_in %u, avail_out %u\n", req->avail_in, req->avail_out);
	stream->next_in = req->next_in;
	stream->avail_in = req->avail_in;
	stream->next_out = req->next_out;
	stream->avail_out = req->avail_out;

	ret = zlib_deflate(stream, Z_NO_FLUSH);
	switch (ret) {
	case Z_OK:
		break;

	case Z_BUF_ERROR:
		pr_debug("zlib_deflate could not make progress\n");
		return -EAGAIN;

	default:
		pr_debug("zlib_deflate failed %d\n", ret);
		return -EINVAL;
	}

	ret = req->avail_out - stream->avail_out;
	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
		 stream->avail_in, stream->avail_out,
		 req->avail_in - stream->avail_in, ret);
	req->next_in = stream->next_in;
	req->avail_in = stream->avail_in;
	req->next_out = stream->next_out;
	req->avail_out = stream->avail_out;
	return ret;
}

static int zlib_compress_final(struct crypto_pcomp *tfm,
			       struct comp_request *req)
{
	int ret;
	struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
	struct z_stream_s *stream = &dctx->comp_stream;

	pr_debug("avail_in %u, avail_out %u\n", req->avail_in, req->avail_out);
	stream->next_in = req->next_in;
	stream->avail_in = req->avail_in;
	stream->next_out = req->next_out;
	stream->avail_out = req->avail_out;

	ret = zlib_deflate(stream, Z_FINISH);
	if (ret != Z_STREAM_END) {
		pr_debug("zlib_deflate failed %d\n", ret);
		return -EINVAL;
	}

	ret = req->avail_out - stream->avail_out;
	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
		 stream->avail_in, stream->avail_out,
		 req->avail_in - stream->avail_in, ret);
	req->next_in = stream->next_in;
	req->avail_in = stream->avail_in;
	req->next_out = stream->next_out;
	req->avail_out = stream->avail_out;
	return ret;
}


static int zlib_decompress_setup(struct crypto_pcomp *tfm, void *params,
				 unsigned int len)
{
	struct zlib_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
	struct z_stream_s *stream = &ctx->decomp_stream;
	struct nlattr *tb[ZLIB_DECOMP_MAX + 1];
	int ret = 0;

	ret = nla_parse(tb, ZLIB_DECOMP_MAX, params, len, NULL);
	if (ret)
		return ret;

	zlib_decomp_exit(ctx);

	ctx->decomp_windowBits = tb[ZLIB_DECOMP_WINDOWBITS]
				 ? nla_get_u32(tb[ZLIB_DECOMP_WINDOWBITS])
				 : DEF_WBITS;

	stream->workspace = vzalloc(zlib_inflate_workspacesize());
	if (!stream->workspace)
		return -ENOMEM;

	ret = zlib_inflateInit2(stream, ctx->decomp_windowBits);
	if (ret != Z_OK) {
		vfree(stream->workspace);
		stream->workspace = NULL;
		return -EINVAL;
	}

	return 0;
}

static int zlib_decompress_init(struct crypto_pcomp *tfm)
{
	int ret;
	struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
	struct z_stream_s *stream = &dctx->decomp_stream;

	ret = zlib_inflateReset(stream);
	if (ret != Z_OK)
		return -EINVAL;

	return 0;
}

static int zlib_decompress_update(struct crypto_pcomp *tfm,
				  struct comp_request *req)
{
	int ret;
	struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
	struct z_stream_s *stream = &dctx->decomp_stream;

	pr_debug("avail_in %u, avail_out %u\n", req->avail_in, req->avail_out);
	stream->next_in = req->next_in;
	stream->avail_in = req->avail_in;
	stream->next_out = req->next_out;
	stream->avail_out = req->avail_out;

	ret = zlib_inflate(stream, Z_SYNC_FLUSH);
	switch (ret) {
	case Z_OK:
	case Z_STREAM_END:
		break;

	case Z_BUF_ERROR:
		pr_debug("zlib_inflate could not make progress\n");
		return -EAGAIN;

	default:
		pr_debug("zlib_inflate failed %d\n", ret);
		return -EINVAL;
	}

	ret = req->avail_out - stream->avail_out;
	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
		 stream->avail_in, stream->avail_out,
		 req->avail_in - stream->avail_in, ret);
	req->next_in = stream->next_in;
	req->avail_in = stream->avail_in;
	req->next_out = stream->next_out;
	req->avail_out = stream->avail_out;
	return ret;
}

static int zlib_decompress_final(struct crypto_pcomp *tfm,
				 struct comp_request *req)
{
	int ret;
	struct zlib_ctx *dctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
	struct z_stream_s *stream = &dctx->decomp_stream;

	pr_debug("avail_in %u, avail_out %u\n", req->avail_in, req->avail_out);
	stream->next_in = req->next_in;
	stream->avail_in = req->avail_in;
	stream->next_out = req->next_out;
	stream->avail_out = req->avail_out;

	if (dctx->decomp_windowBits < 0) {
		ret = zlib_inflate(stream, Z_SYNC_FLUSH);
		/*
		 * Work around a bug in zlib, which sometimes wants to taste an
		 * extra byte when being used in the (undocumented) raw deflate
		 * mode. (From USAGI).
		 */
		if (ret == Z_OK && !stream->avail_in && stream->avail_out) {
			const void *saved_next_in = stream->next_in;
			u8 zerostuff = 0;

			stream->next_in = &zerostuff;
			stream->avail_in = 1;
			ret = zlib_inflate(stream, Z_FINISH);
			stream->next_in = saved_next_in;
			stream->avail_in = 0;
		}
	} else
		ret = zlib_inflate(stream, Z_FINISH);
	if (ret != Z_STREAM_END) {
		pr_debug("zlib_inflate failed %d\n", ret);
		return -EINVAL;
	}

	ret = req->avail_out - stream->avail_out;
	pr_debug("avail_in %u, avail_out %u (consumed %u, produced %u)\n",
		 stream->avail_in, stream->avail_out,
		 req->avail_in - stream->avail_in, ret);
	req->next_in = stream->next_in;
	req->avail_in = stream->avail_in;
	req->next_out = stream->next_out;
	req->avail_out = stream->avail_out;
	return ret;
}


static struct pcomp_alg zlib_alg = {
	.compress_setup		= zlib_compress_setup,
	.compress_init		= zlib_compress_init,
	.compress_update	= zlib_compress_update,
	.compress_final		= zlib_compress_final,
	.decompress_setup	= zlib_decompress_setup,
	.decompress_init	= zlib_decompress_init,
	.decompress_update	= zlib_decompress_update,
	.decompress_final	= zlib_decompress_final,

	.base			= {
		.cra_name	= "zlib",
		.cra_flags	= CRYPTO_ALG_TYPE_PCOMPRESS,
		.cra_ctxsize	= sizeof(struct zlib_ctx),
		.cra_module	= THIS_MODULE,
		.cra_init	= zlib_init,
		.cra_exit	= zlib_exit,
	}
};

static int __init zlib_mod_init(void)
{
	return crypto_register_pcomp(&zlib_alg);
}

static void __exit zlib_mod_fini(void)
{
	crypto_unregister_pcomp(&zlib_alg);
}

module_init(zlib_mod_init);
module_exit(zlib_mod_fini);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Zlib Compression Algorithm");
MODULE_AUTHOR("Sony Corporation");
back to top