Revision c0cf4512a31eb3cec70b066bc36ed55f7d05b8c0 authored by Bart Van Assche on 23 June 2016, 07:35:48 UTC, committed by Doug Ledford on 23 June 2016, 16:04:09 UTC
The memory needed for the send and receive queues associated with
a QP is proportional to the max_sge parameter. The current value
of that parameter is such that, with an mlx4 HCA, the QP buffer
size is 8 MB. Since DMA is used for communication between the HCA
and the CPU, that buffer either has to be allocated coherently or
map_single() must succeed for it. Since large contiguous
allocations are fragile and since the maximum segment size for
e.g. swiotlb is 256 KB, reduce the max_sge parameter. This patch
prevents the following text from appearing on the console after
SRP logout and relogin on a system equipped with multiple IB HCAs:

mlx4_core 0000:05:00.0: swiotlb buffer is full (sz: 8388608 bytes)
swiotlb: coherent allocation failed for device 0000:05:00.0 size=8388608
CPU: 11 PID: 148 Comm: kworker/11:1 Not tainted 4.7.0-rc4-dbg+ #1
Call Trace:
 [<ffffffff812c6d35>] dump_stack+0x67/0x92
 [<ffffffff812efe71>] swiotlb_alloc_coherent+0x141/0x150
 [<ffffffff810458be>] x86_swiotlb_alloc_coherent+0x3e/0x50
 [<ffffffffa03861fa>] mlx4_buf_direct_alloc.isra.5+0x9a/0x120 [mlx4_core]
 [<ffffffffa0386545>] mlx4_buf_alloc+0x165/0x1a0 [mlx4_core]
 [<ffffffffa035053d>] create_qp_common.isra.29+0x57d/0xff0 [mlx4_ib]
 [<ffffffffa03510da>] mlx4_ib_create_qp+0x12a/0x3f0 [mlx4_ib]
 [<ffffffffa031154a>] ib_create_qp+0x3a/0x250 [ib_core]
 [<ffffffffa055dd4b>] srpt_cm_handler+0x4bb/0xcad [ib_srpt]
 [<ffffffffa02c1ab0>] cm_process_work+0x20/0xf0 [ib_cm]
 [<ffffffffa02c3640>] cm_work_handler+0x1ac0/0x2059 [ib_cm]
 [<ffffffff810737ed>] process_one_work+0x19d/0x490
 [<ffffffff81073b29>] worker_thread+0x49/0x490
 [<ffffffff8107a0ea>] kthread+0xea/0x100
 [<ffffffff815b25af>] ret_from_fork+0x1f/0x40

Fixes: b99f8e4d7bcd ("IB/srpt: convert to the generic RDMA READ/WRITE API")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Doug Ledford <dledford@redhat.com>
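
For context, a hedged sketch of the kind of change the message describes:
cap the number of scatter/gather entries per work request at a small
constant instead of inheriting the HCA's advertised maximum, so the QP
buffer stays far below swiotlb's 256 KB segment limit. The names below
(srpt_cap_sge, MAX_SG_PER_WQE) are illustrative, not the actual diff:

#include <linux/kernel.h>

#define MAX_SG_PER_WQE	16	/* illustrative cap, not from the patch */

/* Request at most MAX_SG_PER_WQE SGEs instead of the device maximum. */
static u32 srpt_cap_sge(u32 hca_max_sge)
{
	return min_t(u32, hca_max_sge, MAX_SG_PER_WQE);
}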
sg_pool.c
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/mempool.h>
#include <linux/slab.h>

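/*
 * SG_MEMPOOL_NR is the number of pools in the sg_pools[] ladder below;
 * SG_MEMPOOL_SIZE is the minimum number of elements each mempool keeps
 * in reserve (passed as min_nr to mempool_create_slab_pool()).
 */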
#define SG_MEMPOOL_NR		ARRAY_SIZE(sg_pools)
#define SG_MEMPOOL_SIZE		2

struct sg_pool {
	size_t		size;
	char		*name;
	struct kmem_cache	*slab;
	mempool_t	*pool;
};

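/*
 * SP(x) mixes a designated initializer (.size = x) with a positional
 * one: the string literal then initializes the next member in
 * declaration order, i.e. .name. Valid C99, if a little terse.
 */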
#define SP(x) { .size = x, "sgpool-" __stringify(x) }
#if (SG_CHUNK_SIZE < 32)
#error SG_CHUNK_SIZE is too small (must be 32 or greater)
#endif
static struct sg_pool sg_pools[] = {
	SP(8),
	SP(16),
#if (SG_CHUNK_SIZE > 32)
	SP(32),
#if (SG_CHUNK_SIZE > 64)
	SP(64),
#if (SG_CHUNK_SIZE > 128)
	SP(128),
#if (SG_CHUNK_SIZE > 256)
#error SG_CHUNK_SIZE is too large (256 MAX)
#endif
#endif
#endif
#endif
	SP(SG_CHUNK_SIZE)
};
#undef SP

static inline unsigned int sg_pool_index(unsigned short nents)
{
	unsigned int index;

	BUG_ON(nents > SG_CHUNK_SIZE);

	if (nents <= 8)
		index = 0;
	else
		index = get_count_order(nents) - 3;

	return index;
}
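
/*
 * Illustrative mapping (editorial comment; assumes SG_CHUNK_SIZE == 128):
 *   nents  1..8   -> index 0 (sgpool-8)
 *   nents  9..16  -> index 1 (sgpool-16)
 *   nents 17..32  -> index 2 (sgpool-32)
 *   nents 33..64  -> index 3 (sgpool-64)
 *   nents 65..128 -> index 4 (sgpool-128)
 */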

static void sg_pool_free(struct scatterlist *sgl, unsigned int nents)
{
	struct sg_pool *sgp;

	sgp = sg_pools + sg_pool_index(nents);
	mempool_free(sgl, sgp->pool);
}

static struct scatterlist *sg_pool_alloc(unsigned int nents, gfp_t gfp_mask)
{
	struct sg_pool *sgp;

	sgp = sg_pools + sg_pool_index(nents);
	return mempool_alloc(sgp->pool, gfp_mask);
}

/**
 * sg_free_table_chained - Free a previously mapped sg table
 * @table:	The sg table header to use
 * @first_chunk: true if a non-NULL first_chunk was passed to
 *	sg_alloc_table_chained()
 *
 *  Description:
 *    Free an sg table previously allocated and setup with
 *    sg_alloc_table_chained().
 *
 **/
void sg_free_table_chained(struct sg_table *table, bool first_chunk)
{
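	/* Nothing to free: the caller's inline first_chunk covered the table. */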
	if (first_chunk && table->orig_nents <= SG_CHUNK_SIZE)
		return;
	__sg_free_table(table, SG_CHUNK_SIZE, first_chunk, sg_pool_free);
}
EXPORT_SYMBOL_GPL(sg_free_table_chained);

/**
 * sg_alloc_table_chained - Allocate and chain SGLs in an sg table
 * @table:	The sg table header to use
 * @nents:	Number of entries in sg list
 * @first_chunk: optional preallocated first SGL chunk, or NULL
 *
 *  Description:
 *    Allocate and chain SGLs in an sg table. If @nents is larger than
 *    SG_CHUNK_SIZE a chained sg table will be set up.
 *
 **/
int sg_alloc_table_chained(struct sg_table *table, int nents,
		struct scatterlist *first_chunk)
{
	int ret;

	BUG_ON(!nents);

	if (first_chunk) {
		if (nents <= SG_CHUNK_SIZE) {
			table->nents = table->orig_nents = nents;
			sg_init_table(table->sgl, nents);
			return 0;
		}
	}

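	/* GFP_ATOMIC: callers may be in atomic context (e.g. SCSI I/O setup). */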
	ret = __sg_alloc_table(table, nents, SG_CHUNK_SIZE,
			       first_chunk, GFP_ATOMIC, sg_pool_alloc);
	if (unlikely(ret))
		sg_free_table_chained(table, (bool)first_chunk);
	return ret;
}
EXPORT_SYMBOL_GPL(sg_alloc_table_chained);
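
/*
 * Usage sketch (editorial, illustrative only; "my_request" and its
 * fields are hypothetical): a caller typically embeds SG_CHUNK_SIZE
 * inline entries so small lists avoid the mempools entirely.
 */
#if 0
struct my_request {
	struct sg_table table;
	struct scatterlist inline_sgl[SG_CHUNK_SIZE];
};

static int my_request_map(struct my_request *req, int nents)
{
	/* Uses inline_sgl when nents <= SG_CHUNK_SIZE, mempools otherwise. */
	int ret = sg_alloc_table_chained(&req->table, nents, req->inline_sgl);

	if (ret)
		return ret;
	/* ... populate and DMA-map req->table.sgl here ... */
	sg_free_table_chained(&req->table, true);
	return 0;
}
#endif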

static __init int sg_pool_init(void)
{
	int i;

	for (i = 0; i < SG_MEMPOOL_NR; i++) {
		struct sg_pool *sgp = sg_pools + i;
		int size = sgp->size * sizeof(struct scatterlist);

		sgp->slab = kmem_cache_create(sgp->name, size, 0,
				SLAB_HWCACHE_ALIGN, NULL);
		if (!sgp->slab) {
			printk(KERN_ERR "SG_POOL: can't init sg slab %s\n",
					sgp->name);
			goto cleanup_sdb;
		}

		sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE,
						     sgp->slab);
		if (!sgp->pool) {
			printk(KERN_ERR "SG_POOL: can't init sg mempool %s\n",
					sgp->name);
			goto cleanup_sdb;
		}
	}

	return 0;

cleanup_sdb:
	for (i = 0; i < SG_MEMPOOL_NR; i++) {
		struct sg_pool *sgp = sg_pools + i;
		if (sgp->pool)
			mempool_destroy(sgp->pool);
		if (sgp->slab)
			kmem_cache_destroy(sgp->slab);
	}

	return -ENOMEM;
}

static __exit void sg_pool_exit(void)
{
	int i;

	for (i = 0; i < SG_MEMPOOL_NR; i++) {
		struct sg_pool *sgp = sg_pools + i;
		mempool_destroy(sgp->pool);
		kmem_cache_destroy(sgp->slab);
	}
}

module_init(sg_pool_init);
module_exit(sg_pool_exit);