Revision c0cf4512a31eb3cec70b066bc36ed55f7d05b8c0 authored by Bart Van Assche on 23 June 2016, 07:35:48 UTC, committed by Doug Ledford on 23 June 2016, 16:04:09 UTC
The memory needed for the send and receive queues associated with
a QP is proportional to the max_sge parameter. The current value
of that parameter is such that, with an mlx4 HCA, the QP buffer
size is 8 MB. Since DMA is used for communication between HCA and
CPU, that buffer either has to be allocated coherently or
map_single() must succeed for it. Since large contiguous
allocations are fragile and since the maximum segment size for
e.g. swiotlb is 256 KB, reduce the max_sge parameter. This patch
prevents the following text from appearing on the console after
SRP logout and relogin on a system equipped with multiple IB HCAs:

mlx4_core 0000:05:00.0: swiotlb buffer is full (sz: 8388608 bytes)
swiotlb: coherent allocation failed for device 0000:05:00.0 size=8388608
CPU: 11 PID: 148 Comm: kworker/11:1 Not tainted 4.7.0-rc4-dbg+ #1
Call Trace:
 [<ffffffff812c6d35>] dump_stack+0x67/0x92
 [<ffffffff812efe71>] swiotlb_alloc_coherent+0x141/0x150
 [<ffffffff810458be>] x86_swiotlb_alloc_coherent+0x3e/0x50
 [<ffffffffa03861fa>] mlx4_buf_direct_alloc.isra.5+0x9a/0x120 [mlx4_core]
 [<ffffffffa0386545>] mlx4_buf_alloc+0x165/0x1a0 [mlx4_core]
 [<ffffffffa035053d>] create_qp_common.isra.29+0x57d/0xff0 [mlx4_ib]
 [<ffffffffa03510da>] mlx4_ib_create_qp+0x12a/0x3f0 [mlx4_ib]
 [<ffffffffa031154a>] ib_create_qp+0x3a/0x250 [ib_core]
 [<ffffffffa055dd4b>] srpt_cm_handler+0x4bb/0xcad [ib_srpt]
 [<ffffffffa02c1ab0>] cm_process_work+0x20/0xf0 [ib_cm]
 [<ffffffffa02c3640>] cm_work_handler+0x1ac0/0x2059 [ib_cm]
 [<ffffffff810737ed>] process_one_work+0x19d/0x490
 [<ffffffff81073b29>] worker_thread+0x49/0x490
 [<ffffffff8107a0ea>] kthread+0xea/0x100
 [<ffffffff815b25af>] ret_from_fork+0x1f/0x40

Fixes: b99f8e4d7bcd ("IB/srpt: convert to the generic RDMA READ/WRITE API")
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Doug Ledford <dledford@redhat.com>
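For context, the QP buffer that mlx4 allocates grows with the max_send_sge
and max_recv_sge values passed in struct ib_qp_init_attr, so capping those
values keeps the allocation well below the swiotlb segment limit. The sketch
below shows QP creation with a small, fixed SGE count; SRPT_MAX_SGE,
example_create_qp() and the chosen values are illustrative assumptions, not
the exact change made by this commit.

#include <rdma/ib_verbs.h>

/* Illustrative limit only; the real value is chosen by the ib_srpt driver. */
#define SRPT_MAX_SGE	16

static struct ib_qp *example_create_qp(struct ib_pd *pd, struct ib_cq *cq,
				       int sq_size)
{
	struct ib_qp_init_attr qp_init = {
		.send_cq     = cq,
		.recv_cq     = cq,
		.sq_sig_type = IB_SIGNAL_REQ_WR,
		.qp_type     = IB_QPT_RC,
		.cap = {
			.max_send_wr  = sq_size,
			.max_recv_wr  = sq_size,
			/*
			 * A small SGE count keeps the send/receive queue
			 * buffer far below the 256 KB swiotlb segment size.
			 */
			.max_send_sge = SRPT_MAX_SGE,
			.max_recv_sge = SRPT_MAX_SGE,
		},
	};

	return ib_create_qp(pd, &qp_init);
}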
ioremap.c
/*
 * Re-map IO memory to kernel address space so that we can access it.
 * This is needed for high PCI addresses that aren't mapped in the
 * 640k-1MB IO memory area on PCs
 *
 * (C) Copyright 1995 1996 Linus Torvalds
 */
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/io.h>
#include <linux/export.h>
#include <asm/cacheflush.h>
#include <asm/pgtable.h>

#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
static int __read_mostly ioremap_pud_capable;
static int __read_mostly ioremap_pmd_capable;
static int __read_mostly ioremap_huge_disabled;

static int __init set_nohugeiomap(char *str)
{
	ioremap_huge_disabled = 1;
	return 0;
}
early_param("nohugeiomap", set_nohugeiomap);

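/*
 * Record, once during boot, whether the architecture supports PUD- and
 * PMD-sized ioremap mappings, unless "nohugeiomap" was given on the
 * kernel command line.
 */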
void __init ioremap_huge_init(void)
{
	if (!ioremap_huge_disabled) {
		if (arch_ioremap_pud_supported())
			ioremap_pud_capable = 1;
		if (arch_ioremap_pmd_supported())
			ioremap_pmd_capable = 1;
	}
}

static inline int ioremap_pud_enabled(void)
{
	return ioremap_pud_capable;
}

static inline int ioremap_pmd_enabled(void)
{
	return ioremap_pmd_capable;
}

#else	/* !CONFIG_HAVE_ARCH_HUGE_VMAP */
static inline int ioremap_pud_enabled(void) { return 0; }
static inline int ioremap_pmd_enabled(void) { return 0; }
#endif	/* CONFIG_HAVE_ARCH_HUGE_VMAP */

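/* Map [addr, end) to phys_addr one base page at a time; entries must be unused. */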
static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	pte_t *pte;
	u64 pfn;

	pfn = phys_addr >> PAGE_SHIFT;
	pte = pte_alloc_kernel(pmd, addr);
	if (!pte)
		return -ENOMEM;
	do {
		BUG_ON(!pte_none(*pte));
		set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	return 0;
}

static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	pmd_t *pmd;
	unsigned long next;

	phys_addr -= addr;
	pmd = pmd_alloc(&init_mm, pud, addr);
	if (!pmd)
		return -ENOMEM;
	do {
		next = pmd_addr_end(addr, end);

		if (ioremap_pmd_enabled() &&
		    ((next - addr) == PMD_SIZE) &&
		    IS_ALIGNED(phys_addr + addr, PMD_SIZE)) {
			if (pmd_set_huge(pmd, phys_addr + addr, prot))
				continue;
		}

		if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot))
			return -ENOMEM;
	} while (pmd++, addr = next, addr != end);
	return 0;
}

static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr,
		unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	pud_t *pud;
	unsigned long next;

	phys_addr -= addr;
	pud = pud_alloc(&init_mm, pgd, addr);
	if (!pud)
		return -ENOMEM;
	do {
		next = pud_addr_end(addr, end);

		if (ioremap_pud_enabled() &&
		    ((next - addr) == PUD_SIZE) &&
		    IS_ALIGNED(phys_addr + addr, PUD_SIZE)) {
			if (pud_set_huge(pud, phys_addr + addr, prot))
				continue;
		}

		if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot))
			return -ENOMEM;
	} while (pud++, addr = next, addr != end);
	return 0;
}

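/*
 * Build the kernel page-table entries that map the virtual range
 * [addr, end) to phys_addr with protection @prot, using PUD/PMD-sized
 * mappings where the architecture and alignment allow it.
 */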
int ioremap_page_range(unsigned long addr,
		       unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
	pgd_t *pgd;
	unsigned long start;
	unsigned long next;
	int err;

	BUG_ON(addr >= end);

	start = addr;
	phys_addr -= addr;
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, prot);
		if (err)
			break;
	} while (pgd++, addr = next, addr != end);

	flush_cache_vmap(start, end);

	return err;
}
EXPORT_SYMBOL_GPL(ioremap_page_range);
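
As a usage note, ioremap_page_range() is the helper that arch ioremap()
implementations call after reserving a piece of vmalloc address space. A
minimal sketch follows; example_ioremap() is hypothetical and omits the
page-alignment and offset handling a real implementation needs.

/*
 * Minimal sketch, not an actual arch implementation: reserve vmalloc
 * address space and let ioremap_page_range() build the mapping.
 */
static void __iomem *example_ioremap(phys_addr_t phys_addr, size_t size,
				     pgprot_t prot)
{
	struct vm_struct *area;
	unsigned long addr;

	area = get_vm_area(size, VM_IOREMAP);
	if (!area)
		return NULL;

	addr = (unsigned long)area->addr;
	if (ioremap_page_range(addr, addr + size, phys_addr, prot)) {
		free_vm_area(area);
		return NULL;
	}

	return (void __iomem *)addr;
}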