Revision 1cbe06c3cf542d48eb22180163e00f91760ef8cd authored by Linus Torvalds on 28 May 2016, 18:04:16 UTC, committed by Linus Torvalds on 28 May 2016, 18:04:16 UTC
Pull more rdma updates from Doug Ledford:
 "This is the second group of code for the 4.7 merge window.  It looks
  large, but only in one sense.  I'll get to that in a minute.  The list
  of changes here breaks down as follows:

   - Dynamic counter infrastructure in the IB drivers

     This is a sysfs based code to allow free form access to the
     hardware counters RDMA devices might support so drivers don't need
     to code this up repeatedly themselves

   - SendOnlyFullMember multicast support

   - IB router support

   - A couple misc fixes

   - The big item on the list: hfi1 driver updates, plus moving the hfi1
     driver out of staging

  There was a group of 15 patches in the hfi1 list that I thought I had
  in the first pull request but they weren't.  So that added to the
  length of the hfi1 section here.

  As far as these go, everything but the hfi1 is pretty straight
  forward.

  The hfi1 is, if you recall, the driver that Al had complaints about
  how it used the write/writev interfaces in an overloaded fashion.  The
  write portion of their interface behaved like the write handler in the
  IB stack proper and did bi-directional communications.  The writev
  interface, on the other hand, only accepts SDMA request structures.
  The completions for those structures are sent back via an entirely
  different event mechanism.

  With the security patch, we put security checks on the write
  interface, however, we also knew they would be going away soon.  Now,
  we've converted the write handler in the hfi1 driver to use ioctls
  from the IB reserved magic area for its bidirectional communications.
  With that change, Intel has addressed all of the items originally on
  their TODO when they went into staging (as well as many items added to
  the list later).

  As such, I moved them out, and since they were the last item in the
  staging/rdma directory, and I don't have immediate plans to use the
  staging area again, I removed the staging/rdma area.

  Because of the move out of staging, as well as a series of 5 patches
  in the hfi1 driver that removed code people thought should be done in
  a different way and was optional to begin with (a snoop debug
  interface, an eeprom driver for an eeprom connected directory to their
  hfi1 chip and not via an i2c bus, and a few other things like that),
  the line count, especially the removal count, is high"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (56 commits)
  staging/rdma: Remove the entire rdma subdirectory of staging
  IB/core: Make device counter infrastructure dynamic
  IB/hfi1: Fix pio map initialization
  IB/hfi1: Correct 8051 link parameter settings
  IB/hfi1: Update pkey table properly after link down or FM start
  IB/rdamvt: Fix rdmavt s_ack_queue sizing
  IB/rdmavt: Max atomic value should be a u8
  IB/hfi1: Fix hard lockup due to not using save/restore spin lock
  IB/hfi1: Add tracing support for send with invalidate opcode
  IB/hfi1, qib: Add ieth to the packet header definitions
  IB/hfi1: Move driver out of staging
  IB/hfi1: Do not free hfi1 cdev parent structure early
  IB/hfi1: Add trace message in user IOCTL handling
  IB/hfi1: Remove write(), use ioctl() for user cmds
  IB/hfi1: Add ioctl() interface for user commands
  IB/hfi1: Remove unused user command
  IB/hfi1: Remove snoop/diag interface
  IB/hfi1: Remove EPROM functionality from data device
  IB/hfi1: Remove UI char device
  IB/hfi1: Remove multiple device cdev
  ...
2 parent s ed2608f + 7a226f9
Raw File
flex_array.c
/*
 * Flexible array managed in PAGE_SIZE parts
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2009
 *
 * Author: Dave Hansen <dave@linux.vnet.ibm.com>
 */

#include <linux/flex_array.h>
#include <linux/slab.h>
#include <linux/stddef.h>
#include <linux/export.h>
#include <linux/reciprocal_div.h>

struct flex_array_part {
	char elements[FLEX_ARRAY_PART_SIZE];
};

/*
 * If a user requests an allocation which is small
 * enough, we may simply use the space in the
 * flex_array->parts[] array to store the user
 * data.
 */
static inline int elements_fit_in_base(struct flex_array *fa)
{
	int data_size = fa->element_size * fa->total_nr_elements;
	if (data_size <= FLEX_ARRAY_BASE_BYTES_LEFT)
		return 1;
	return 0;
}

/**
 * flex_array_alloc - allocate a new flexible array
 * @element_size:	the size of individual elements in the array
 * @total:		total number of elements that this should hold
 * @flags:		page allocation flags to use for base array
 *
 * Note: all locking must be provided by the caller.
 *
 * @total is used to size internal structures.  If the user ever
 * accesses any array indexes >=@total, it will produce errors.
 *
 * The maximum number of elements is defined as: the number of
 * elements that can be stored in a page times the number of
 * page pointers that we can fit in the base structure or (using
 * integer math):
 *
 * 	(PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *)
 *
 * Here's a table showing example capacities.  Note that the maximum
 * index that the get/put() functions is just nr_objects-1.   This
 * basically means that you get 4MB of storage on 32-bit and 2MB on
 * 64-bit.
 *
 *
 * Element size | Objects | Objects |
 * PAGE_SIZE=4k |  32-bit |  64-bit |
 * ---------------------------------|
 *      1 bytes | 4177920 | 2088960 |
 *      2 bytes | 2088960 | 1044480 |
 *      3 bytes | 1392300 |  696150 |
 *      4 bytes | 1044480 |  522240 |
 *     32 bytes |  130560 |   65408 |
 *     33 bytes |  126480 |   63240 |
 *   2048 bytes |    2040 |    1020 |
 *   2049 bytes |    1020 |     510 |
 *       void * | 1044480 |  261120 |
 *
 * Since 64-bit pointers are twice the size, we lose half the
 * capacity in the base structure.  Also note that no effort is made
 * to efficiently pack objects across page boundaries.
 */
struct flex_array *flex_array_alloc(int element_size, unsigned int total,
					gfp_t flags)
{
	struct flex_array *ret;
	int elems_per_part = 0;
	int max_size = 0;
	struct reciprocal_value reciprocal_elems = { 0 };

	if (element_size) {
		elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size);
		reciprocal_elems = reciprocal_value(elems_per_part);
		max_size = FLEX_ARRAY_NR_BASE_PTRS * elems_per_part;
	}

	/* max_size will end up 0 if element_size > PAGE_SIZE */
	if (total > max_size)
		return NULL;
	ret = kzalloc(sizeof(struct flex_array), flags);
	if (!ret)
		return NULL;
	ret->element_size = element_size;
	ret->total_nr_elements = total;
	ret->elems_per_part = elems_per_part;
	ret->reciprocal_elems = reciprocal_elems;
	if (elements_fit_in_base(ret) && !(flags & __GFP_ZERO))
		memset(&ret->parts[0], FLEX_ARRAY_FREE,
						FLEX_ARRAY_BASE_BYTES_LEFT);
	return ret;
}
EXPORT_SYMBOL(flex_array_alloc);

static int fa_element_to_part_nr(struct flex_array *fa,
					unsigned int element_nr)
{
	/*
	 * if element_size == 0 we don't get here, so we never touch
	 * the zeroed fa->reciprocal_elems, which would yield invalid
	 * results
	 */
	return reciprocal_divide(element_nr, fa->reciprocal_elems);
}

/**
 * flex_array_free_parts - just free the second-level pages
 * @fa:		the flex array from which to free parts
 *
 * This is to be used in cases where the base 'struct flex_array'
 * has been statically allocated and should not be free.
 */
void flex_array_free_parts(struct flex_array *fa)
{
	int part_nr;

	if (elements_fit_in_base(fa))
		return;
	for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++)
		kfree(fa->parts[part_nr]);
}
EXPORT_SYMBOL(flex_array_free_parts);

void flex_array_free(struct flex_array *fa)
{
	flex_array_free_parts(fa);
	kfree(fa);
}
EXPORT_SYMBOL(flex_array_free);

static unsigned int index_inside_part(struct flex_array *fa,
					unsigned int element_nr,
					unsigned int part_nr)
{
	unsigned int part_offset;

	part_offset = element_nr - part_nr * fa->elems_per_part;
	return part_offset * fa->element_size;
}

static struct flex_array_part *
__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags)
{
	struct flex_array_part *part = fa->parts[part_nr];
	if (!part) {
		part = kmalloc(sizeof(struct flex_array_part), flags);
		if (!part)
			return NULL;
		if (!(flags & __GFP_ZERO))
			memset(part, FLEX_ARRAY_FREE,
				sizeof(struct flex_array_part));
		fa->parts[part_nr] = part;
	}
	return part;
}

/**
 * flex_array_put - copy data into the array at @element_nr
 * @fa:		the flex array to copy data into
 * @element_nr:	index of the position in which to insert
 * 		the new element.
 * @src:	address of data to copy into the array
 * @flags:	page allocation flags to use for array expansion
 *
 *
 * Note that this *copies* the contents of @src into
 * the array.  If you are trying to store an array of
 * pointers, make sure to pass in &ptr instead of ptr.
 * You may instead wish to use the flex_array_put_ptr()
 * helper function.
 *
 * Locking must be provided by the caller.
 */
int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src,
			gfp_t flags)
{
	int part_nr = 0;
	struct flex_array_part *part;
	void *dst;

	if (element_nr >= fa->total_nr_elements)
		return -ENOSPC;
	if (!fa->element_size)
		return 0;
	if (elements_fit_in_base(fa))
		part = (struct flex_array_part *)&fa->parts[0];
	else {
		part_nr = fa_element_to_part_nr(fa, element_nr);
		part = __fa_get_part(fa, part_nr, flags);
		if (!part)
			return -ENOMEM;
	}
	dst = &part->elements[index_inside_part(fa, element_nr, part_nr)];
	memcpy(dst, src, fa->element_size);
	return 0;
}
EXPORT_SYMBOL(flex_array_put);

/**
 * flex_array_clear - clear element in array at @element_nr
 * @fa:		the flex array of the element.
 * @element_nr:	index of the position to clear.
 *
 * Locking must be provided by the caller.
 */
int flex_array_clear(struct flex_array *fa, unsigned int element_nr)
{
	int part_nr = 0;
	struct flex_array_part *part;
	void *dst;

	if (element_nr >= fa->total_nr_elements)
		return -ENOSPC;
	if (!fa->element_size)
		return 0;
	if (elements_fit_in_base(fa))
		part = (struct flex_array_part *)&fa->parts[0];
	else {
		part_nr = fa_element_to_part_nr(fa, element_nr);
		part = fa->parts[part_nr];
		if (!part)
			return -EINVAL;
	}
	dst = &part->elements[index_inside_part(fa, element_nr, part_nr)];
	memset(dst, FLEX_ARRAY_FREE, fa->element_size);
	return 0;
}
EXPORT_SYMBOL(flex_array_clear);

/**
 * flex_array_prealloc - guarantee that array space exists
 * @fa:			the flex array for which to preallocate parts
 * @start:		index of first array element for which space is allocated
 * @nr_elements:	number of elements for which space is allocated
 * @flags:		page allocation flags
 *
 * This will guarantee that no future calls to flex_array_put()
 * will allocate memory.  It can be used if you are expecting to
 * be holding a lock or in some atomic context while writing
 * data into the array.
 *
 * Locking must be provided by the caller.
 */
int flex_array_prealloc(struct flex_array *fa, unsigned int start,
			unsigned int nr_elements, gfp_t flags)
{
	int start_part;
	int end_part;
	int part_nr;
	unsigned int end;
	struct flex_array_part *part;

	if (!start && !nr_elements)
		return 0;
	if (start >= fa->total_nr_elements)
		return -ENOSPC;
	if (!nr_elements)
		return 0;

	end = start + nr_elements - 1;

	if (end >= fa->total_nr_elements)
		return -ENOSPC;
	if (!fa->element_size)
		return 0;
	if (elements_fit_in_base(fa))
		return 0;
	start_part = fa_element_to_part_nr(fa, start);
	end_part = fa_element_to_part_nr(fa, end);
	for (part_nr = start_part; part_nr <= end_part; part_nr++) {
		part = __fa_get_part(fa, part_nr, flags);
		if (!part)
			return -ENOMEM;
	}
	return 0;
}
EXPORT_SYMBOL(flex_array_prealloc);

/**
 * flex_array_get - pull data back out of the array
 * @fa:		the flex array from which to extract data
 * @element_nr:	index of the element to fetch from the array
 *
 * Returns a pointer to the data at index @element_nr.  Note
 * that this is a copy of the data that was passed in.  If you
 * are using this to store pointers, you'll get back &ptr.  You
 * may instead wish to use the flex_array_get_ptr helper.
 *
 * Locking must be provided by the caller.
 */
void *flex_array_get(struct flex_array *fa, unsigned int element_nr)
{
	int part_nr = 0;
	struct flex_array_part *part;

	if (!fa->element_size)
		return NULL;
	if (element_nr >= fa->total_nr_elements)
		return NULL;
	if (elements_fit_in_base(fa))
		part = (struct flex_array_part *)&fa->parts[0];
	else {
		part_nr = fa_element_to_part_nr(fa, element_nr);
		part = fa->parts[part_nr];
		if (!part)
			return NULL;
	}
	return &part->elements[index_inside_part(fa, element_nr, part_nr)];
}
EXPORT_SYMBOL(flex_array_get);

/**
 * flex_array_get_ptr - pull a ptr back out of the array
 * @fa:		the flex array from which to extract data
 * @element_nr:	index of the element to fetch from the array
 *
 * Returns the pointer placed in the flex array at element_nr using
 * flex_array_put_ptr().  This function should not be called if the
 * element in question was not set using the _put_ptr() helper.
 */
void *flex_array_get_ptr(struct flex_array *fa, unsigned int element_nr)
{
	void **tmp;

	tmp = flex_array_get(fa, element_nr);
	if (!tmp)
		return NULL;

	return *tmp;
}
EXPORT_SYMBOL(flex_array_get_ptr);

static int part_is_free(struct flex_array_part *part)
{
	int i;

	for (i = 0; i < sizeof(struct flex_array_part); i++)
		if (part->elements[i] != FLEX_ARRAY_FREE)
			return 0;
	return 1;
}

/**
 * flex_array_shrink - free unused second-level pages
 * @fa:		the flex array to shrink
 *
 * Frees all second-level pages that consist solely of unused
 * elements.  Returns the number of pages freed.
 *
 * Locking must be provided by the caller.
 */
int flex_array_shrink(struct flex_array *fa)
{
	struct flex_array_part *part;
	int part_nr;
	int ret = 0;

	if (!fa->total_nr_elements || !fa->element_size)
		return 0;
	if (elements_fit_in_base(fa))
		return ret;
	for (part_nr = 0; part_nr < FLEX_ARRAY_NR_BASE_PTRS; part_nr++) {
		part = fa->parts[part_nr];
		if (!part)
			continue;
		if (part_is_free(part)) {
			fa->parts[part_nr] = NULL;
			kfree(part);
			ret++;
		}
	}
	return ret;
}
EXPORT_SYMBOL(flex_array_shrink);
back to top