Revision ece2a7fa6f0294e48411251e0fa5294a521a4521 authored by Roberto Di Cosmo on 19 November 2011, 07:18:36 UTC, committed by Roberto Di Cosmo on 19 November 2011, 07:18:36 UTC
1 parent 355de51
parmap.mli
(**************************************************************************)
(* ParMap: a simple library to perform Map computations on a multi-core *)
(* *)
(* Author(s): Marco Danelutto, Roberto Di Cosmo *)
(* *)
(* This library is free software: you can redistribute it and/or modify *)
(* it under the terms of the GNU Lesser General Public License as *)
(* published by the Free Software Foundation, either version 2 of the *)
(* License, or (at your option) any later version. A special linking *)
(* exception to the GNU Lesser General Public License applies to this *)
(* library, see the LICENSE file for more information. *)
(**************************************************************************)
(** Module [Parmap]: parallel map on multicores. *)
(** {6 Sequence type, subsuming lists and arrays} *)
type 'a sequence = L of 'a list | A of 'a array;;
(** Input container accepted by the generic parallel functions of this
    module: [L l] wraps a list [l], and [A a] wraps an array [a]. *)
(** {6 Parallel mapfold} *)
val parmapfold : ?ncores:int -> ?chunksize:int -> ('a -> 'b) -> 'a sequence -> ('b-> 'c -> 'c) -> 'c -> ('c->'c->'c) -> 'c
(** [parmapfold ~ncores:n f (L l) op b concat] computes
    [List.fold_right op (List.map f l) b] by forking [n] processes on a
    multicore machine.
    [parmapfold ~ncores:n f (A a) op b concat] similarly computes
    [Array.fold_right op (Array.map f a) b].

    You need to provide the extra [concat] operator to combine the partial
    results of the fold computed on each core. If ['b = 'c], then [concat]
    may simply be [op].

    The order of computation in parallel changes w.r.t. sequential
    execution, so this function is only correct if [op] and [concat] are
    associative and commutative.

    NOTE(review): the optional [chunksize] parameter is not described for
    this function; presumably it selects the same on-demand, block-wise
    scheduling documented for [parmap] -- confirm against the
    implementation. *)
(** {6 Parallel fold} *)
val parfold: ?ncores:int -> ?chunksize:int -> ('a -> 'b -> 'b) -> 'a sequence -> 'b -> ('b->'b->'b) -> 'b
(** [parfold ~ncores:n op (L l) b concat] computes [List.fold_right op l b]
    by forking [n] processes on a multicore machine.
    [parfold ~ncores:n op (A a) b concat] similarly computes
    [Array.fold_right op a b].

    You need to provide the extra [concat] operator to combine the partial
    results of the fold computed on each core. If ['a = 'b], then [concat]
    may simply be [op].

    The order of computation in parallel changes w.r.t. sequential
    execution, so this function is only correct if [op] and [concat] are
    associative and commutative.

    NOTE(review): the optional [chunksize] parameter is not described for
    this function; presumably it selects the same on-demand, block-wise
    scheduling documented for [parmap] -- confirm against the
    implementation. *)
(** {6 Parallel map} *)
val parmap : ?ncores:int -> ?chunksize:int -> ('a -> 'b) -> 'a sequence -> 'b list
(** [parmap ~ncores:n f (L l)] computes [List.map f l]
    by forking [n] processes on a multicore machine.
    [parmap ~ncores:n f (A a)] computes the same mapping over the elements
    of the array [a], again by forking [n] processes; note that, as the
    signature shows, the result is returned as a list in both cases.

    If the optional [chunksize] parameter is specified, the processes
    compute the result in an on-demand fashion on blocks of size
    [chunksize]; this provides automatic load balancing for unbalanced
    computations, but the order of the result is no longer guaranteed
    to be preserved. *)
(** {6 Parallel map on arrays} *)
val array_parmap : ?ncores:int -> ?chunksize:int -> ('a -> 'b) -> 'a array -> 'b array
(** [array_parmap ~ncores:n f a] computes [Array.map f a]
    by forking [n] processes on a multicore machine.

    If the optional [chunksize] parameter is specified, the processes
    compute the result in an on-demand fashion on blocks of size
    [chunksize]; this provides automatic load balancing for unbalanced
    computations, but the order of the result is no longer guaranteed
    to be preserved. *)
(** {6 Parallel map on float arrays } *)
exception WrongArraySize
(** Raised by [array_float_parmap] when the array passed as the optional
    [result] parameter is too small to hold the data. *)

val array_float_parmap : ?ncores:int -> ?chunksize:int -> ?result: float array -> ('a -> float) -> 'a array -> float array
(** [array_float_parmap ~ncores:n f a] computes [Array.map f a]
    by forking [n] processes on a multicore machine, and
    preallocating the resulting array as shared memory,
    which allows significantly more efficient computation
    than calling the generic [array_parmap] function.

    In case you already have at hand an array where to store
    the result, you can squeeze out some more cycles and memory
    by passing it as the optional parameter [result]: this will
    avoid the creation of a result array, which can be costly
    for very large data sets.

    If the optional [chunksize] parameter is specified,
    the processes compute the result in an on-demand fashion
    on blocks of size [chunksize]; this provides automatic
    load balancing for unbalanced computations, {e and} the order
    of the result is still guaranteed to be preserved.

    @raise WrongArraySize if [result] is too small to hold the data. *)
Computing file changes ...