Raw File
isplit.py
#!/usr/bin/env python
# *-* encoding: utf8
# 
# Copyright (c) 2006 Stian Soiland
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Author: Stian Soiland <stian@soiland.no>
# URL: http://soiland.no/i/src/
# License: MIT
#

"""Split iterable into sub-iterators.

Use instead of itertools.izip(*iterable) when you want to have an
iterator for each index, for instance to save memory. 

Note that you should iterate over all the return sub-iterators to avoid
a large buffer building up for the unfetched values.
"""


from itertools import tee, chain

def isplit(iterable, n=None):
    """Split iterable into indexing sub-iterators.

    Opposite of itertools.izip() - split an iterable to n sub-iterators,
    where n is the length of the first item of the iterable.
    The items should be indexable, and each sub-iterator will yield
    items from a particular index. 

    If any of the items from iterable don't have the required index, it
    will be skipped. 

    If parameter n is supplied, this many index-subiterators will be
    returned, and the iterable's first element will not be inspected.

    Examples:

        >>> coords = [(1,2), (3,4), (5,6), (7,8)]
        >>> it_x, it_y = isplit(coords)
        >>> print list(it_x)
        [1, 3, 5, 7]
        >>> for y in it_y: 
        ...     print y,
        2 4 6 8
        >>> import itertools
        >>> ranges = itertools.izip(range(10), range(5,15), range(10,20))
        >>> ranges.next()
        (0, 5, 10)
        >>> x,y,z = isplit(ranges)
        >>> x.next()
        1
        >>> y.next() 
        6
        >>> for elem in z:
        ...    print elem,
        11 12 13 14 15 16 17 18 19
        >>> y.next()
        7
        >>> mess = [(0,1), (2,3), (4,5,6), (7,), (), (8,9,10,11)]
        >>> mess = iter(mess)
        >>> x,y,z = isplit(mess, n=3)
        >>> # Not affected yet, since we have specified n=3
        >>> mess.next() 
        (0, 1)
        >>> x.next()  # The 0 was eaten by mess.next()
        2
        >>> x.next()
        4
        >>> list(y)
        [3, 5, 9]
        >>> list(z)  # Missing elements are skipped
        [6, 10]
    """
    iterable = iter(iterable)
    if n is None:
        # Find length by inspecting first item
        first = iterable.next()
        n = len(first)
        # Re-insert the item into the iterable
        iterable = chain([first], iterable)

    # Generate len(first) independant (tee-ed) iterators
    sub_iters = tee(iterable, n)
    for i,sub_iter in enumerate(sub_iters):
        # Closure for keeping i and sub_iter 
        def closure(i=i, sub_iter=sub_iter):
            for elem in sub_iter:
                try:
                    yield elem[i]
                except IndexError:
                    continue    
        # Yield our generator    
        yield closure()    
            

def _test():
    import doctest
    doctest.testmod()

if __name__ == "__main__":
    _test()

back to top