Source code for xoutil.iterators

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------
# xoutil.iterators
# ---------------------------------------------------------------------
# Copyright (c) 2015-2017 Merchise and Contributors
# Copyright (c) 2013, 2014 Merchise Autrement and Contributors
# Copyright (c) 2011, 2012 Medardo Rodríguez
# All rights reserved.
#
# Author: Manuel Vázquez Acosta <mva.led@gmail.com>
# Contributors: see CONTRIBUTORS and HISTORY file
#
# This is free software; you can redistribute it and/or modify it under the
# terms of the LICENCE attached (see LICENCE file) in the distribution
# package.
#
# Created on 2011-11-08

'''Several util functions for iterators'''

from __future__ import (division as _py3_division,
                        print_function as _py3_print,
                        unicode_literals as _py3_unicode,
                        absolute_import)

from xoutil import Unset
from xoutil.types import is_scalar

from xoutil.deprecation import deprecated


def first_non_null(iterable, default=None):
    '''Return the first item of `iterable` that evaluates to true.

    "Non-null" here means *truthy*: values such as ``None``, ``0``, ``''``
    and empty containers are all skipped.  This is roughly the same as::

         next((x for x in iter(iterable) if x), default)

    :param iterable: Any iterable; it is consumed only up to (and including)
                     the first truthy item.

    :param default: Value returned when no item is truthy.

    .. versionadded:: 1.4.0

    '''
    for candidate in iter(iterable):
        if candidate:
            return candidate
    return default
def flatten(sequence, is_scalar=is_scalar, depth=None):
    '''Flattens out a sequence.

    It takes care of everything deemed a collection (i.e, not a scalar
    according to the callable passed in `is_scalar`)::

        >>> from xoutil.eight import range
        >>> range_ = lambda *a: list(range(*a))
        >>> tuple(flatten((1, range_(2, 5), range(5, 10))))
        (1, 2, 3, 4, 5, 6, 7, 8, 9)

    If `depth` is None the collection is flattened recursively until the
    "bottom" is reached.  If `depth` is an integer then the collection is
    flattened up to that level.  `depth=0` means not to flatten.  Nested
    iterators are not "exploded" if under the stated `depth`::

        # In the following doctest we use ``...range(...X)`` because the
        # string repr of range differs in Py2 and Py3k.

        >>> tuple(flatten((range_(2), range(2, 4)), depth=0))  # doctest: +ELLIPSIS
        ([0, 1], ...range(2, 4))

        >>> tuple(flatten((range(2), range_(2, 4)), depth=0))  # doctest: +ELLIPSIS
        (...range(...2), [2, 3])

    The same `depth` applies to every item of `sequence`, regardless of its
    position::

        >>> tuple(flatten(([1, [2]], [3, [4]]), depth=1))
        (1, [2], 3, [4])

    :param sequence: The iterable to flatten.

    :param is_scalar: A callable that takes an item and returns true if the
                      item must be yielded as is, without descending into it.

    :param depth: None to flatten without limit, or the number of nesting
                  levels to flatten.

    :returns: A generator of the flattened items.

    '''
    # The depth for nested collections is computed once.  Rebinding `depth`
    # inside the loop would shrink the budget for each subsequent sibling,
    # so later collections would be flattened less than earlier ones.
    inner_depth = None if depth is None else depth - 1
    for item in sequence:
        if is_scalar(item) or depth == 0:
            yield item
        else:
            for subitem in flatten(item, is_scalar, depth=inner_depth):
                yield subitem
def multi_pop(source, *keys):
    '''Lazily pop from `source` the values of all given `keys`.

    :param source: Any compatible mapping.

    :param keys: Keys to pop values.

    Keys that are not found are ignored.  The result is a generator: each
    value is popped from `source` only when it is requested.

    Examples::

      >>> d = {'x': 1, 'y': 2, 'z': 3}
      >>> next(multi_pop(d, 'a', 'y', 'x'), '---')
      2

      >>> next(multi_pop(d, 'a', 'y', 'x'), '---')
      1

      >>> next(multi_pop(d, 'a', 'y', 'x'), '---')
      '---'

    '''
    present = (name for name in keys if name in source)
    return (source.pop(name) for name in present)


def multi_get(source, *keys):
    '''Lazily get from `source` the values of all given `keys`.

    :param source: Any compatible mapping.

    :param keys: Keys to get values.

    Keys that are not found are ignored.  The result is a generator and
    `source` is not modified.

    Examples::

      >>> d = {'x': 1, 'y': 2, 'z': 3}
      >>> next(multi_get(d, 'a', 'y', 'x'), '---')
      2

      >>> next(multi_get(d, 'a', 'y', 'x'), '---')
      2

      >>> next(multi_get(d, 'a', 'b'), '---')
      '---'

    '''
    present = (name for name in keys if name in source)
    return (source.get(name) for name in present)
def dict_update_new(target, source):
    '''Copy into `target` the items of `source` whose keys are new.

    A key is *new* when it is not already present in `target`.  Keys that
    `target` already has keep their current value.  `target` is modified in
    place and nothing is returned.

    '''
    missing = (name for name in source if name not in target)
    for name in missing:
        target[name] = source[name]
@deprecated('generator expression')
def fake_dict_iteritems(source):
    '''Iterate over the ``(key, value)`` pairs of `source`.

    `source` only needs to define a ``keys`` method and
    :meth:`~object.__getitem__`.

    A warning is issued when the iteration starts.

    .. warning:: Deprecated since 1.7.0.  This was actually in risk since
                 1.4.0.

    '''
    from warnings import warn
    warn('fake_dict_iteritems is in risk for deprecation')
    for name in source.keys():
        yield (name, source[name])
def delete_duplicates(seq, key=lambda x: x):
    '''Remove all duplicate elements from `seq`.

    Two items ``x`` and ``y`` are considered equal (duplicates) if
    ``key(x) == key(y)``.  By default `key` is the identity function.  Only
    the first occurrence of each item is kept.

    Works with any sequence that supports :func:`len`,
    :meth:`~object.__getitem__` and `addition <object.__add__>`:meth:.

    .. note:: ``seq.__getitem__`` should work properly with slices.

    .. note:: The results of `key` are kept in a :class:`set`, so they must
              be hashable.

    The return type will be the same as that of the original sequence.  The
    original sequence is not modified; if it contains no duplicates it is
    returned as is.

    .. versionadded:: 1.5.5

    .. versionchanged:: 1.7.4 Added the `key` argument. Clarified the
       documentation: `seq` should also implement the ``__add__`` method and
       that its ``__getitem__`` method should deal with slices.

    '''
    seen = set()
    index = 0
    while index < len(seq):
        marker = key(seq[index])
        if marker in seen:
            # Drop the item at `index`; the next candidate now sits at the
            # same position, so `index` must not advance.
            seq = seq[:index] + seq[index + 1:]
            continue
        seen.add(marker)
        index += 1
    return seq
def iter_delete_duplicates(iter, key=lambda x: x):
    '''Yields non-repeating items from `iter`.

    Only *consecutive* repetitions are dropped: an item is yielded whenever
    its key differs from the key of the item right before it.

    `key` has the same meaning as in `delete_duplicates`:func:.

    Examples:

      >>> list(iter_delete_duplicates('AAAaBBBA'))
      ['A', 'a', 'B', 'A']

      >>> list(iter_delete_duplicates('AAAaBBBA', key=lambda x: x.lower()))
      ['A', 'B', 'A']

    .. versionadded:: 1.7.4

    '''
    # A fresh object compares different to any key the first item may have.
    previous = object()
    for item in iter:
        current = key(item)
        changed = current != previous
        previous = current
        if changed:
            yield item
def slides(iterable, width=2, fill=None):
    '''Creates a sliding window of a given `width` over an iterable::

        >>> list(slides(range(1, 11)))
        [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10)]

    If the iterator does not yield a width-aligned number of items, the last
    slice returned is filled with `fill` (by default None)::

        >>> list(slides(range(1, 11), width=3))   # doctest: +ELLIPSIS
        [(1, 2, 3), (4, 5, 6), (7, 8, 9), (10, None, None)]

    The windows do not overlap.  Iteration stops at the first item that is
    ``Unset``, since that value is used internally to detect exhaustion.

    .. versionchanged:: 1.4.0 If the `fill` argument is a collection is cycled
                        over to get the filling, just like in :func:`first_n`.

    .. versionchanged:: 1.4.2 The `fill` argument now defaults to None,
                        instead of Unset.

    '''
    from itertools import cycle, repeat
    from xoutil.types import is_collection
    source = iter(iterable)
    window, count = [], 0
    item = next(source, Unset)
    while item is not Unset:
        if count >= width:
            # The window is complete: emit it and start a new one.  `item`
            # is kept so that it becomes the first member of the next window.
            yield tuple(window)
            window, count = [], 0
            continue
        window.append(item)
        count += 1
        item = next(source, Unset)
    if not window:
        return
    # Pad the trailing (possibly incomplete) window up to `width`.
    filler = cycle(fill) if is_collection(fill) else repeat(fill)
    while count < width:
        window.append(next(filler))
        count += 1
    yield tuple(window)
def continuously_slides(iterable, width=2, fill=None):
    '''Similar to :func:`slides` but moves one item at the time (i.e
    continuously).

    `fill` is only used to fill the first chunk if the `iterable` has less
    items than the `width` of the window.  In particular, an empty
    `iterable` produces a single chunk made only of `fill`.

    Example (generate a texts tri-grams)::

        >>> slider = continuously_slides(str('maupassant'), 3)
        >>> list(str('').join(chunk) for chunk in slider)
        ['mau', 'aup', 'upa', 'pas', 'ass', 'ssa', 'san', 'ant']

    :param iterable: The iterable to slide over.  Every item is used, no
                     value is treated as an end-of-iteration marker.

    :param width: The size of the window.  With a `width` of 0 or less each
                  chunk is an empty tuple.

    :param fill: The value used to complete the first chunk.

    :returns: A generator of tuples, each with `width` items.

    '''
    from collections import deque
    source = iter(iterable)
    # A bounded deque discards its oldest item on append, which slides the
    # window in O(1) and is also safe when the window is empty.
    window = deque(maxlen=max(width, 0))
    while len(window) < width:
        window.append(next(source, fill))
    yield tuple(window)
    # A plain `for` is used instead of a sentinel-based `next()` so that no
    # value produced by `iterable` can be mistaken for exhaustion.
    for item in source:
        window.append(item)
        yield tuple(window)
def first_n(iterable, n=1, fill=Unset):
    '''Takes the first `n` items from iterable.

    If there are less than `n` items in the iterable and `fill` is
    :class:`~xoutil.types.Unset`, the returned generator simply stops after
    the last available item; otherwise `fill` is used as a filling pattern
    as explained below.

    :param iterable: An iterable from which the first `n` items should be
                     collected.

    :param n: The number of items to collect.  If it is 0 or less nothing is
              yielded.
    :type n: int

    :param fill: The filling pattern to use. It may be:

                 - a collection, in which case `first_n` fills the last items
                   by cycling over `fill`.

                 - anything else is used as the filling pattern by repeating.

    :returns: The first `n` items from `iterable`, probably with a filling
              pattern at the end.
    :rtype: generator object

    .. versionadded:: 1.2.0

    .. versionchanged:: 1.4.0 The notion of collection for the `fill`
                        argument uses :class:`xoutil.types.is_collection`
                        instead of probing for the ``__iter__`` method.

    '''
    if fill is not Unset:
        from xoutil.types import is_collection
        from itertools import cycle, repeat, chain
        if is_collection(fill):
            fill = cycle(fill)
        else:
            fill = repeat(fill)
        seq = chain(iterable, fill)
    else:
        seq = iter(iterable)
    while n > 0:
        # A StopIteration escaping from a generator body is converted to
        # RuntimeError in Python 3.7+ (PEP 479).  Returning explicitly ends
        # the generator cleanly on every Python version.  The `try` wraps
        # only `next()` so exceptions thrown at the `yield` are unaffected.
        try:
            item = next(seq)
        except StopIteration:
            return
        yield item
        n -= 1
def ungroup(iterator):
    '''Reverses the operation of `itertools.groupby`:func: (or similar).

    The `iterator` should produce pairs of ``(_, xs)``; where ``xs`` is
    another iterator (or iterable).

    It's guaranteed that the `iterator` will be consumed at the *boundaries*
    of each pair, i.e. before taking another pair ``(_, ys)`` from `iterator`
    the first ``xs`` will be fully yielded.

    Demonstration:

      >>> def groups():
      ...    def chunk(s):
      ...       for x in range(s, s+3):
      ...           print('Yielding x:', x)
      ...           yield x
      ...
      ...    for g in range(2):
      ...       print('Yielding group', g)
      ...       yield g, chunk(g)

      >>> list(ungroup(groups()))
      Yielding group 0
      Yielding x: 0
      Yielding x: 1
      Yielding x: 2
      Yielding group 1
      Yielding x: 1
      Yielding x: 2
      Yielding x: 3
      [0, 1, 2, 1, 2, 3]

    This is not the same as::

      >>> import itertools
      >>> xs = itertools.chain(*(xs for _, xs in groups()))
      Yielding group 0
      Yielding group 1

    Notice that the iterator was fully consumed just to create the arguments
    to ``chain()``.

    .. versionadded:: 1.7.3

    '''
    for pair in iterator:
        # Each pair must unpack to exactly two items; the first is ignored.
        _, members = pair
        for member in members:
            yield member
# Compatible zip and map: expose `map`, `zip` and `zip_longest` from this
# module under the same names, whichever branch below is taken.
from xoutil.eight import _py3

if not _py3:
    # `itertools` provides the iterator-returning variants under the
    # `i`-prefixed names; alias them to the plain names.
    from itertools import (imap as map, izip as zip,  # noqa
                           izip_longest as zip_longest)
else:
    from itertools import zip_longest  # noqa
    # Re-bind the builtins as module globals so they can be imported from
    # this module.
    map = map
    zip = zip