# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------
# xoutil.collections
# ----------------------------------------------------------------------
# Copyright (c) 2015 Merchise and Contributors
# Copyright (c) 2013, 2014 Merchise Autrement and Contributors
# defaultdict and opendict implementations.
#
# Copyright 2012 Medardo Rodríguez for the defaultdict and opendict
# implementations.
#
# The implementation of OrderedDict is the copyright of the Python
# Software Foundation.
#
# This file is distributed under the terms of the LICENCE distributed
# with this package.
#
# @created: 2012-07-03
#
# Contributors from Medardo Rodríguez:
# - Manuel Vázquez Acosta <manu@merchise.org>
# - Medardo Rodriguez <med@merchise.org>
'''Extensions to Python's ``collections`` module.
You may use it as drop-in replacement of ``collections``. Although we don't
document all items here. Refer to :mod:`collections <collections>`
documentation.
'''
from __future__ import (division as _py3_division,
print_function as _py3_print,
unicode_literals as _py3_unicode,
absolute_import as _absolute_import)
from xoutil.modules import copy_members as _copy_python_module_members
_pm = _copy_python_module_members()
from xoutil.deprecation import deprecated
import sys
_py2 = sys.version_info < (3, 0, 0)
_py33 = sys.version_info >= (3, 3, 0)
_py34 = sys.version_info >= (3, 4, 0)
del sys
if _py33:
_copy_python_module_members('collections.abc')
namedtuple = _pm.namedtuple
Sized = _pm.Sized
Container = _pm.Container
Iterable = _pm.Iterable
MutableMapping = _pm.MutableMapping
Mapping = _pm.Mapping
MutableSequence = _pm.MutableSequence
Sequence = _pm.Sequence
Set = _pm.Set
MutableSet = _pm.MutableSet
_itemgetter = _pm._itemgetter
_heapq = _pm._heapq
_chain = _pm._chain
_repeat = _pm._repeat
_starmap = _pm._starmap
del _pm, _copy_python_module_members
from collections import defaultdict as _defaultdict
from xoutil import Unset
from xoutil.names import strlist as slist
from xoutil.objects import SafeDataItem as safe
from xoutil.eight.meta import metaclass
class safe_dict_iter(tuple):
'''Iterate a dictionary in a safe way.
This is useful when a dictionary can be modified while iterating on it,
for example::
>>> d = {1: 2, 3: 4, 5: 6}
>>> di = safe_dict_iter(d)
>>> for k, v in di.items():
... d[v] = k
>>> [(k, v) for (k, v) in di.items()]
[(1, 2), (3, 4), (5, 6)]
>>> del d[1]
>>> [(k, v) for (k, v) in di.items()]
[(3, 4), (5, 6)]
>>> [k for k in di]
[3, 5]
'''
def __new__(cls, mapping):
self = super(safe_dict_iter, cls).__new__(cls, mapping)
self._mapping = mapping
return self
def __str__(self):
cls_name = type(self).__name__
res = str(', ').join(str(i) for i in self)
return str('{}({})').format(cls_name, res)
__repr__ = __str__
def __len__(self):
return sum(1 for key in self)
def __contains__(self, key):
res = super(safe_dict_iter, self).__contains__(key)
return res and key in self.mapping
def __nonzero__(self):
return bool(len(self))
__bool__ = __nonzero__
def __iter__(self):
for key in super(safe_dict_iter, self).__iter__():
if key in self._mapping:
yield key
keys = __iter__
def values(self):
for key in self:
if key in self._mapping:
yield self._mapping[key]
def items(self):
for key in self:
if key in self._mapping:
yield (key, self._mapping[key])
[docs]class defaultdict(_defaultdict):
'''A hack for ``collections.defaultdict`` that passes the key and a copy of
self as a plain dict (to avoid infinity recursion) to the callable.
Examples::
>>> from xoutil.collections import defaultdict
>>> d = defaultdict(lambda key, d: 'a')
>>> d['abc']
'a'
Since the second parameter is actually a dict-copy, you may (naively) do
the following::
>>> d = defaultdict(lambda k, d: d[k])
>>> d['abc']
Traceback (most recent call last):
...
KeyError: 'abc'
You may use this class as a drop-in replacement for
``collections.defaultdict``::
>>> d = defaultdict(lambda: 1)
>>> d['abc']
1
'''
def __missing__(self, key):
if self.default_factory is not None:
try:
return self.default_factory(key, dict(self))
except TypeError:
# This is the error when the arguments are not expected.
return super(defaultdict, self).__missing__(key)
else:
raise KeyError(key)
[docs]class OpenDictMixin(object):
'''A mixin for mappings implementation that expose keys as attributes::
>>> from xoutil.objects import SafeDataItem as safe
>>> class MyOpenDict(OpenDictMixin, dict):
... __slots__ = safe.slot(OpenDictMixin.__cache_name__, dict)
>>> d = MyOpenDict({'es': 'spanish'})
>>> d.es
'spanish'
>>> d['es'] = 'espanol'
>>> d.es
'espanol'
When setting or deleting an attribute, the attribute name is regarded as
key in the mapping if neither of the following condition holds:
- The name is a `slot`.
- The object has a ``__dict__`` attribute and the name is key there.
This mixin defines the following features that can be redefined:
_key2identifier
Protected method, receive a key as argument and return a valid
identifier that is used instead the key as an extended attribute.
__cache_name__
Inner field to store a cached mapping between actual keys and
calculated attribute names. The field must be always implemented as a
`SafeDataItem` descriptor and must be of type `dict`. There are two
ways of implementing this:
- As a slot. The first time of this implementation is an example.
Don't forget to pass the second parameter with the constructor
`dict`.
- As a normal descriptor::
>>> from xoutil.objects import SafeDataItem as safe
>>> class MyOpenDict(OpenDictMixin, dict):
... safe(OpenDictMixin.__cache_name__, dict)
Classes or Mixins that can be integrated with `dict` by inheritance
must not have a `__slots__` definition. Because of that, this mixin
must not declare any slot. If needed, it must be declared explicitly
in customized classed like in the example in the first part of this
documentation or in the definition of `opendict` class.
'''
__cache_name__ = str('_inverted_cache')
def __dir__(self):
'''Return normal "dir" plus valid keys as attributes.'''
# TODO: Check if super must be called if defined
from xoutil.objects import fulldir
return list(set(~self) | fulldir(self))
def __getattr__(self, name):
from xoutil.inspect import get_attr_value
res = get_attr_value(self, name, Unset)
if res is not Unset:
return res
else:
key = (~self).get(name)
if key:
return self[key]
else:
msg = "'%s' object has no attribute '%s'"
raise AttributeError(msg % (type(self).__name__, name))
def __setattr__(self, name, value):
key = (~self).get(name)
if key:
self[key] = value
else:
super(OpenDictMixin, self).__setattr__(name, value)
def __delattr__(self, name):
key = (~self).get(name)
if key:
del self[key]
else:
super(OpenDictMixin, self).__delattr__(name)
def __invert__(self):
'''Return an inverted mapping between key and attribute names (keys of
the resulting dictionary are identifiers for attribute names and values
are original key names).
Class attribute "__separators__" are used to calculate it and is
cached in '_inverted_cache slot safe variable.
Several keys could have the same identifier, only one will be valid and
used.
To obtain this mapping you can use as the unary operator "~".
'''
from xoutil.inspect import get_attr_value
KEY_LENGTH = 'length'
KEY_MAPPING = 'mapping'
cache = get_attr_value(self, type(self).__cache_name__)
cached_length = cache.setdefault(KEY_LENGTH, 0)
length = len(self)
if cached_length != length:
cache[KEY_LENGTH] = length
aux = ((self._key2identifier(k), k) for k in self)
res = {key: attr for key, attr in aux if key}
cache[KEY_MAPPING] = res
else:
res = cache.get(KEY_MAPPING)
if res is None:
assert cached_length == 0
res = {}
cache[KEY_MAPPING] = res
return res
@staticmethod
def _key2identifier(key):
'''Convert keys to valid identifiers.
This method could be redefined in sub-classes to change this feature.
This function must return a valid identifier or None if the conversion
is not possible.
'''
from xoutil.string import normalize_slug
from xoutil.validators import is_valid_identifier
return key if is_valid_identifier(key) else normalize_slug(key, '_')
[docs]class SmartDictMixin(object):
'''A mixin that extends the `update` method of dictionaries
Standard method allow only one positional argument, this allow several.
Note on using mixins in Python: method resolution order is calculated in
the order of inheritance, if a mixin is defined to overwrite behavior
already existent, use first that classes with it. See :class:`SmartDict`
below.
'''
def update(self, *args, **kwargs):
'''Update this dict from a set of iterables `args` and keyword values
`kwargs`.
Each positional argument could be:
- another mapping (any object implementing "keys" and
:meth:`~object.__getitem__` methods.
- an iterable of (key, value) pairs.
'''
for arg in args:
if hasattr(arg, 'keys') and hasattr(arg, '__getitem__'):
for key in arg:
self[key] = arg[key]
else:
for key, value in arg:
self[key] = value
for key in kwargs:
self[key] = kwargs[key]
# TODO: Include new argument ``full=True`` to also search in string
# values. Maybe this kind of feature will be better in a function
# instead a method.
def search(self, pattern):
'''Return new mapping with items which key match a `pattern` regexp.
This function always tries to return a valid new mapping of the same
type of the caller instance. If the constructor of corresponding type
can't be called without arguments, then look up for a class
variable named `__search_result_type__` or return a standard
Python dictionary if not found.
'''
from re import compile
regexp = compile(pattern)
cls = type(self)
try:
res = cls()
except BaseException:
from xoutil.inspect import get_attr_value
creator = get_attr_value(cls, '__search_result_type__', None)
res = creator() if creator else {}
for key in self:
if regexp.search(key):
res[key] = self[key]
return res
class SmartDict(SmartDictMixin, dict):
'''A "smart" dictionary that can receive a wide variety of arguments.
See :meth:`SmartDictMixin.update` and :meth:`SmartDictMixin.search`.
'''
def __init__(self, *args, **kwargs):
super(SmartDict, self).__init__()
self.update(*args, **kwargs)
[docs]class opendict(OpenDictMixin, dict, object):
'''A dictionary implementation that mirrors its keys as attributes::
>>> d = opendict({'es': 'spanish'})
>>> d.es
'spanish'
>>> d['es'] = 'espanol'
>>> d.es
'espanol'
Setting attributes *does not* makes them keys.
'''
__slots__ = safe.slot(OpenDictMixin.__cache_name__, dict)
if not _py33:
# From this point below: Copyright (c) 2001-2013, Python Software
# Foundation; All rights reserved.
import sys as _sys
from weakref import proxy as _proxy
from xoutil.reprlib import recursive_repr as _recursive_repr
class _Link(object):
__slots__ = 'prev', 'next', 'key', '__weakref__'
# TODO: This class is implemented in all Python versions I have.
[docs] class OrderedDict(dict):
'Dictionary that remembers insertion order'
# An inherited dict maps keys to values. The inherited dict provides
# __getitem__, __len__, __contains__, and get. The remaining methods
# are order-aware. Big-O running times for all methods are the same as
# regular dictionaries.
# The internal self.__map dict maps keys to links in a doubly linked
# list. The circular doubly linked list starts and ends with a sentinel
# element. The sentinel element never gets deleted (this simplifies the
# algorithm). The sentinel is in self.__hardroot with a weakref proxy
# in self.__root. The prev links are weakref proxies (to prevent
# circular references). Individual links are kept alive by the hard
# reference in self.__map. Those hard references disappear when a key
# is deleted from an OrderedDict.
def __init__(self, *args, **kwds):
'''Initialize an ordered dictionary.
The signature is the same as regular dictionaries, but keyword
arguments are not recommended because their insertion order is
arbitrary.
'''
if len(args) > 1:
raise TypeError('expected at most 1 arguments, got %d' %
len(args))
try:
self.__root
except AttributeError:
self.__hardroot = _Link()
self.__root = root = _proxy(self.__hardroot)
root.prev = root.next = root
self.__map = {}
self.__update(*args, **kwds)
def __setitem__(self, key, value, dict_setitem=dict.__setitem__,
proxy=_proxy, Link=_Link):
'od.__setitem__(i, y) <==> od[i]=y'
# Setting a new item creates a new link at the end of the linked
# list, and the inherited dictionary is updated with the new
# key/value pair.
if key not in self:
self.__map[key] = link = Link()
root = self.__root
last = root.prev
link.prev, link.next, link.key = last, root, key
last.next = link
root.prev = proxy(link)
dict_setitem(self, key, value)
def __delitem__(self, key, dict_delitem=dict.__delitem__):
'od.__delitem__(y) <==> del od[y]'
# Deleting an existing item uses self.__map to find the link which
# gets removed by updating the links in the predecessor and
# successor nodes.
dict_delitem(self, key)
link = self.__map.pop(key)
link_prev = link.prev
link_next = link.next
link_prev.next = link_next
link_next.prev = link_prev
def __iter__(self):
'od.__iter__() <==> iter(od)'
# Traverse the linked list in order.
root = self.__root
curr = root.next
while curr is not root:
yield curr.key
curr = curr.next
def __reversed__(self):
'od.__reversed__() <==> reversed(od)'
# Traverse the linked list in reverse order.
root = self.__root
curr = root.prev
while curr is not root:
yield curr.key
curr = curr.prev
def clear(self):
'od.clear() -> None. Remove all items from od.'
root = self.__root
root.prev = root.next = root
self.__map.clear()
dict.clear(self)
def popitem(self, last=True):
'''od.popitem() -> (k, v), return and remove a (key, value) pair.
Pairs are returned in LIFO order if last is true or FIFO order if
false.
'''
if not self:
raise KeyError('dictionary is empty')
root = self.__root
if last:
link = root.prev
link_prev = link.prev
link_prev.next = root
root.prev = link_prev
else:
link = root.next
link_next = link.next
root.next = link_next
link_next.prev = root
key = link.key
del self.__map[key]
value = dict.pop(self, key)
return key, value
def move_to_end(self, key, last=True):
'''Move an existing element to the end (or beginning if
``last==False``).
Raises KeyError if the element does not exist. When
``last==True``, acts like a fast version of
``self[key]=self.pop(key)``.
'''
link = self.__map[key]
link_prev = link.prev
link_next = link.next
link_prev.next = link_next
link_next.prev = link_prev
root = self.__root
if last:
last = root.prev
link.prev = last
link.next = root
last.next = root.prev = link
else:
first = root.next
link.prev = root
link.next = first
root.next = first.prev = link
def __sizeof__(self):
sizeof = _sys.getsizeof
n = len(self) + 1 # links including root
size = sizeof(self.__dict__) # instance dictionary
size += sizeof(self.__map) * 2 # internal & inherited dicts
size += sizeof(self.__hardroot) * n # link objects
size += sizeof(self.__root) * n # proxy objects
return size
update = __update = MutableMapping.update
keys = MutableMapping.keys
values = MutableMapping.values
items = MutableMapping.items
__ne__ = MutableMapping.__ne__
def pop(self, key, default=Unset):
'''od.pop(k[,d]) -> v, remove specified key and return the
corresponding value.
If key is not found, d is returned if given, otherwise KeyError is
raised.
'''
if key in self:
result = self[key]
del self[key]
return result
elif default is not Unset:
return default
else:
raise KeyError(key)
def setdefault(self, key, default=None):
'''``od.setdefault(k[, d])`` -> ``od.get(k, d)`` also set
``od[k] = d if k not in od``'''
if key in self:
return self[key]
self[key] = default
return default
@_recursive_repr()
def __repr__(self):
'od.__repr__() <==> repr(od)'
if not self:
return '%s()' % (self.__class__.__name__,)
return '%s(%r)' % (self.__class__.__name__, list(self.items()))
def __reduce__(self):
'Return state information for pickling'
items = [[k, self[k]] for k in self]
inst_dict = vars(self).copy()
for k in vars(OrderedDict()):
inst_dict.pop(k, None)
if inst_dict:
return (self.__class__, (items,), inst_dict)
return self.__class__, (items,)
def copy(self):
'od.copy() -> a shallow copy of od'
return self.__class__(self)
@classmethod
def fromkeys(cls, iterable, value=None):
'''``OD.fromkeys(S[, v])`` -> New ordered dictionary with keys from
`S`. If not specified, the value defaults to None.
'''
self = cls()
for key in iterable:
self[key] = value
return self
def __eq__(self, other):
'''``od.__eq__(y) <==> od==y``. Comparison to another OD is
order-sensitive while comparison to a regular mapping is
order-insensitive.
'''
if isinstance(other, OrderedDict):
return len(self) == len(other) and \
all(p == q for p, q in zip(self.items(), other.items()))
return dict.__eq__(self, other)
if not _py34:
[docs] class ChainMap(MutableMapping):
'''A ChainMap groups multiple dicts (or other mappings) together
to create a single, updateable view.
The underlying mappings are stored in a list. That list is public and
can accessed or updated using the *maps* attribute. There is no other
state.
Lookups search the underlying mappings successively until a key is
found. In contrast, writes, updates, and deletions only operate on
the first mapping.
'''
def __init__(self, *maps):
'''Initialize a ChainMap by setting *maps* to the given mappings.
If no mappings are provided, a single empty dictionary is used.
'''
self.maps = list(maps) or [{}] # always at least one map
def __missing__(self, key):
raise KeyError(key)
def __getitem__(self, key):
for mapping in self.maps:
try:
return mapping[key] # can't use 'key in mapping' with
# defaultdict
except KeyError:
pass
return self.__missing__(key) # support subclasses that define
# __missing__
def get(self, key, default=None):
return self[key] if key in self else default
def __len__(self):
return len(set().union(*self.maps)) # reuses stored hash values
# if possible
def __iter__(self):
return iter(set().union(*self.maps))
def __contains__(self, key):
return any(key in m for m in self.maps)
def __bool__(self):
return any(self.maps)
@_recursive_repr()
def __repr__(self):
return '{0.__class__.__name__}({1})'.format(
self, ', '.join(map(repr, self.maps)))
@classmethod
def fromkeys(cls, iterable, *args):
'Create a ChainMap with a single dict created from the iterable.'
return cls(dict.fromkeys(iterable, *args))
def copy(self):
'''New ChainMap or subclass with a new copy of ``maps[0]`` and
refs to ``maps[1:]``
'''
return self.__class__(self.maps[0].copy(), *self.maps[1:])
__copy__ = copy
[docs] def new_child(self, m=None):
'''New ChainMap with a new map followed by all previous maps.
If no map is provided, an empty dict is used.
'''
if m is None:
m = {}
return self.__class__(m, *self.maps)
@property
def parents(self):
'New ChainMap from ``maps[1:]``.'
return self.__class__(*self.maps[1:])
def __setitem__(self, key, value):
self.maps[0][key] = value
def __delitem__(self, key):
try:
del self.maps[0][key]
except KeyError:
msg = 'Key not found in the first mapping: {!r}'.format(key)
raise KeyError(msg)
def popitem(self):
'''Remove and return an item pair from ``maps[0]``.
Raise KeyError is ``maps[0]`` is empty.
'''
try:
return self.maps[0].popitem()
except KeyError:
raise KeyError('No keys found in the first mapping.')
def pop(self, key, *args):
'''Remove *key* from ``maps[0]`` and return its value.
Raise KeyError if *key* not in ``maps[0]``.'''
try:
return self.maps[0].pop(key, *args)
except KeyError:
msg = 'Key not found in the first mapping: {!r}'.format(key)
raise KeyError(msg)
def clear(self):
'Clear ``maps[0]``, leaving ``maps[1:]`` intact.'
self.maps[0].clear()
if not _py33:
class UserDict(MutableMapping):
# Start by filling-out the abstract methods
def __init__(self, dict=None, **kwargs):
self.data = {}
if dict is not None:
self.update(dict)
if len(kwargs):
self.update(kwargs)
def __len__(self):
return len(self.data)
def __getitem__(self, key):
if key in self.data:
return self.data[key]
if hasattr(self.__class__, "__missing__"):
return self.__class__.__missing__(self, key)
raise KeyError(key)
def __setitem__(self, key, item):
self.data[key] = item
def __delitem__(self, key):
del self.data[key]
def __iter__(self):
return iter(self.data)
# Modify __contains__ to work correctly when __missing__ is present
def __contains__(self, key):
return key in self.data
# Now, add the methods in dicts but not in MutableMapping
def __repr__(self):
return repr(self.data)
def copy(self):
if self.__class__ is UserDict:
return UserDict(self.data.copy())
import copy
data = self.data
try:
self.data = {}
c = copy.copy(self)
finally:
self.data = data
c.update(self)
return c
@classmethod
def fromkeys(cls, iterable, value=None):
d = cls()
for key in iterable:
d[key] = value
return d
class UserList(MutableSequence):
"""A more or less complete user-defined wrapper around list objects."""
def __init__(self, initlist=None):
self.data = []
if initlist is not None:
# XXX should this accept an arbitrary sequence?
if type(initlist) == type(self.data):
self.data[:] = initlist
elif isinstance(initlist, UserList):
self.data[:] = initlist.data[:]
else:
self.data = list(initlist)
def __repr__(self):
return repr(self.data)
def __lt__(self, other):
return self.data < self.__cast(other)
def __le__(self, other):
return self.data <= self.__cast(other)
def __eq__(self, other):
return self.data == self.__cast(other)
def __ne__(self, other):
return self.data != self.__cast(other)
def __gt__(self, other):
return self.data > self.__cast(other)
def __ge__(self, other):
return self.data >= self.__cast(other)
def __cast(self, other):
return other.data if isinstance(other, UserList) else other
def __contains__(self, item):
return item in self.data
def __len__(self):
return len(self.data)
def __getitem__(self, i):
return self.data[i]
def __setitem__(self, i, item):
self.data[i] = item
def __delitem__(self, i):
del self.data[i]
def __add__(self, other):
if isinstance(other, UserList):
return self.__class__(self.data + other.data)
elif isinstance(other, type(self.data)):
return self.__class__(self.data + other)
return self.__class__(self.data + list(other))
def __radd__(self, other):
if isinstance(other, UserList):
return self.__class__(other.data + self.data)
elif isinstance(other, type(self.data)):
return self.__class__(other + self.data)
return self.__class__(list(other) + self.data)
def __iadd__(self, other):
if isinstance(other, UserList):
self.data += other.data
elif isinstance(other, type(self.data)):
self.data += other
else:
self.data += list(other)
return self
def __mul__(self, n):
return self.__class__(self.data*n)
__rmul__ = __mul__
def __imul__(self, n):
self.data *= n
return self
def append(self, item):
self.data.append(item)
def insert(self, i, item):
self.data.insert(i, item)
def pop(self, i=-1):
return self.data.pop(i)
def remove(self, item):
self.data.remove(item)
def clear(self):
self.data.clear()
def copy(self):
return self.__class__(self)
def count(self, item):
return self.data.count(item)
def index(self, item, *args):
return self.data.index(item, *args)
def reverse(self):
self.data.reverse()
def sort(self, *args, **kwds):
self.data.sort(*args, **kwds)
def extend(self, other):
if isinstance(other, UserList):
self.data.extend(other.data)
else:
self.data.extend(other)
class UserString(Sequence):
def __init__(self, seq):
if isinstance(seq, str):
self.data = seq
elif isinstance(seq, UserString):
self.data = seq.data[:]
else:
self.data = str(seq)
def __str__(self):
return str(self.data)
def __repr__(self):
return repr(self.data)
def __int__(self):
return int(self.data)
def __float__(self):
return float(self.data)
def __complex__(self):
return complex(self.data)
def __hash__(self):
return hash(self.data)
def __eq__(self, string):
if isinstance(string, UserString):
return self.data == string.data
return self.data == string
def __ne__(self, string):
if isinstance(string, UserString):
return self.data != string.data
return self.data != string
def __lt__(self, string):
if isinstance(string, UserString):
return self.data < string.data
return self.data < string
def __le__(self, string):
if isinstance(string, UserString):
return self.data <= string.data
return self.data <= string
def __gt__(self, string):
if isinstance(string, UserString):
return self.data > string.data
return self.data > string
def __ge__(self, string):
if isinstance(string, UserString):
return self.data >= string.data
return self.data >= string
def __contains__(self, char):
if isinstance(char, UserString):
char = char.data
return char in self.data
def __len__(self):
return len(self.data)
def __getitem__(self, index):
return self.__class__(self.data[index])
def __add__(self, other):
if isinstance(other, UserString):
return self.__class__(self.data + other.data)
elif isinstance(other, str):
return self.__class__(self.data + other)
return self.__class__(self.data + str(other))
def __radd__(self, other):
if isinstance(other, str):
return self.__class__(other + self.data)
return self.__class__(str(other) + self.data)
def __mul__(self, n):
return self.__class__(self.data*n)
__rmul__ = __mul__
def __mod__(self, args):
return self.__class__(self.data % args)
# the following methods are defined in alphabetical order:
def capitalize(self):
return self.__class__(self.data.capitalize())
def center(self, width, *args):
return self.__class__(self.data.center(width, *args))
def count(self, sub, start=0, end=_sys.maxsize):
if isinstance(sub, UserString):
sub = sub.data
return self.data.count(sub, start, end)
def encode(self, encoding=None, errors=None): # XXX improve this?
if encoding:
if errors:
return self.__class__(self.data.encode(encoding, errors))
return self.__class__(self.data.encode(encoding))
return self.__class__(self.data.encode())
def endswith(self, suffix, start=0, end=_sys.maxsize):
return self.data.endswith(suffix, start, end)
def expandtabs(self, tabsize=8):
return self.__class__(self.data.expandtabs(tabsize))
def find(self, sub, start=0, end=_sys.maxsize):
if isinstance(sub, UserString):
sub = sub.data
return self.data.find(sub, start, end)
def format(self, *args, **kwds):
return self.data.format(*args, **kwds)
def index(self, sub, start=0, end=_sys.maxsize):
return self.data.index(sub, start, end)
def isalpha(self):
return self.data.isalpha()
def isalnum(self):
return self.data.isalnum()
def isdecimal(self):
return self.data.isdecimal()
def isdigit(self):
return self.data.isdigit()
def isidentifier(self):
return self.data.isidentifier()
def islower(self):
return self.data.islower()
def isnumeric(self):
return self.data.isnumeric()
def isspace(self):
return self.data.isspace()
def istitle(self):
return self.data.istitle()
def isupper(self):
return self.data.isupper()
def join(self, seq):
return self.data.join(seq)
def ljust(self, width, *args):
return self.__class__(self.data.ljust(width, *args))
def lower(self):
return self.__class__(self.data.lower())
def lstrip(self, chars=None):
return self.__class__(self.data.lstrip(chars))
def partition(self, sep):
return self.data.partition(sep)
def replace(self, old, new, maxsplit=-1):
if isinstance(old, UserString):
old = old.data
if isinstance(new, UserString):
new = new.data
return self.__class__(self.data.replace(old, new, maxsplit))
def rfind(self, sub, start=0, end=_sys.maxsize):
if isinstance(sub, UserString):
sub = sub.data
return self.data.rfind(sub, start, end)
def rindex(self, sub, start=0, end=_sys.maxsize):
return self.data.rindex(sub, start, end)
def rjust(self, width, *args):
return self.__class__(self.data.rjust(width, *args))
def rpartition(self, sep):
return self.data.rpartition(sep)
def rstrip(self, chars=None):
return self.__class__(self.data.rstrip(chars))
def split(self, sep=None, maxsplit=-1):
return self.data.split(sep, maxsplit)
def rsplit(self, sep=None, maxsplit=-1):
return self.data.rsplit(sep, maxsplit)
def splitlines(self, keepends=False):
return self.data.splitlines(keepends)
def startswith(self, prefix, start=0, end=_sys.maxsize):
return self.data.startswith(prefix, start, end)
def strip(self, chars=None):
return self.__class__(self.data.strip(chars))
def swapcase(self):
return self.__class__(self.data.swapcase())
def title(self):
return self.__class__(self.data.title())
def translate(self, *args):
return self.__class__(self.data.translate(*args))
def upper(self):
return self.__class__(self.data.upper())
def zfill(self, width):
return self.__class__(self.data.zfill(width))
def _count_elements(mapping, iterable):
self_get = mapping.get
for elem in iterable:
mapping[elem] = self_get(elem, 0) + 1
[docs] class Counter(dict):
'''Dict subclass for counting hashable items. Sometimes called a bag
or multiset. Elements are stored as dictionary keys and their counts
are stored as dictionary values.
>>> c = Counter('abcdeabcdabcaba') # count elements from a string
>>> c.most_common(3) # three most common elements
[('a', 5), ('b', 4), ('c', 3)]
>>> sorted(c) # list all unique elements
['a', 'b', 'c', 'd', 'e']
>>> ''.join(sorted(c.elements())) # list elements with repetitions
'aaaaabbbbcccdde'
>>> sum(c.values()) # total of all counts
15
>>> c['a'] # count of letter 'a'
5
>>> for elem in 'shazam': # update counts from an iterable
... c[elem] += 1 # by adding 1 to each element's count
>>> c['a'] # now there are seven 'a'
7
>>> del c['b'] # remove all 'b'
>>> c['b'] # now there are zero 'b'
0
>>> d = Counter('simsalabim') # make another counter
>>> c.update(d) # add in the second counter
>>> c['a'] # now there are nine 'a'
9
>>> c.clear() # empty the counter
>>> c
Counter()
Note: If a count is set to zero or reduced to zero, it will remain
in the counter until the entry is deleted or the counter is cleared:
>>> c = Counter('aaabbc')
>>> c['b'] -= 2 # reduce the count of 'b' by two
>>> c.most_common() # 'b' is still in, but its count is zero
[('a', 3), ('c', 1), ('b', 0)]
'''
# References:
# http://en.wikipedia.org/wiki/Multiset
# http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
# http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
# http://code.activestate.com/recipes/259174/
# Knuth, TAOCP Vol. II section 4.6.3
def __init__(self, iterable=None, **kwds):
'''Create a new, empty Counter object. And if given, count
elements from an input iterable. Or, initialize the count from
another mapping of elements to their counts.
>>> c = Counter() # a new, empty counter
>>> c = Counter('gallahad') # a new counter from an iterable
>>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
>>> c = Counter(a=4, b=2) # a new counter from keyword args
'''
super(Counter, self).__init__()
self.update(iterable, **kwds)
def __missing__(self, key):
'The count of elements not in the Counter is zero.'
# Needed so that self[missing_item] does not raise KeyError
return 0
def most_common(self, n=None):
'''List the n most common elements and their counts from the most
common to the least. If n is None, then list all element counts.
>>> Counter('abcdeabcdabcaba').most_common(3)
[('a', 5), ('b', 4), ('c', 3)]
'''
# Emulate Bag.sortedByCount from Smalltalk
if n is None:
return sorted(self.items(), key=_itemgetter(1), reverse=True)
return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
def elements(self):
'''Iterator over elements repeating each as many times as its
count.
>>> c = Counter('ABCABC')
>>> sorted(c.elements())
['A', 'A', 'B', 'B', 'C', 'C']
# Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
>>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
>>> product = 1
>>> for factor in prime_factors.elements(): # loop over factors
... product *= factor # and multiply them
>>> product
1836
Note, if an element's count has been set to zero or is a negative
number, elements() will ignore it.
'''
# Emulate Bag.do from Smalltalk and Multiset.begin from C++.
return _chain.from_iterable(_starmap(_repeat, self.items()))
# Override dict methods where necessary
@classmethod
def fromkeys(cls, iterable, v=None):
# There is no equivalent method for counters because setting v=1
# means that no element can have a count greater than one.
raise NotImplementedError('Counter.fromkeys() is undefined. '
'Use Counter(iterable) instead.')
def update(self, iterable=None, **kwds):
'''Like dict.update() but add counts instead of replacing them.
Source can be an iterable, a dictionary, or another Counter
instance.
>>> c = Counter('which')
>>> c.update('witch') # add elements from another iterable
>>> d = Counter('watch')
>>> c.update(d) # add elements from another counter
>>> c['h'] # four 'h' in which, witch, and watch
4
'''
# The regular dict.update() operation makes no sense here because
# the replace behavior results in the some of original untouched
# counts being mixed-in with all of the other counts for a mismash
# that doesn't have a straight-forward interpretation in most
# counting contexts. Instead, we implement straight-addition.
# Both the inputs and outputs are allowed to contain zero and
# negative counts.
if iterable is not None:
if isinstance(iterable, Mapping):
if self:
self_get = self.get
for elem, count in iterable.items():
self[elem] = count + self_get(elem, 0)
else:
# fast path when counter is empty
super(Counter, self).update(iterable)
else:
_count_elements(self, iterable)
if kwds:
self.update(kwds)
def subtract(self, iterable=None, **kwds):
'''Like dict.update() but subtracts counts instead of replacing
them.
Counts can be reduced below zero. Both the inputs and outputs are
allowed to contain zero and negative counts.
Source can be an iterable, a dictionary, or another Counter
instance.
Examples::
>>> c = Counter('which')
Subtract elements from another iterable::
>>> c.subtract('witch')
Subtract elements from another counter::
>>> c.subtract(Counter('watch'))
2 in which, minus 1 in witch, minus 1 in watch::
>>> c['h']
0
1 in which, minus 1 in witch, minus 1 in watch::
>>> c['w']
-1
'''
if iterable is not None:
self_get = self.get
if isinstance(iterable, Mapping):
for elem, count in iterable.items():
self[elem] = self_get(elem, 0) - count
else:
for elem in iterable:
self[elem] = self_get(elem, 0) - 1
if kwds:
self.subtract(kwds)
def copy(self):
'Return a shallow copy.'
return self.__class__(self)
def __reduce__(self):
return self.__class__, (dict(self),)
def __delitem__(self, elem):
'''Like dict.__delitem__() but does not raise KeyError for missing
values.'''
if elem in self:
super(Counter, self).__delitem__(elem)
def __repr__(self):
if not self:
return '%s()' % self.__class__.__name__
try:
items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
return '%s({%s})' % (self.__class__.__name__, items)
except TypeError:
# handle case where values are not orderable
return '{0}({1!r})'.format(self.__class__.__name__, dict(self))
# Multiset-style mathematical operations discussed in:
# Knuth TAOCP Volume II section 4.6.3 exercise 19
# and at http://en.wikipedia.org/wiki/Multiset
#
# Outputs guaranteed to only include positive counts.
#
# To strip negative and zero counts, add-in an empty counter:
# c += Counter()
def __add__(self, other):
'''Add counts from two counters.
>>> Counter('abbb') + Counter('bcc')
Counter({'b': 4, 'c': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
newcount = count + other[elem]
if newcount > 0:
result[elem] = newcount
for elem, count in other.items():
if elem not in self and count > 0:
result[elem] = count
return result
def __sub__(self, other):
''' Subtract count, but keep only results with positive counts.
>>> Counter('abbbc') - Counter('bccd')
Counter({'b': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
newcount = count - other[elem]
if newcount > 0:
result[elem] = newcount
for elem, count in other.items():
if elem not in self and count < 0:
result[elem] = 0 - count
return result
def __or__(self, other):
'''Union is the maximum of value in either of the input counters.
>>> Counter('abbb') | Counter('bcc')
Counter({'b': 3, 'c': 2, 'a': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
other_count = other[elem]
newcount = other_count if count < other_count else count
if newcount > 0:
result[elem] = newcount
for elem, count in other.items():
if elem not in self and count > 0:
result[elem] = count
return result
def __and__(self, other):
''' Intersection is the minimum of corresponding counts.
>>> Counter('abbb') & Counter('bcc')
Counter({'b': 1})
'''
if not isinstance(other, Counter):
return NotImplemented
result = Counter()
for elem, count in self.items():
other_count = other[elem]
newcount = count if count < other_count else other_count
if newcount > 0:
result[elem] = newcount
return result
def __pos__(self):
'''Adds an empty counter, effectively stripping negative and zero
counts.'''
return self + Counter()
def __neg__(self):
'''Subtracts from an empty counter. Strips positive and zero
counts, and flips the sign on negative counts.
'''
return Counter() - self
def _keep_positive(self):
'''Internal method to strip elements with a negative or zero
count.'''
nonpositive = [elem for elem, count in self.items()
if not count > 0]
for elem in nonpositive:
del self[elem]
return self
def __iadd__(self, other):
'''Inplace add from another counter, keeping only positive counts.
>>> c = Counter('abbb')
>>> c += Counter('bcc')
>>> c
Counter({'b': 4, 'c': 2, 'a': 1})
'''
for elem, count in other.items():
self[elem] += count
return self._keep_positive()
def __isub__(self, other):
'''Inplace subtract counter, but keep only results with positive
counts.
>>> c = Counter('abbbc')
>>> c -= Counter('bccd')
>>> c
Counter({'b': 2, 'a': 1})
'''
for elem, count in other.items():
self[elem] -= count
return self._keep_positive()
def __ior__(self, other):
'''Inplace union is the maximum of value from either counter.
>>> c = Counter('abbb')
>>> c |= Counter('bcc')
>>> c
Counter({'b': 3, 'c': 2, 'a': 1})
'''
for elem, other_count in other.items():
count = self[elem]
if other_count > count:
self[elem] = other_count
return self._keep_positive()
def __iand__(self, other):
'''Inplace intersection is the minimum of corresponding counts.
>>> c = Counter('abbb')
>>> c &= Counter('bcc')
>>> c
Counter({'b': 1})
'''
for elem, count in self.items():
other_count = other[elem]
if other_count < count:
self[elem] = other_count
return self._keep_positive()
### ;; end of Python 3.3 backport
[docs]class StackedDict(OpenDictMixin, SmartDictMixin, MutableMapping):
'''A multi-level mapping.
A level is entered by using the :meth:`push` and is leaved by calling
:meth:`pop`.
The property :attr:`level` returns the actual number of levels.
When accessing keys they are searched from the latest level "upwards", if
such a key does not exists in any level a KeyError is raised.
Deleting a key only works in the *current level*; if it's not defined there
a KeyError is raised. This means that you can't delete keys from the upper
levels without :func:`popping <pop>`.
Setting the value for key, sets it in the current level.
.. versionchanged:: 1.5.2 Based on the newly introduced :class:`ChainMap`.
'''
__slots__ = (safe.slot('inner', ChainMap),
safe.slot(OpenDictMixin.__cache_name__, dict))
def __init__(self, *args, **kwargs):
# Each data item is stored as {key: {level: value, ...}}
self.update(*args, **kwargs)
@property
def level(self):
'''Return the current level number.
The first level is 0. Calling :meth:`push` increases the current
level (and returns it), while calling :meth:`pop` decreases the
current level (if possible).
'''
return len(self.inner.maps) - 1
[docs] def push_level(self, *args, **kwargs):
'''Pushes a whole new level to the stacked dict.
:param args: Several mappings from which the new level will be
initialled filled.
:param kwargs: Values to fill the new level.
:returns: The pushed :attr:`level` number.
'''
self.inner = self.inner.new_child()
self.update(*args, **kwargs)
return self.level
@deprecated(push_level)
[docs] def push(self, *args, **kwargs):
'''Don't use thid method, use new `push_level`:meth: instead.'''
return self.push_level(*args, **kwargs)
[docs] def pop(self, *args):
'''Remove this, always use original `MutableMapping.pop`:meth:.
If none arguments are given, `pop_level`:meth: is called and a
deprecation warning is printed in `sys.stderr` the first time. If one
or two arguments are given, those are interpreted as (key, default)
values of the super class `pop`:meth:.
'''
if len(args) == 0:
cls = type(self)
if not hasattr(cls, '_bad_pop_called'):
import warnings
setattr(cls, '_bad_pop_called', True)
msg = ('Use `pop` method without parameters is deprecated, '
'use `pop_level` instead')
warnings.warn(msg, stacklevel=2)
return self.pop_level()
else:
return super(StackedDict, self).pop(*args)
[docs] def pop_level(self):
'''Pops the last pushed level and returns the whole level.
If there are no levels in the stacked dict, a TypeError is raised.
:returns: A dict containing the poped level.
'''
if self.level > 0:
stack = self.inner
res = stack.maps[0]
self.inner = stack.parents
return res
else:
raise TypeError('Cannot pop from StackedDict without any levels')
[docs] def peek(self):
'''Peeks the top level of the stack.
Returns a copy of the top-most level without any of the keys from
lower levels.
Example::
>>> sdict = StackedDict(a=1, b=2)
>>> sdict.push(c=3) # it returns the level...
1
>>> sdict.peek()
{'c': 3}
'''
return dict(self.inner.maps[0])
def __str__(self):
# TODO: Optimize
return str(dict(self))
def __repr__(self):
return '%s(%s)' % (type(self).__name__, str(self))
def __len__(self):
return len(self.inner)
def __iter__(self):
return iter(self.inner)
def __getitem__(self, key):
return self.inner[key]
def __setitem__(self, key, value):
self.inner[key] = value
def __delitem__(self, key):
del self.inner[key]
[docs]class OrderedSmartDict(SmartDictMixin, OrderedDict):
'''A combination of the `OrderedDict` with the `SmartDictMixin`.
.. warning:: Initializing with kwargs does not ensure any initial ordering,
since Python's keyword dict is not ordered. Use a list/tuple
of pairs instead.
'''
def __init__(self, *args, **kwds):
'''Initialize an ordered dictionary.
The signature is the same as regular dictionaries, but keyword
arguments are not recommended because their insertion order is
arbitrary.
'''
super(OrderedSmartDict, self).__init__()
self.update(*args, **kwds)
class MetaSet(type):
'''Allow syntax sugar creating sets.
This is pythonic syntax (stop limit is never included), for example::
>>> from xoutil.collections import PascalSet as srange
>>> [i for i in srange[1:4, 15, 20:23]]
[1, 2, 3, 15, 20, 21, 22, 23]
'''
def __getitem__(cls, ranges):
return cls(*ranges) if isinstance(ranges, tuple) else cls(ranges)
[docs]class PascalSet(object, metaclass(MetaSet)):
'''Collection of unique integer elements (implemented with intervals).
::
PascalSet(*others) -> new set object
.. versionadded:: 1.7.0
'''
__slots__ = ('_items',)
def __init__(self, *others):
'''Initialize self.
:param others: Any number of integer or collection of integers that
will be the set members.
'''
self._items = [] # a list of list of two elements
self.update(*others)
def __str__(self):
from xoutil.eight import range
def aux(s, e):
if s == e:
return str(s)
elif s + 1 == e:
return '%s, %s' % (s, e)
else:
return '%s..%s' % (s, e)
l = self._items
ranges = ((l[i], l[i + 1]) for i in range(0, len(l), 2))
return str('{%s}' % ', '.join((aux(s, e) for (s, e) in ranges)))
def __repr__(self):
cls = type(self)
cname = cls.__name__
return str('%s([%s])' % (cname, ', '.join((str(i) for i in self))))
def __iter__(self):
l = self._items
i, count = 0, len(l)
while i < count:
s, e = l[i], l[i + 1]
while s <= e:
yield s
s += 1
i += 2
def __len__(self):
res = 0
l = self._items
i, count = 0, len(l)
while i < count:
res += l[i + 1] - l[i] + 1
i += 2
return res
def __nonzero__(self):
return bool(self._items)
__bool__ = __nonzero__
def __contains__(self, other):
'''True if set has an element ``other``, else False.'''
from xoutil.eight import integer_types
return isinstance(other, integer_types) and self._search(other)[0]
def __hash__(self):
'''Compute the hash value of a set.'''
return Set._hash(self)
if _py2:
def __cmp__(self, other):
# Python 3 automatically generate a TypeError when no mechanism is
# found by returning `NotImplemented` special value. In Python 2
# this patch method must be generated
from xoutil.eight import typeof
sname = typeof(self).__name__
oname = typeof(other).__name__
msg = 'unorderable types: "%s" and "%s"!'
raise TypeError(msg % (sname, oname))
def __eq__(self, other):
'''Python 2 and 3 have several differences in operator definitions.
For example::
>>> from xoutil.collections import PascalSet
>>> s1 = PascalSet[0:10]
>>> assert s1 == set(s1) # OK (True) in 2 and 3
>>> assert set(s1) == s1 # OK in 3, fails in 2
'''
if isinstance(other, Set):
ls, lo = len(self), len(other)
if ls == lo:
if isinstance(other, PascalSet):
return self._items == other._items
else:
return self.count(other) == ls
else:
return False
else:
return False
def __ne__(self, other):
return not self.__eq__(other)
def __gt__(self, other):
if isinstance(other, Set):
if other:
return self.issuperset(other) and len(self) > len(other)
else:
return bool(self._items)
else:
return NotImplemented
def __ge__(self, other):
if isinstance(other, Set):
if other:
return self.issuperset(other)
else:
return bool(self._items)
else:
return NotImplemented
def __lt__(self, other):
if isinstance(other, Set):
if other:
return self.issubset(other) and len(self) < len(other)
else:
return not self._items
else:
return NotImplemented
def __le__(self, other):
if isinstance(other, Set):
if other:
return self.issubset(other)
else:
return not self._items
else:
return NotImplemented
def __sub__(self, other):
if isinstance(other, Set):
return self.difference(other)
else:
return NotImplemented
def __isub__(self, other):
if isinstance(other, Set):
self.difference_update(other)
return self
else:
return NotImplemented
def __rsub__(self, other):
if isinstance(other, Set):
return other - type(other)(self)
else:
return NotImplemented
def __and__(self, other):
if isinstance(other, Set):
return self.intersection(other)
else:
return NotImplemented
def __iand__(self, other):
if isinstance(other, Set):
self.intersection_update(other)
return self
else:
return NotImplemented
def __rand__(self, other):
if isinstance(other, Set):
return other & type(other)(self)
else:
return NotImplemented
def __or__(self, other):
if isinstance(other, Set):
return self.union(other)
else:
return NotImplemented
def __ior__(self, other):
if isinstance(other, Set):
self.update(other)
return self
else:
return NotImplemented
def __ror__(self, other):
if isinstance(other, Set):
return other | type(other)(self)
else:
return NotImplemented
def __xor__(self, other):
if isinstance(other, Set):
return self.symmetric_difference(other)
else:
return NotImplemented
def __ixor__(self, other):
if isinstance(other, Set):
self.symmetric_difference_update(other)
return self
else:
return NotImplemented
def __rxor__(self, other):
if isinstance(other, Set):
return other ^ type(other)(self)
else:
return NotImplemented
def count(self, other):
'''Number of occurrences of any member of other in this set.
If other is an integer, return 1 if present, 0 if not.
'''
from xoutil.eight import integer_types
if isinstance(other, integer_types):
return 1 if other in self else 0
else:
return sum((i in self for i in other), 0)
def add(self, other):
'''Add an element to a set.
This has no effect if the element is already present.
'''
self._insert(other)
def union(self, *others):
'''Return the union of sets as a new set.
(i.e. all elements that are in either set.)
'''
res = self.copy()
res.update(*others)
return res
def update(self, *others):
'''Update a set with the union of itself and others.'''
from xoutil.eight import integer_types, range
for other in others:
if isinstance(other, PascalSet):
l = other._items
if self._items:
count = len(l)
i = 0
while i < count:
self._insert(l[i], l[i + 1])
i += 2
else:
self._items = l[:]
elif isinstance(other, integer_types):
self._insert(other)
elif isinstance(other, Iterable):
for i in other:
self._insert(i)
elif isinstance(other, slice):
start, stop, step = other.start, other.stop, other.step
if step is None:
step = 1
if step in (1, -1):
stop -= step
if step == -1:
start, stop = stop, start
self._insert(start, stop)
else:
for i in range(start, stop, step):
self._insert(i)
else:
raise self._invalid_value(other)
def intersection(self, *others):
'''Return the intersection of two or more sets as a new set.
(i.e. elements that are common to all of the sets.)
'''
res = self.copy()
res.intersection_update(*others)
return res
def intersection_update(self, *others):
'''Update a set with the intersection of itself and another.'''
from xoutil.eight import integer_types as ints
l = self._items
oi, count = 0, len(others)
while l and oi < count:
other = others[oi]
if not isinstance(other, PascalSet):
# safe mode for intersection
other = PascalSet(i for i in other if isinstance(i, ints))
o = other._items
if o:
sl, el = l[0], l[-1]
so, eo = o[0], o[-1]
if sl < so:
self._remove(sl, so - 1)
if eo < el:
self._remove(eo + 1, el)
i = 2
while l and i < len(o):
s, e = o[i - 1] + 1, o[i] - 1
if s <= e:
self._remove(s, e)
i += 2
else:
del l[:]
oi += 1
def difference(self, *others):
'''Return the difference of two or more sets as a new set.
(i.e. all elements that are in this set but not the others.)
'''
res = self.copy()
res.difference_update(*others)
return res
def difference_update(self, *others):
'''Remove all elements of another set from this set.'''
for other in others:
if isinstance(other, PascalSet):
l = other._items
count = len(l)
i = 0
while i < count:
self._remove(l[i], l[i + 1])
i += 2
else:
from xoutil.eight import integer_types
for i in other:
if isinstance(i, integer_types):
self._remove(i)
def symmetric_difference(self, other):
'''Return the symmetric difference of two sets as a new set.
(i.e. all elements that are in exactly one of the sets.)
'''
res = self.copy()
res.symmetric_difference_update(other)
return res
def symmetric_difference_update(self, other):
'Update a set with the symmetric difference of itself and another.'
if not isinstance(other, PascalSet):
other = PascalSet(other)
if self:
if other:
# TODO: Implement more efficiently
aux = other - self
self -= other
self |= aux
else:
self._items = other._items[:]
def discard(self, other):
'''Remove an element from a set if it is a member.
If the element is not a member, do nothing.
'''
self._remove(other)
def remove(self, other):
'''Remove an element from a set; it must be a member.
If the element is not a member, raise a KeyError.
'''
if other in self:
self._remove(other)
else:
raise KeyError('"%s" is not a member!' % other)
def pop(self):
'''Remove and return an arbitrary set element.
Raises KeyError if the set is empty.
'''
l = self._items
if l:
res = l[0]
if l[0] < l[1]:
l[0] += 1
else:
del l[0:2]
return res
else:
raise KeyError('pop from an empty set!')
def clear(self):
'''Remove all elements from this set.'''
self._items = []
def copy(self):
'''Return a shallow copy of a set.'''
return type(self)(self)
def isdisjoint(self, other):
'''Return True if two sets have a null intersection.'''
if isinstance(other, PascalSet):
if self and other:
l, o = self._items, other._items
i, lcount, ocount = 0, len(l), len(o)
maybe = True
while maybe and i < lcount:
found, idx = other._search(l[i])
if idx == ocount: # exhausted
# assert not found
i = lcount
elif found or l[i + 1] >= o[idx]:
maybe = False
else:
i += 2
return maybe
else:
return True
else:
return not any(i in self for i in other)
def issubset(self, other):
'''Report whether another set contains this set.'''
ls = len(self)
if isinstance(other, PascalSet):
if self:
if ls > len(other): # Fast check for obvious cases
return False
else:
l, o = self._items, other._items
i, lcount = 0, len(l)
maybe = True
while maybe and i < lcount:
found, idx = other._search(l[i])
if found and l[i + 1] <= o[idx + 1]:
i += 2
else:
maybe = False
return maybe
else:
return True
elif isinstance(other, Sized) and ls > len(other):
# Fast check for obvious cases
return False
elif isinstance(other, Container):
aux = next((i for i in self if i not in other), Unset)
return aux is Unset
else:
# Generator cases
from operator import add
from functools import reduce
lo = reduce(add, (i in self for i in other), 0)
return lo == ls
def issuperset(self, other):
'''Report whether this set contains another set.'''
ls = len(self)
if isinstance(other, PascalSet):
if other:
if ls < len(other): # Fast check for obvious cases
return False
else:
l, o = self._items, other._items
i, ocount = 0, len(o)
maybe = True
while maybe and i < ocount:
found, idx = self._search(o[i])
if found and o[i + 1] <= l[idx + 1]:
i += 2
else:
maybe = False
return maybe
else:
return True
elif isinstance(other, Sized) and ls < len(other):
# Fast check for obvious cases
return False
else:
aux = next((i for i in other if i not in self), Unset)
return aux is Unset
def _search(self, other):
'''Search the pair where ``other`` is placed.
Return a duple :``(if found or not, index)``.
'''
from xoutil.eight import integer_types
if isinstance(other, integer_types):
l = self._items
start, end = 0, len(l)
res, pivot = False, 2*(end // 4)
while not res and start < end:
s, e = l[pivot], l[pivot + 1]
if other < s:
end = pivot
elif other > e:
start = pivot + 2
else:
res = True
pivot = start + 2*((end - start) // 4)
return res, pivot
else:
raise self._invalid_value(other)
def _insert(self, start, end=None):
'''Insert an interval of integers.'''
if not end:
end = start
assert start <= end
l = self._items
count = len(l)
found, idx = self._search(start)
if not found:
if idx > 0 and start == l[idx - 1] + 1:
found = True
idx -= 2
l[idx + 1] = start
if idx < count - 2 and end == l[idx + 2] - 1:
end = l[idx + 3]
elif idx < count and end >= l[idx] - 1:
found = True
l[idx] = start
if found:
while end > l[idx + 1]:
if idx < count - 2 and end >= l[idx + 2] - 1:
if end <= l[idx + 3]:
l[idx + 1] = l[idx + 3]
del l[idx + 2:idx + 4]
count -= 2
else:
l[idx + 1] = end
else:
if idx < count:
l.insert(idx, start)
l.insert(idx + 1, end)
else:
l.extend((start, end))
count += 2
def _remove(self, start, end=None):
'''Remove an interval of integers.'''
if not end:
end = start
assert start <= end
l = self._items
sfound, sidx = self._search(start)
efound, eidx = self._search(end)
if sfound and efound and sidx == eidx:
first = l[sidx] < start
last = l[eidx + 1] > end
if first and last:
l.insert(eidx + 1, end + 1)
l.insert(sidx + 1, start - 1)
elif first:
l[sidx + 1] = start - 1
elif last:
l[eidx] = end + 1
else:
del l[sidx:eidx + 2]
else:
if sfound and l[sidx] < start:
l[sidx + 1] = start - 1
sidx += 2
if efound:
if l[eidx + 1] > end:
l[eidx] = end + 1
else:
eidx += 2
if sidx < eidx:
del l[sidx:eidx]
def _invalid_value(self, value):
from xoutil.eight import typeof
cls_name = typeof(self).__name__
vname = typeof(value).__name__
msg = ('Unsupported type for value "%s" of type "%s" for a "%s", '
'must be an integer!')
return TypeError(msg % (value, vname, cls_name))
@classmethod
def _prime_numbers_until(cls, limit):
'''This is totally a funny test method.'''
from xoutil.eight import range
res = cls[2:limit]
for i in range(2, limit//2 + 1):
if i in res:
aux = i + i
while aux < limit:
if aux in res:
res.remove(aux)
aux += i
return res
MutableSet.register(PascalSet)
[docs]class BitPascalSet(object, metaclass(MetaSet)):
'''Collection of unique integer elements (implemented with bit-wise sets).
::
BitPascalSet(*others) -> new bit-set object
.. versionadded:: 1.7.0.
'''
__slots__ = ('_items',)
_bit_length = 62 # How many values are stored in each item
def __init__(self, *others):
'''Initialize self.
:param others: Any number of integer or collection of integers that
will be the set members.
In this case `_items` is a dictionary with keys containing number
division seeds and values bit-wise integers (each bit is the division
modulus position).
'''
self._items = {}
self.update(*others)
def __str__(self):
if self:
return str(PascalSet(self))
else:
cname = type(self).__name__
return str('%s([])') % cname
def __repr__(self):
cname = type(self).__name__
res = str(', ').join(str(i) for i in self)
return str('%s([%s])') % (cname, res)
def __iter__(self):
bl = self._bit_length
sm = self._items
for k in sorted(sm):
v = sm[k]
base = k*bl
i = 0
ref = 1
while i < bl:
if ref & v:
yield base + i
ref <<= 1
i += 1
def __len__(self):
return sum((1 for i in self), 0)
def __nonzero__(self):
return bool(self._items)
__bool__ = __nonzero__
def __contains__(self, other):
'''True if this bit-set has the element ``other``, else False.'''
res = self._search(other)
if res:
k, ref, v = res
return bool(v & (1 << ref))
else:
return False
def __hash__(self):
'''Compute the hash value of a set.'''
return Set._hash(self)
if _py2:
def __cmp__(self, other):
# Python 3 automatically generate a TypeError when no mechanism is
# found by returning `NotImplemented` special value. In Python 2
# this patch method must be generated
from xoutil.eight import typeof
sname = typeof(self).__name__
oname = typeof(other).__name__
msg = 'unorderable types: "%s" and "%s"!'
raise TypeError(msg % (sname, oname))
def __eq__(self, other):
'''Python 2 and 3 have several differences in operator definitions.
For example::
>>> from xoutil.collections import BitPascalSet
>>> s1 = BitPascalSet[0:10]
>>> assert s1 == set(s1) # OK (True) in 2 and 3
>>> assert set(s1) == s1 # OK in 3, fails in 2
'''
if isinstance(other, Set):
if isinstance(other, BitPascalSet):
return self._items == other._items
else:
ls, lo = len(self), len(other)
return ls == lo == self.count(other)
else:
return False
def __ne__(self, other):
return not self.__eq__(other)
def __gt__(self, other):
if isinstance(other, Set):
if other:
return self.issuperset(other) and len(self) > len(other)
else:
return bool(self._items)
else:
return NotImplemented
def __ge__(self, other):
if isinstance(other, Set):
return self.issuperset(other) if other else bool(self._items)
else:
return NotImplemented
def __lt__(self, other):
if isinstance(other, Set):
if other:
return self.issubset(other) and len(self) < len(other)
else:
return not self._items
else:
return NotImplemented
def __le__(self, other):
if isinstance(other, Set):
return self.issubset(other) if other else not self._items
else:
return NotImplemented
def __sub__(self, other):
if isinstance(other, Set):
return self.difference(other)
else:
return NotImplemented
def __isub__(self, other):
if isinstance(other, Set):
self.difference_update(other)
return self
else:
return NotImplemented
def __rsub__(self, other):
if isinstance(other, Set):
return other - type(other)(self)
else:
return NotImplemented
def __and__(self, other):
if isinstance(other, Set):
return self.intersection(other)
else:
return NotImplemented
def __iand__(self, other):
if isinstance(other, Set):
self.intersection_update(other)
return self
else:
return NotImplemented
def __rand__(self, other):
if isinstance(other, Set):
return other & type(other)(self)
else:
return NotImplemented
def __or__(self, other):
if isinstance(other, Set):
return self.union(other)
else:
return NotImplemented
def __ior__(self, other):
if isinstance(other, Set):
self.update(other)
return self
else:
return NotImplemented
def __ror__(self, other):
if isinstance(other, Set):
return other | type(other)(self)
else:
return NotImplemented
def __xor__(self, other):
if isinstance(other, Set):
return self.symmetric_difference(other)
else:
return NotImplemented
def __ixor__(self, other):
if isinstance(other, Set):
self.symmetric_difference_update(other)
return self
else:
return NotImplemented
def __rxor__(self, other):
if isinstance(other, Set):
return other ^ type(other)(self)
else:
return NotImplemented
def count(self, other):
'''Number of occurrences of any member of other in this set.
If other is an integer, return 1 if present, 0 if not.
'''
from xoutil.eight import integer_types
if isinstance(other, integer_types):
return 1 if other in self else 0
else:
return sum((i in self for i in other), 0)
def add(self, other):
'''Add an element to a bit-set.
This has no effect if the element is already present.
'''
self._insert(other)
def union(self, *others):
'''Return the union of bit-sets as a new set.
(i.e. all elements that are in either set.)
'''
res = self.copy()
res.update(*others)
return res
def update(self, *others):
'''Update a bit-set with the union of itself and others.'''
from xoutil.eight import integer_types, range
for other in others:
if isinstance(other, BitPascalSet):
sm = self._items
om = other._items
for k, v in safe_dict_iter(om).items():
if k in sm:
sm[k] |= v
else:
sm[k] = v
elif isinstance(other, integer_types):
self._insert(other)
elif isinstance(other, Iterable):
for i in other:
self._insert(i)
elif isinstance(other, slice):
start, stop, step = other.start, other.stop, other.step
if step is None:
step = 1
for i in range(start, stop, step):
self._insert(i)
else:
raise self._invalid_value(other)
def intersection(self, *others):
'''Return the intersection of two or more bit-sets as a new set.
(i.e. elements that are common to all of the sets.)
'''
res = self.copy()
res.intersection_update(*others)
return res
def intersection_update(self, *others):
'''Update a bit-set with the intersection of itself and another.'''
from xoutil.eight import integer_types as ints
sm = self._items
oi, count = 0, len(others)
while sm and oi < count:
other = others[oi]
if not isinstance(other, BitPascalSet):
# safe mode for intersection
other = PascalSet(i for i in other if isinstance(i, ints))
om = other._items
for k, v in safe_dict_iter(sm).items():
v &= om.get(k, 0)
if v:
sm[k] = v
else:
del sm[k]
oi += 1
def difference(self, *others):
'''Return the difference of two or more bit-sets as a new set.
(i.e. all elements that are in this set but not the others.)
'''
res = self.copy()
res.difference_update(*others)
return res
def difference_update(self, *others):
'''Remove all elements of another bit-set from this set.'''
for other in others:
if isinstance(other, BitPascalSet):
sm = self._items
om = other._items
for k, v in safe_dict_iter(om).items():
if k in sm:
v = sm[k] & ~v
if v:
sm[k] = v
else:
del sm[k]
else:
from xoutil.eight import integer_types
for i in other:
if isinstance(i, integer_types):
self._remove(i)
def symmetric_difference(self, other):
'''Return the symmetric difference of two bit-sets as a new set.
(i.e. all elements that are in exactly one of the sets.)
'''
res = self.copy()
res.symmetric_difference_update(other)
return res
def symmetric_difference_update(self, other):
'Update a bit-set with the symmetric difference of itself and another.'
if not isinstance(other, BitPascalSet):
other = BitPascalSet(other)
if self:
if other:
# TODO: Implement more efficiently
aux = other - self
self -= other
self |= aux
else:
self._items = other._items[:]
def discard(self, other):
'''Remove an element from a bit-set if it is a member.
If the element is not a member, do nothing.
'''
self._remove(other)
def remove(self, other):
'''Remove an element from a bit-set; it must be a member.
If the element is not a member, raise a KeyError.
'''
self._remove(other, fail=True)
def pop(self):
'''Remove and return an arbitrary bit-set element.
Raises KeyError if the set is empty.
'''
from xoutil.eight import iteritems
sm = self._items
if sm:
bl = self._bit_length
k, v = next(iteritems(sm))
assert v
base = k*bl
i = 0
ref = 1
res = None
while res is None:
if ref & v:
res = base + i
else:
ref <<= 1
i += 1
v &= ~ref
if v:
sm[k] = v
else:
del sm[k]
return res
else:
raise KeyError('pop from an empty set!')
def clear(self):
'''Remove all elements from this bit-set.'''
self._items = {}
def copy(self):
'''Return a shallow copy of a set.'''
return type(self)(self)
def isdisjoint(self, other):
'''Return True if two bit-sets have a null intersection.'''
from xoutil.eight import iteritems
if isinstance(other, BitPascalSet):
sm, om = self._items, other._items
if sm and om:
return all(sm.get(k, 0) & v == 0 for k, v in iteritems(om))
else:
return True
else:
return not any(i in self for i in other)
def issubset(self, other):
'''Report whether another set contains this bit-set.'''
from xoutil.eight import iteritems
if isinstance(other, BitPascalSet):
sm, om = self._items, other._items
if sm:
return all(om.get(k, 0) & v == v for k, v in iteritems(sm))
else:
return True
elif isinstance(other, Container):
return not any(i not in other for i in self)
else:
# Generator cases
return sum((i in self for i in other), 0) == len(self)
def issuperset(self, other):
'''Report whether this bit set contains another set.'''
from xoutil.eight import iteritems
if isinstance(other, BitPascalSet):
sm, om = self._items, other._items
if om:
return all(sm.get(k, 0) & v == v for k, v in iteritems(om))
else:
return True
else:
return not any(i not in self for i in other)
def _search(self, other):
'''Search the bit-wise value where ``other`` could be placed.
Return a duple :``(seed, bits to shift left)``.
'''
from xoutil.eight import integer_types
if isinstance(other, integer_types):
sm = self._items
bl = self._bit_length
k, ref = divmod(other, bl)
return k, ref, sm.get(k, 0)
else:
return None
def _insert(self, other):
'''Add a member in this bit-set.'''
aux = self._search(other)
if aux:
k, ref, v = aux
self._items[k] = v | (1 << ref)
else:
raise self._invalid_value(other)
def _remove(self, other, fail=False):
'''Remove an interval of integers from this bit-set.'''
aux = self._search(other)
ok = False
if aux:
k, ref, v = aux
if v:
aux = v & ~(1 << ref)
if v != aux:
ok = True
sm = self._items
if aux:
sm[k] = aux
else:
del sm[k]
if not ok and fail:
raise KeyError('"%s" is not a member!' % other)
def _invalid_value(self, value):
from xoutil.eight import typeof
cls_name = typeof(self).__name__
vname = typeof(value).__name__
msg = ('Unsupported type for value "%s" of type "%s" for a "%s", '
'must be an integer!')
return TypeError(msg % (value, vname, cls_name))
@classmethod
def _prime_numbers_until(cls, limit):
'''This is totally a funny test method.'''
from xoutil.eight import range
res = cls[2:limit]
for i in range(2, limit//2 + 1):
if i in res:
aux = i + i
while aux < limit:
if aux in res:
res.remove(aux)
aux += i
return res
MutableSet.register(BitPascalSet)
# get rid of unused global variables
del slist, _py2, _py33, _py34, metaclass
del deprecated