Source code for xotl.tools.values.simple

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------
# Copyright (c) Merchise Autrement [~º/~] and Contributors
# All rights reserved.
#
# This is free software; you can do what the LICENCE file allows you to.
#

"""Simple or internal coercers.

With coercers defined in this module, many of the `xotl.tools.string`:mod:
utilities could be deprecated.

In Python 3, all arrays, not only those containing valid byte or unicode
chars, are buffers.

"""

from xotl.tools.values import coercer, nil


@coercer
def not_false_coercer(arg):
    """Validate that `arg` is not a false value.

    The usual Python convention for truthiness is *not* used here.  The only
    values considered false are `None` and any false instance of
    `xotl.tools.symbols.boolean`:class: (according to the original
    documentation this includes `False` itself).

    :param arg: any object.

    :returns: `arg` itself when it is valid, otherwise `nil`.

    """
    from xotl.tools.symbols import boolean

    if arg is None:
        return nil
    # Check the type *before* evaluating truthiness: calling ``bool()`` on
    # arbitrary objects may be expensive or even raise, and it is irrelevant
    # for anything that is not a `boolean` instance.
    if isinstance(arg, boolean) and not arg:
        return nil
    return arg
def not_false(default):
    """Build a coercer that falls back to `default` for false arguments.

    The notion of "false" is the one implemented by
    `not_false_coercer`:func:.  The fallback is itself passed through that
    coercer, so a false `default` produces `nil`.

    :param default: value used when the coerced argument is false.

    :returns: the new coercer.

    """

    @coercer
    def inner_coercer(arg):
        check = not_false_coercer
        if check(arg) is arg:
            return arg
        return check(default)

    return inner_coercer
def isnot(value):
    """Build a coercer accepting every argument except `value`.

    The comparison is made by identity (``is``), not by equality.

    :param value: the only object the resulting coercer rejects.

    :returns: a coercer returning its argument, or `nil` when the argument
        is `value`.

    """

    @coercer
    def inner_coercer(arg):
        if arg is value:
            return nil
        return arg

    return inner_coercer
@coercer
def name_coerce(arg):
    """Return the name of `arg` if it is a named object, else `nil`.

    A name is only valid when it is a `str`; a ``__name__`` of any other
    type is rejected.  Generator objects do have a ``__name__`` attribute,
    but they are deliberately treated as unnamed.  Static and class method
    wrappers are looked through to the function they wrap.

    """
    from types import GeneratorType

    if isinstance(arg, GeneratorType):
        return nil
    target = arg
    if isinstance(target, (staticmethod, classmethod)):
        # Use the wrapped function when the descriptor exposes one.
        wrapped = getattr(target, "__func__", None)
        if wrapped:
            target = wrapped
    name = getattr(target, "__name__", None)
    if isinstance(name, str):
        return name
    return nil
@coercer
def iterable_coerce(arg):
    """Accept `arg` only when it is an instance of `Iterable`.

    :returns: `arg` unchanged for iterables, `nil` for anything else.

    """
    from collections.abc import Iterable

    if isinstance(arg, Iterable):
        return arg
    return nil
def collection(arg=nil, avoid=(), force=False, base=None, name=None):
    """Coercer for logic collections.

    The inner coercer returns its argument unchanged when it is a *strict*
    iterable.  Strings are iterable in Python, but they are never accepted
    as collections here::

        >>> collection('abc') is nil
        True

    When `arg` is given (i.e. it is not ``nil``) it is checked immediately
    and the result is returned.  Otherwise a coercer configured by the
    remaining parameters is returned.

    :param avoid: a type, or tuple of types, to reject in addition to
           strings; for example::

               >>> collection(avoid=dict)({}) is nil
               True

               >>> collection()({}) is nil
               False

    :param force: when true, an argument that is not a valid collection is
           wrapped in a list instead of being rejected::

               >>> collection(avoid=(dict,), force=True)({}) == [{}]
               True

    :param base: the base type to check against; a false value (the
           default is ``None``) means `~collections.abc.Iterable`:class:.

    :param name: function name to give the returned coercer.  It must not
           be passed together with `arg` (an ``assert`` guards this).

    """
    if not base:
        from collections.abc import Iterable as base
    if not isinstance(avoid, tuple):
        avoid = (avoid,)
    rejected = (str,) + avoid

    @coercer
    def collection_coerce(arg):
        if isinstance(arg, rejected) or not isinstance(arg, base):
            return [arg] if force else nil
        return arg

    if arg is not nil:
        # Direct-check mode: there is no coercer to rename.
        assert not name
        return collection_coerce(arg)

    # Factory mode: document (and optionally rename) the new coercer.
    avoided = " and {}".format(avoid) if avoid else ""
    text = (
        "Return the same argument if it is a strict iterable.\n "
        "Strings{} are not considered valid iterables in this case.\n"
    ).format(avoided)
    if force:
        text += " A non iterable argument is wrapped in a list.\n"
    collection_coerce.__doc__ = text
    if name:
        collection_coerce.__name__ = name
    return collection_coerce
from collections.abc import Mapping, Sequence  # noqa

# Iterables: any non-string iterable is valid.
logic_iterable_coerce = collection(name="logic_iterable_coerce")
force_iterable_coerce = collection(name="force_iterable_coerce", force=True)

# Collections: iterables that are neither strings nor mappings.
logic_collection_coerce = collection(
    name="logic_collection_coerce", avoid=Mapping
)
force_collection_coerce = collection(
    name="force_collection_coerce", avoid=Mapping, force=True
)

# Sequences: checked against `Sequence` instead of `Iterable`.
logic_sequence_coerce = collection(
    name="logic_sequence_coerce", avoid=Mapping, base=Sequence
)
force_sequence_coerce = collection(
    name="force_sequence_coerce", avoid=Mapping, base=Sequence, force=True
)

# The ABCs were only needed to build the coercers above.
del Mapping, Sequence
@coercer
def decode_coerce(arg):
    """Decode objects implementing the buffer protocol.

    The locale's preferred encoding is used (``"UTF-8"`` when none is
    reported), with the ``"replace"`` error handler.  The object's own
    ``decode`` method is tried first; when it is missing, fails, or does not
    produce a `str`, `codecs.decode`:func: is used instead.

    :returns: the decoded `str`, or `nil` when nothing worked.

    """
    import codecs
    import locale

    encoding = locale.getpreferredencoding() or "UTF-8"
    method = getattr(arg, "decode", None)
    decoded = None
    if callable(method):
        try:
            candidate = method(encoding, "replace")
        except Exception:
            candidate = None
        if isinstance(candidate, str):
            decoded = candidate
    if decoded is not None:
        return decoded
    try:
        # TODO: All arrays are decoded, and not only those containing
        # valid byte or unicode characters.
        return codecs.decode(arg, encoding, "replace")
    except Exception:
        return nil
@coercer
def encode_coerce(arg):
    """Encode string objects.

    The locale's preferred encoding is used (``"UTF-8"`` when none is
    reported), with the ``"replace"`` error handler.  The object's own
    ``encode`` method is tried first; when it is missing, fails, or does not
    produce `bytes`, `codecs.encode`:func: is used instead.

    :returns: the encoded `bytes`, or `nil` when nothing worked.

    """
    import codecs
    import locale

    encoding = locale.getpreferredencoding() or "UTF-8"
    method = getattr(arg, "encode", None)
    encoded = None
    if callable(method):
        try:
            candidate = method(encoding, "replace")
        except Exception:
            candidate = None
        if isinstance(candidate, bytes):
            encoded = candidate
    if encoded is not None:
        return encoded
    try:
        return codecs.encode(arg, encoding, "replace")
    except Exception:
        return nil
@coercer
def unicode_coerce(arg):
    """Decode a buffer or any object returning unicode text (`str`).

    Decoding is delegated to `decode_coerce`:func:, which uses the locale's
    preferred encoding.  When that coercer fails, ``str(arg)`` is returned.

    For named objects the name is used instead of the object itself; see
    `name_coerce`:func: for more information.

    `bytearray`, `memoryview` and `array.array` arguments are converted to
    `bytes` before decoding (an array is first tried with its own
    ``tounicode()`` method).

    See `str_coerce`:func: to coerce to the standard string type.

    .. versionadded:: 1.7.0

    """
    from array import array

    # Named objects are represented by their name.
    aux = name_coerce(arg)
    if aux is not nil:
        arg = aux
    if isinstance(arg, str):
        # Already text: nothing to decode.
        return arg
    elif isinstance(arg, bytearray):
        arg = bytes(arg)
    elif isinstance(arg, memoryview):
        arg = arg.tobytes()
    elif isinstance(arg, array):
        try:
            return arg.tounicode()
        except Exception:
            # Not a unicode array: try to read its items as byte values.
            try:
                arg = bytes(bytearray(arg.tolist()))
            except Exception:
                # NOTE(review): nesting of this early return was inferred
                # from a flattened listing -- confirm against the repository.
                arg = str(arg)
                return arg
    res = decode_coerce(arg)
    return str(arg) if res is nil else res
@coercer
def bytes_coerce(arg):
    """Encode a unicode string (or any object) into a `bytes` buffer.

    Encoding is delegated to `encode_coerce`:func:, which uses the locale's
    preferred encoding.  When the value cannot be encoded directly, its
    ``str()`` representation is encoded instead.

    Historically ``bytes([65, 66, 67])`` produced ``"[65, 66, 67]"`` in
    Python 2.x and ``b"ABC"`` in Python 3; this function was written to
    normalize such differences.

    For named objects the name is used instead of the object itself; see
    `name_coerce`:func: for more information.

    `bytes`, `bytearray` and `memoryview` arguments are returned as `bytes`
    without any encoding step.  An `array.array` is converted through
    ``tounicode()`` when possible, otherwise its items are read as byte
    values.

    See `str_coerce`:func: to coerce to the standard string type.

    .. versionadded:: 1.7.0

    """
    from array import array

    named = name_coerce(arg)
    value = arg if named is nil else named
    if isinstance(value, bytes):
        return value
    if isinstance(value, bytearray):
        return bytes(value)
    if isinstance(value, memoryview):
        return value.tobytes()
    if isinstance(value, array):
        try:
            value = value.tounicode()
        except Exception:
            try:
                return bytes(bytearray(value.tolist()))
            except Exception:
                value = str(value)
    encoded = encode_coerce(value)
    if encoded is nil:
        return encode_coerce(str(value))
    return encoded
@coercer
def str_coerce(arg):
    """Coerce to the standard string type.

    This is a thin alias of `unicode_coerce`:func:; it dates from the time
    when the standard string type was `bytes` in Python 2.x and unicode
    (`str`) in Python 3.

    .. versionadded:: 1.7.0

    .. deprecated:: 2.0.6

    """
    result = unicode_coerce(arg)
    return result
@coercer
def ascii_coerce(arg):
    """Coerce to a string containing only ASCII characters.

    The text is first decomposed with unicode 'NFKD' normalization, so that
    accented letters are split into a base character plus combining marks;
    every code point that is still outside ASCII is then dropped.

    :param arg: a `str`, or any object accepted by `unicode_coerce`:func:.

    :returns: a `str` made of ASCII characters only.

    """
    import unicodedata

    if not isinstance(arg, str):
        arg = unicode_coerce(arg)
    res = unicodedata.normalize("NFKD", arg).encode("ascii", "ignore")
    # `res` is pure ASCII by construction, so decode it as such instead of
    # going through the locale-dependent generic coercers.
    return res.decode("ascii")
@coercer
def ascii_set_coerce(arg):
    """Coerce to a string with only ASCII characters, removing repetitions.

    The conversion to ASCII is done by `ascii_coerce`:func: (unicode 'NFKD'
    normalization, dropping whatever is left outside ASCII).

    :returns: a `str` where each resulting character appears exactly once,
        in ascending code point order.

    """
    # Iterating a bare `set` of strings yields an arbitrary order that may
    # change between interpreter runs (hash randomization); sorting makes
    # the result deterministic and canonical for a given set of characters.
    return "".join(sorted(set(ascii_coerce(arg))))
@coercer
def lower_ascii_coerce(arg):
    """Coerce to a string containing only lower-case ASCII characters.

    The argument is converted with `ascii_coerce`:func: and the result is
    lower-cased.

    """
    ascii_text = ascii_coerce(arg)
    return ascii_text.lower()
@coercer
def lower_ascii_set_coerce(arg):
    """Coerce to lower-case ASCII characters, removing repetitions.

    The conversion is done by `lower_ascii_coerce`:func: (unicode 'NFKD'
    normalization, dropping whatever is left outside ASCII, then
    lower-casing).

    :returns: a `str` where each resulting character appears exactly once,
        in ascending code point order.

    """
    # Iterating a bare `set` of strings yields an arbitrary order that may
    # change between interpreter runs (hash randomization); sorting makes
    # the result deterministic and canonical for a given set of characters.
    return "".join(sorted(set(lower_ascii_coerce(arg))))
@coercer
def chars_coerce(arg):
    """Convert to unicode characters.

    An integer between ``0`` and ``0x10ffff`` is taken as a unicode ordinal
    and converted to the corresponding character.  Any other argument is
    converted with `unicode_coerce`:func:.

    """
    if not isinstance(arg, int) or not (0 <= arg <= 0x10FFFF):
        return unicode_coerce(arg)
    return chr(arg)
@coercer
def strict_string_coerce(arg):
    """Coerce to string only when the argument already is a `str`.

    :returns: the result of `str_coerce`:func: for `str` arguments, `nil`
        for everything else.

    """
    if not isinstance(arg, str):
        return nil
    return str_coerce(arg)
# TODO: Why is the `text` class below defined in this module of coercers?
class text(str):
    """A `str` subclass giving a nice text representation of an object.

    text(obj='') -> text

    text(bytes_or_buffer[, encoding[, errors]]) -> text

    When `encoding` or `errors` is given, construction is delegated as-is to
    `str`, so the object must expose a data buffer that is decoded using the
    given encoding and error handler.  Otherwise the object is converted
    with `unicode_coerce`:func:.

    :param encoding: defaults to ``sys.getdefaultencoding()``.

    :param errors: defaults to 'strict'.

    Method `join`:meth: is extended to receive any collection of objects,
    either as a variable number of arguments or as one iterable.

    """

    def __new__(cls, obj="", *args, **kwargs):
        # Only coerce in the single-argument form; with `encoding`/`errors`
        # the standard `str` constructor does the decoding.
        if not args and not kwargs:
            obj = unicode_coerce(obj)
        return super().__new__(cls, obj, *args, **kwargs)

    def join(self, *args):
        """S.join(variable_number_args or iterable) -> text

        Concatenate the given objects, each converted to text, using `S` as
        the separator between elements.

        See `chr_join`:meth: for another version of this functionality.

        """
        return self._join(unicode_coerce, args)

    def chr_join(self, *args):
        """S.chr_join(variable_number_args or iterable) -> text

        Concatenate the given objects, each converted to text, using `S` as
        the separator between elements.

        Unlike `join`:meth:, integers between ``0`` and ``0x10ffff`` are
        converted to characters as unicode ordinals.

        """
        return self._join(chars_coerce, args)

    def _join(self, coercer, args):
        """Protected helper implementing `join`:meth: and `chr_join`:meth:.

        A single iterable argument is expanded into the items to join.

        """
        from collections.abc import Iterable

        items = args
        if len(items) == 1 and isinstance(items[0], Iterable):
            items = items[0]
        return super().join(map(coercer, items))