# Source code for xotl.tools.future.codecs

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------
# Copyright (c) Merchise Autrement [~º/~] and Contributors
# All rights reserved.
#
# This is free software; you can do what the LICENCE file allows you to.
#

"""Codec registry, base classes and tools.

This module provides some additions to the standard `codecs` module.

"""


from codecs import *  # noqa
from codecs import __all__  # noqa

# Rebind ``__all__`` to a fresh list built from the names imported from
# ``codecs``, so that extending it further down in this module operates on
# this module's own list object.
__all__ = list(__all__)


def force_encoding(encoding=None):
    """Return an encoding name, falling back to the system default.

    If `encoding` is given (any truthy value) it is returned unchanged; it
    is *not* checked against the codec registry.  Otherwise the encoding
    reported by `locale.getdefaultlocale`:func: is used.  If that is also
    none, or the locale settings cannot be parsed, return ``'UTF-8'``.

    :param encoding: the requested encoding name, or None to use the
           default.

    :returns: `encoding` itself when given, else the default encoding name.

    .. versionadded:: 1.2.0

    .. versionchanged:: 1.8.0 migrated to 'future.codecs'

    .. versionchanged:: 1.8.7 Stop using `locale.getpreferredencoding`:func:
       and improve documentation.

    """
    # TODO: This mechanism is tricky, we must find out how to unroll the mess
    # involving the concept of which encoding to use by default:
    #
    # - locale.getlocale(): In Python 2 returns ``(None, None)``, but in
    #   Python 3 ``('en_US', 'UTF-8')``.
    #
    # - locale.getpreferredencoding(): all versions return ``'UTF-8'``.
    #
    # - sys.getdefaultencoding(): In Python 2 returns ``'ascii'``, but in
    #   Python 3 ``'utf-8'``.  The same in Mac-OS.  The related code was
    #   commented out because of these differences.
    #
    # All these considerations were also verified in Mac-OS.
    if encoding:
        return encoding

    import locale

    try:
        # NOTE(review): `getdefaultlocale` is documented as deprecated since
        # Python 3.11 -- consider a replacement when revisiting the TODO.
        default = locale.getdefaultlocale()[1]
    except ValueError:
        # `locale` raises ValueError for locale settings it cannot parse
        # (e.g. an unrecognized value in LC_ALL/LANG).  Treat that the same
        # as "no default available" instead of crashing the caller.
        default = None
    return default or "UTF-8"
def safe_decode(s, encoding=None):
    """Similar to bytes `decode` method returning unicode.

    Decodes `s` using the given `encoding`, or determining one from the
    system.  Undecodable bytes are replaced instead of raising an error.

    - If `s` is already a `str` it is returned unchanged.

    - If `encoding` is not a known codec, the system default (see
      `force_encoding`:func:) is tried next and, as a last resort,
      ``'UTF-8'``.

    - If `s` is not bytes-like (numbers and other objects), ``str(s)`` is
      returned.

    :param s: the value to decode.

    :param encoding: the name of the encoding to use, or None to use the
           system default.

    :returns: a `str`.

    .. versionadded:: 1.1.3

    .. versionchanged:: 1.8.0 migrated to 'future.codecs'

    """
    if isinstance(s, str):
        return s
    # Encodings to try, in order: the requested one (when given) and then the
    # system default.  The chain is finite, so an unknown system default can
    # no longer cause unbounded recursion.
    attempts = (encoding, None) if encoding else (None,)
    for wanted in attempts:
        codec = force_encoding(wanted)
        try:
            # In Python 3 str(b'm') returns the string "b'm'" and not just
            # "m"; passing the encoding decodes the bytes instead.
            return str(s, codec, "replace")
        except LookupError:
            # This encoding is not known, try the next candidate.
            continue
        except TypeError:
            # For numbers and other stuff.
            return str(s)
    # Not even the system default is a known codec; `s` is bytes-like here
    # (otherwise the TypeError branch above would have returned already).
    return str(s, "UTF-8", "replace")
def safe_encode(u, encoding=None):
    """Similar to unicode `encode` method returning bytes.

    Encodes `u` using the given `encoding`, or determining one from the
    system.  Characters that cannot be encoded are replaced instead of
    raising an error.

    - If `u` is already `bytes` it is returned unchanged.

    - If `u` is not a `str`, it is converted with ``str(u)`` before being
      encoded.

    - If `encoding` is not a known codec, the system default (see
      `force_encoding`:func:) is tried next and, as a last resort,
      ``'UTF-8'``.

    :param u: the value to encode.

    :param encoding: the name of the encoding to use, or None to use the
           system default.

    :returns: a `bytes` object.

    .. versionadded:: 1.1.3

    .. versionchanged:: 1.8.0 migrated to 'future.codecs'

    """
    # XXX: 'eight' pending.
    # TODO: This is not nice for Python 3, bytes is not valid string any more
    #       See `json.encoder.py_encode_basestring_ascii`:func: of Python 2.x
    if isinstance(u, bytes):
        return u
    # Encodings to try, in order: the requested one (when given) and then the
    # system default.  The chain is finite, so an unknown system default can
    # no longer cause unbounded recursion.
    attempts = (encoding, None) if encoding else (None,)
    for wanted in attempts:
        codec = force_encoding(wanted)
        try:
            if isinstance(u, str):
                return bytes(u, codec, "replace")
            return str(u).encode(codec, "replace")
        except LookupError:
            # This encoding is not known, try the next candidate.
            continue
    # Not even the system default is a known codec.
    text = u if isinstance(u, str) else str(u)
    return bytes(text, "UTF-8", "replace")
# Add the helpers defined in this module to the public names.
__all__ += ("force_encoding", "safe_decode", "safe_encode")