Source code for xotl.tools.future.codecs
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------
# Copyright (c) Merchise Autrement [~º/~] and Contributors
# All rights reserved.
#
# This is free software; you can do what the LICENCE file allows you to.
#
"""Codec registry, base classes and tools.
This module provides some additions to the standard `codecs` module.
"""
# This module extends the standard `codecs` module, so everything `codecs`
# exposes is re-exported from here as well.
from codecs import * # noqa
from codecs import __all__ # noqa
# Build a new list out of the imported `__all__`, so that the names added at
# the end of this module do not modify the sequence owned by `codecs`.
__all__ = list(__all__)
def force_encoding(encoding=None):
    """Validates an encoding value.

    If `encoding` is None (or any other false value) use the encoding
    reported by `locale.getdefaultlocale`:func:.  If that is also none, or
    the locale settings cannot be parsed, return 'UTF-8'.

    :param encoding: Name of the encoding requested by the caller.

    :returns: `encoding` itself when it is a true value; otherwise the
              encoding detected from the locale, or 'UTF-8'.

    .. versionadded:: 1.2.0

    .. versionchanged:: 1.8.0 migrated to 'future.codecs'

    .. versionchanged:: 1.8.7 Stop using `locale.getpreferredencoding`:func:
       and improve documentation.

    """
    # TODO: This mechanism is tricky, we must find out how to unroll the mess
    # involving the concept of which encoding to use by default:
    #
    # - locale.getlocale(): In Python 2 returns ``(None, None)``, but in
    #   Python 3 ``('en_US', 'UTF-8')``.
    #
    # - locale.getpreferredencoding(): all versions returns ``'UTF-8'``.
    #
    # - sys.getdefaultencoding(): In Python 2 returns ``'ascii'``, but in
    #   Python 3 ``'utf-8'``.  The same in Mac-OS.  The related code was
    #   commented because of these differences.
    #
    # All these considerations were also verified in Mac-OS.
    if encoding:
        # An explicit value always wins; no need to query the locale.
        return encoding
    import locale

    try:
        detected = locale.getdefaultlocale()[1]
    except ValueError:
        # `getdefaultlocale` raises ValueError ("unknown locale: ...") when
        # the locale environment variables hold a name it cannot parse.
        # This function must always yield an encoding, so fall back.
        detected = None
    return detected or "UTF-8"
def safe_decode(s, encoding=None):
    """Similar to bytes `decode` method returning unicode.

    Decodes `s` using the given `encoding`, or determining one from the
    system (via `force_encoding`).  Bytes that cannot be decoded are
    replaced instead of raising an error.

    :param s: The value to decode.  A `str` is returned unchanged; a value
              that cannot be decoded at all (numbers and other stuff) is
              converted with ``str(s)``.

    :param encoding: Name of the encoding to use.  If it is not known to
                     Python, the system default is tried; if the system
                     default is not known either, 'UTF-8' is used.

    :returns: A `str`.

    .. versionadded:: 1.1.3

    .. versionchanged:: 1.8.0 migrated to 'future.codecs'

    """
    if isinstance(s, str):
        return s
    codec = force_encoding(encoding)
    try:
        # In Python 3 str(b'm') returns the string "b'm'" and not just "m",
        # passing the encoding fixes this.
        return str(s, codec, "replace")
    except LookupError:
        if encoding:
            # The provided encoding is not known, try with the system one.
            return safe_decode(s)
        # The system default itself is unknown to Python.  Retrying without
        # an encoding would resolve to the same name and recurse forever, so
        # use a codec that is always available.
        return str(s, "UTF-8", "replace")
    except TypeError:
        # For numbers and other stuff.
        return str(s)
def safe_encode(u, encoding=None):
    """Similar to unicode `encode` method returning bytes.

    Encodes `u` using the given `encoding`, or determining one from the
    system (via `force_encoding`).  Characters that cannot be encoded are
    replaced instead of raising an error.

    :param u: The value to encode.  A `bytes` object is returned unchanged;
              a value that is not a `str` is first converted with
              ``str(u)``.

    :param encoding: Name of the encoding to use.  If it is not known to
                     Python, the system default is tried; if the system
                     default is not known either, 'UTF-8' is used.

    :returns: A `bytes` object.

    .. versionadded:: 1.1.3

    .. versionchanged:: 1.8.0 migrated to 'future.codecs'

    """
    # XXX: 'eight' pending.
    # TODO: This is not nice for Python 3, bytes is not valid string any more
    #       See `json.encoder.py_encode_basestring_ascii`:func: of Python 2.x
    if isinstance(u, bytes):
        return u
    codec = force_encoding(encoding)
    try:
        if isinstance(u, str):
            try:
                return bytes(u, codec, "replace")
            except (UnicodeError, TypeError):
                # Fall through to the generic path, which goes through
                # ``str(u)`` before encoding.
                pass
        return str(u).encode(codec, "replace")
    except LookupError:
        if encoding:
            # The provided encoding is not known, try with the system one.
            return safe_encode(u)
        # The system default itself is unknown to Python.  Retrying without
        # an encoding would resolve to the same name and recurse forever, so
        # use a codec that is always available.
        return str(u).encode("UTF-8", "replace")
# Publish the helpers defined in this module next to the names re-exported
# from the standard `codecs` module.
__all__.extend(("force_encoding", "safe_decode", "safe_encode"))