Source code for xotl.tools.web

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------
# Copyright (c) Merchise Autrement [~º/~] and Contributors
# All rights reserved.
#
# This is free software; you can do what the LICENCE file allows you to.
#

"""Utils for Web applications."""

# Public names exported by ``from xotl.tools.web import *``.
__all__ = ["slugify"]


# TODO: Why not deprecate this and use the standard `xotl.tools.string.slugify`?
def _numeric_entity_to_char(match, base):
    """Return the character for a numeric entity `match`, or the entity itself.

    `match` must expose the digits of the character reference in group 1;
    `base` is the numeric base of those digits (10 or 16).

    If the number is not a valid code point (``chr`` raises `ValueError` or
    `OverflowError`), the matched text is returned unchanged so that a single
    malformed reference does not prevent the others from being replaced.

    """
    try:
        return chr(int(match.group(1), base))
    except (ValueError, OverflowError):
        return match.group(0)


def slugify(s, entities=True, decimal=True, hexadecimal=True):  # pragma: no cover
    """Convert a string to a slug representation.

    Normalizes string, converts to lower-case, removes non-alpha characters,
    and converts spaces to hyphens.

    Parts from http://www.djangosnippets.org/snippets/369/

        >>> slugify("Manuel Vázquez Acosta")    # doctest: +SKIP
        'manuel-vazquez-acosta'

    If `s` is not a `str`, it is decoded with
    `xotl.tools.future.codecs.safe_decode` first.

    If `entities` is True (the default) all named HTML entities are replaced
    by their equivalent character before normalization::

        >>> slugify("Manuel V&aacute;zquez Acosta")    # doctest: +SKIP
        'manuel-vazquez-acosta'

    If `entities` is False, then no HTML-entities substitution is made::

        >>> value = "Manuel V&aacute;zquez Acosta"
        >>> slugify(value, entities=False)    # doctest: +SKIP
        'manuel-v-aacute-zquez-acosta'

    If `decimal` is True, then all entities of the form ``&#nnnn;`` where
    `nnnn` is a decimal number deemed as a unicode codepoint, are replaced by
    the corresponding unicode character::

        >>> slugify('Manuel V&#225;zquez Acosta')    # doctest: +SKIP
        'manuel-vazquez-acosta'

        >>> value = 'Manuel V&#225;zquez Acosta'
        >>> slugify(value, decimal=False)    # doctest: +SKIP
        'manuel-v-225-zquez-acosta'

    If `hexadecimal` is True, then all entities of the form ``&#xnnnn;``
    where `nnnn` is a hexadecimal number deemed as a unicode codepoint, are
    replaced by the corresponding unicode character::

        >>> slugify('Manuel V&#x00e1;zquez Acosta')    # doctest: +SKIP
        'manuel-vazquez-acosta'

        >>> slugify('Manuel V&#x00e1;zquez Acosta', hexadecimal=False)    # doctest: +SKIP  # noqa
        'manuel-v-x00e1-zquez-acosta'

    Numeric references whose value is not a valid code point are left
    untouched; the remaining references are still replaced.

    :returns: the result of ``xotl.tools.string.slugify(s, "-")`` applied to
              the string after the substitutions above.

    .. deprecated:: 2.1.0 Use `xotl.tools.string.slugify`:func:.

    """
    import re
    # Aliased so the local import does not shadow this function's own name.
    from xotl.tools.string import slugify as _slugify
    from xotl.tools.future.codecs import safe_decode

    if not isinstance(s, str):
        s = safe_decode(s)
    if entities:
        try:
            from htmlentitydefs import name2codepoint
        except ImportError:
            # Py3k: The ``htmlentitydefs`` module has been renamed to
            # ``html.entities`` in Python 3
            from html.entities import name2codepoint
        named_entity = "&(%s);" % "|".join(name2codepoint)
        s = re.sub(
            named_entity,
            lambda m: chr(name2codepoint[m.group(1)]),
            s,
        )
    if decimal:
        s = re.sub(
            r"&#(\d+);",
            lambda m: _numeric_entity_to_char(m, 10),
            s,
        )
    if hexadecimal:
        s = re.sub(
            r"&#x([\da-fA-F]+);",
            lambda m: _numeric_entity_to_char(m, 16),
            s,
        )
    return _slugify(s, "-")