Source code for xoutil.eight.string

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------
# Copyright (c) Merchise Autrement [~º/~] and Contributors
# All rights reserved.
#
# This is free software; you can do what the LICENCE file allows you to.
#

'''Technical string handling.

Technical strings are those that are required to be instances of the standard
`str` type.  See `py-string-ambiguity`:any: for more information.

This module will be used mostly as a namespace, for example::

  from xoutil.eight import string
  Foobar.__name__ = string.force(class_name)

If these functions are going to be used standalone, do something like::

  from xoutil.eight.string import force as force_str
  Foobar.__name__ = force_str(class_name)

'''

from __future__ import (division as _py3_division,
                        print_function as _py3_print,
                        absolute_import as _py3_import)


def force(value=str(), encoding=None):
    '''Convert any value to standard `str` type in a safe way.

    This function is useful in some scenarios that require `str` type (for
    example attribute ``__name__`` in functions and types).

    As ``str is bytes`` in Python 2, using ``str(value)`` is enough for these
    scenarios in most cases, but not in all of them, for example::

      >>> from xoutil.eight import string
      >>> def inverted_partial(func, *args, **keywords):
      ...     def inner(*a, **kw):
      ...         a += args
      ...         kw.update(keywords)
      ...         return func(*a, **kw)
      ...     name = func.__name__.replace('lambda', u'λ')
      ...     inner.__name__ = string.force(name)
      ...     return inner

    :param value: The object to convert.  Instances of `str` are returned
           unchanged.

    :param encoding: Forwarded as the ``encoding`` keyword argument to
           ``safe_encode`` (when ``str is bytes``) or ``safe_decode``
           (otherwise).

    :returns: `value` itself when it is already a `str`; otherwise whatever
              the codec helper returns.

    .. versionchanged:: 1.9.6 Add the 'encoding' parameter.

    '''
    if isinstance(value, str):
        # Fast path: nothing to convert, so the codec helpers are not needed.
        return value
    # Imported only when a conversion is really required.
    from xoutil.future.codecs import safe_decode, safe_encode
    if str is bytes:    # Python 2
        return safe_encode(value, encoding=encoding)
    else:
        return safe_decode(value, encoding=encoding)
def safe_join(separator, iterable, encoding=None):
    '''Join the items of `iterable` after coercing everything to `str`.

    Works like ``separator.join(iterable)``, except that the separator and
    every item are first passed through `force`:func: (with the given
    `encoding`), so items that are not strings are accepted.  For example::

      >>> safe_join('-', range(6))
      '0-1-2-3-4-5'

    By contrast, the plain expression ``'-'.join(range(6))`` raises a
    ``TypeError``.

    .. versionchanged:: 1.9.6 Add the 'encoding' parameter.

    '''
    glue = force(separator, encoding)
    # Items are converted lazily, while `join` consumes the generator.
    pieces = (force(piece, encoding) for piece in iterable)
    return glue.join(pieces)
def force_ascii(value, encoding=None):
    '''Return an ASCII-only `str` version of `value`.

    The text is decomposed with the unicode 'NFKD' normalization form and
    then encoded to ASCII, ignoring every character that cannot be
    represented.  The result is passed through `force`:func: so the standard
    `str` type is returned.

    :param encoding: If `value` is not a text (unicode), it is decoded before
           ASCII normalization using this encoding.  If not provided use the
           return of `~xoutil.future.codecs.force_encoding`:func:.

    .. versionchanged:: 1.8.7 Add parameter 'encoding'.

    '''
    import unicodedata
    from xoutil.future.codecs import safe_decode
    from xoutil.eight import text_type, string
    if isinstance(value, text_type):
        text = value
    else:
        text = safe_decode(value, encoding=encoding)
    decomposed = unicodedata.normalize('NFKD', text)
    ascii_only = decomposed.encode('ascii', 'ignore')
    return string.force(ascii_only)
# ------------------ Here, the original file starts ------------------

# `isidentifier` is defined according to what the running interpreter offers;
# both variants return ``str(s)`` when the check succeeds and ``False``
# otherwise.
if hasattr(str, 'isidentifier'):
    # The `str` type already knows how to validate identifiers.
    def isidentifier(s):
        return str(s) if s.isidentifier() else False
else:
    import re    # noqa
    # The pattern is anchored with ``\Z`` and not ``$``: ``$`` also matches
    # just before a trailing newline, which would wrongly accept values such
    # as ``'name\n'``.
    _PY2_IDENTIFIER_REGEX = re.compile(r'(?i)^[_a-z][_a-z0-9]*\Z')
    del re    # keep the module namespace clean

    def isidentifier(s):
        return str(s) if _PY2_IDENTIFIER_REGEX.match(s) else False

# Both variants share the same documentation.
isidentifier.__doc__ = ('If `s` is a valid identifier according to the '
                        'language definition.')
def isfullidentifier(s):
    '''Check if `s` is a valid dotted Python identifier.

    `s` is split on dots and every part must pass `isidentifier`:func:, see
    that function for what "validity" means.

    Return ``str(s)`` when all the parts are valid, ``False`` otherwise.

    '''
    for part in s.split('.'):
        if not isidentifier(part):
            # One invalid part is enough to reject the whole name.
            return False
    return str(s)
def safe_isidentifier(s):
    '''Type-checked variant of `isidentifier`:func:.

    Return ``False`` if `s` is not an instance of the string types; otherwise
    return the result of ``isidentifier(s)``.

    '''
    from xoutil.eight import string_types
    if not isinstance(s, string_types):
        return False
    return isidentifier(s)
def safe_isfullidentifier(s):
    '''Type-checked variant of `isfullidentifier`:func:.

    Return ``False`` if `s` is not an instance of the string types; otherwise
    return the result of ``isfullidentifier(s)``.

    See `safe_isidentifier`:func: for what "validity" means.

    '''
    from xoutil.eight import string_types
    if not isinstance(s, string_types):
        return False
    return isfullidentifier(s)
def check_identifier(s):
    '''Ensure that `s` is a valid identifier.

    Return the (true) result of ``isidentifier(s)`` when the check passes.

    :raises TypeError: if ``isidentifier(s)`` is false.

    '''
    from xoutil.eight.string import isidentifier
    checked = isidentifier(s)
    if not checked:
        raise TypeError('invalid identifier "{}"'.format(s))
    return checked