# -*- encoding: utf-8 -*-
# ---------------------------------------------------------------------
# xoutil.records
# ---------------------------------------------------------------------
# Copyright (c) 2015 Merchise and Contributors
# Copyright (c) 2014 Merchise Autrement and Contributors
# All rights reserved.
#
# This is free software; you can redistribute it and/or modify it under the
# terms of the LICENCE attached (see LICENCE file) in the distribution
# package.
#
# Created on 2014-09-22
'''Records definitions.
A record lets you describe plain external data together with a simplified
model to *read* it.  The main use of records is to represent data that is
read from a CSV file.
See the `record`:class: class to find out how to use it.
'''
from __future__ import (division as _py3_division,
print_function as _py3_print,
absolute_import as _py3_abs_import)
from xoutil import Unset
from xoutil.functools import lru_cache
from xoutil.eight.meta import metaclass
@lru_cache()
def field_descriptor(field_name):
    '''Returns a read-only descriptor for `field_name`.

    The returned object is a descriptor *class* (memoized per `field_name`);
    it must be instantiated before being placed in a class namespace.

    When read through an instance, the descriptor delegates to
    ``owner.get_field`` passing the instance's ``_raw_data`` and the index
    registered for `field_name` in ``owner._rec_fields``.  When read through
    the class, the descriptor itself is returned.

    Only ``__get__`` is defined.

    '''
    class descriptor(object):
        def __get__(self, instance, owner):
            # Compare against None instead of relying on truthiness: an
            # instance that happens to be falsy (e.g. it defines `__len__`
            # and is empty) must still get its field value.
            if instance is None:
                return self
            return owner.get_field(instance._raw_data,
                                   owner._rec_fields[field_name])
    return descriptor
class _record_type(type):
    '''Metaclass that collects the field definitions of record classes.

    For every class it creates, it gathers the field definitions found in
    the class body, installs a lowercase descriptor for each of them, wraps
    the readers as static methods and stores two mappings on the class:
    ``_rec_fields`` (field name -> index) and ``_rec_index`` (index -> field
    name).

    '''

    @staticmethod
    def _is_rec_definition(attr, val=Unset):
        '''Tell whether `attr` (optionally bound to `val`) defines a field.

        The name must not start with an underscore and must be equal to its
        own upper-cased form.  If `val` is given, it must also be an integer
        or a string.

        '''
        well_named = not attr.startswith('_') and attr == attr.upper()
        if val is Unset:
            return well_named
        from numbers import Integral
        from xoutil.eight import string_types
        return well_named and (isinstance(val, Integral) or
                               isinstance(val, string_types))

    @staticmethod
    def is_reader(attr, func, fields=None):
        '''Tell whether the attribute `attr` bound to `func` is a reader.

        The lower-cased name must start with an underscore and end with
        ``_reader``, and `func` must be either a function or a staticmethod.
        The `fields` argument is accepted but not used.

        '''
        from xoutil.types import FunctionType, is_staticmethod
        lowered = attr.lower()
        named_as_reader = (lowered.startswith('_') and
                           lowered.endswith('_reader'))
        reader_like = isinstance(func, FunctionType) or is_staticmethod(func)
        return named_as_reader and reader_like

    def __new__(cls, name, bases, attrs):
        '''Create the record class and compute its fields and index.

        :raises TypeError: if, after merging the inherited fields with the
                ones defined in this class, two fields share the same index.

        '''
        from xoutil.types import is_staticmethod
        own_fields = {}
        readers = {}
        for attr, value in attrs.items():
            if cls._is_rec_definition(attr, value):
                own_fields[attr] = value
            if cls.is_reader(attr, value):
                # Readers are always exposed as static methods under their
                # lower-cased name.
                if is_staticmethod(value):
                    readers[attr.lower()] = value
                else:
                    readers[attr.lower()] = staticmethod(value)
        namespace = dict(attrs)
        for field_name in own_fields:
            namespace[field_name.lower()] = field_descriptor(field_name)()
        namespace.update(readers)
        klass = super(_record_type, cls).__new__(cls, name, bases, namespace)
        # Work on copies so the mappings inherited from the super-classes
        # are not contaminated.
        fields = dict(getattr(klass, '_rec_fields', {}))
        index = dict(getattr(klass, '_rec_index', {}))
        fields.update(own_fields)
        if len(set(fields.values())) != len(fields):
            msg = ('Duplicated field index definition in class "%s"' % name)
            import logging
            logger = logging.getLogger(__name__)
            logger.error(msg)
            logger.debug(fields)
            raise TypeError(msg)
        klass._rec_fields = fields
        for field_name, field_index in own_fields.items():
            index[field_index] = field_name
        klass._rec_index = index
        return klass

    def get_field(self, raw_data, field):
        '''Return the value of `field` in `raw_data`.

        `field` is the index declared for the field; it must be registered
        in ``_rec_index`` (a KeyError is raised otherwise).  If `raw_data`
        has no item for that index, `Undefined` is used as the raw value.
        If the class has a ``_<field name>_reader`` attribute, the raw value
        is passed to it and its result is returned.

        '''
        from xoutil import Undefined
        field_name = self._rec_index[field]
        try:
            raw_value = raw_data[field]
        except (IndexError, KeyError):
            raw_value = Undefined
        reader = getattr(self, '_%s_reader' % field_name.lower(), None)
        return reader(raw_value) if reader else raw_value
class record(metaclass(_record_type)):
    '''Base record class.

    Records allow you to represent a sequence or mapping of values extracted
    from external sources into a dict-like Python value.

    The first use-case for this abstraction is importing data from a CSV file.
    You could represent each line as an instance of a properly defined record.

    An instance of a record would represent a single `line` (or row) from the
    external data source.

    Records are expected to declare `fields`.  Each field must be a
    CAPITALIZED valid identifier like::

       >>> class INVOICE(record):
       ...     ID = 0
       ...     REFERENCE = 1

    Fields must be integers or plain strings.  Fields must not begin with an
    underscore ("_").  External data lines are required to support indexes of
    those types.

    You could use the classmethod :func:`get_field` to get the value of a
    field in a single line (data as provided by the external source)::

       >>> line = (1, 'AA20X138874Z012')
       >>> INVOICE.get_field(line, INVOICE.REFERENCE)
       'AA20X138874Z012'

    You may also have an instance::

       >>> invoice = INVOICE(line)
       >>> invoice.reference
       'AA20X138874Z012'

    .. note:: Instances attributes are renamed to lowercase.  So you **must**
       not create any other attribute that has the same name as a field in
       lowercase, or else it will be overwritten.

    You could define `readers` for any field.  For instance if you have a
    "CREATED_DATETIME" field you may create a "_created_datetime_reader"
    function that will be used to parse the raw value of the instance into an
    expected type.  See the `included reader builders below
    <included-readers>`:ref:.

    Readers are always cast as `staticmethods`, whether or not you have
    explicitly stated that fact::

       >>> from dateutil import parser
       >>> class BETTER_INVOICE(INVOICE):
       ...     CREATED_TIME = 2
       ...     _created_time_reader = lambda val: parser.parse(val)
       ...
       >>> line = (1, 'AA20X138874Z012', '2014-02-17T17:29:21.965053')
       >>> BETTER_INVOICE.get_field(line, BETTER_INVOICE.CREATED_TIME)
       datetime.datetime(2014, 2, 17, 17, 29, 21, 965053)

    .. warning:: Creating readers for fields defined in super classes is not
       directly supported.  To do so, you **must** declare the reader as a
       staticmethod yourself.

    .. note:: Currently there's no concept of relationship between rows in
       this model.  We are evaluating whether placing some sort of context
       into the `kwargs` argument would make it possible to write readers
       that fetch other instances.

    '''
    def __init__(self, raw_data):
        # Keep the external line untouched; fields are read from it lazily.
        self._raw_data = raw_data

    def __repr__(self):
        return '%s(%r)' % (type(self).__name__, self._raw_data)

    def __getitem__(self, field_index):
        '''Return the value of the field declared with `field_index`.

        This delegates to the class-level `get_field`, so the value goes
        through the field's reader when one is defined.

        '''
        return type(self).get_field(self._raw_data, field_index)
def isnull(val):
    '''Return True if `val` is null.

    Null values are None, the empty string and any False instance of
    `xoutil.logical.Logical`:class:.

    Notice that 0, the empty list and other false values in Python are not
    considered null.  This allows that the CSV null (the empty string) is
    correctly treated while other sources that provide numbers (and 0 is a
    valid number) are not misinterpreted as null.

    :param val: The value to test.

    :returns: True if `val` is null, False otherwise.

    '''
    # None and the empty string are settled right away; `Logical` is only
    # needed (and imported) for any other value.
    if val in (None, ''):
        return True
    from xoutil.logical import Logical
    return isinstance(val, Logical) and not val
# Standard readers
def check_nullable(val, nullable):
    '''Check the restriction of nullable.

    Test for null is done with function `isnull`:func:.

    :param val: The value to check.

    :param nullable: Whether null values are allowed.

    :returns: True if `val` is non-null.  False if `val` is null and
              `nullable` is True.

    :raises ValueError: if `val` is null and `nullable` is False.

    '''
    if not isnull(val):
        return True
    if nullable:
        return False
    raise ValueError('NULL value was not expected here')
@lru_cache()
def datetime_reader(format, nullable=False, default=None, strict=True):
    '''Returns a datetime reader.

    :param format: The format the datetime is expected to be in the external
                   data.  This is passed to
                   `datetime.datetime.strptime`:func:.

    :param nullable: Whether null values (as in `isnull`:func:) are
                     accepted.  If not, a null value makes the reader raise
                     a ValueError.

    :param default: The value the reader returns for an accepted null value.

    :param strict: Whether to be strict about datetime format.

    :returns: A function that takes the raw value and returns the parsed
              datetime.

    The reader works first by passing the value to strict
    `datetime.datetime.strptime`:func: function.  If that fails with a
    ValueError and strict is True the reader fails entirely.

    If strict is False, the worker applies different rules.  First if the
    `dateutil` package is installed its parser module is tried.  If `dateutil`
    is not available and nullable is True, return None; if nullable is False
    and default is not null (as in `isnull`:func:), return `default`,
    otherwise raise a ValueError (the one raised by ``strptime``).

    .. versionadded:: 1.6.7  Add the `strict` argument.

    .. versionchanged:: 1.6.7.1  Keep the meaning of null when testing for
       `default` if strict is False and dateutil is not available.

    '''
    # Resolve the imports once per reader instead of once per value read.
    from datetime import datetime
    try:
        from dateutil.parser import parse
    except ImportError:
        parse = None

    def reader(val):
        if not check_nullable(val, nullable):
            return default
        try:
            return datetime.strptime(val, format)
        except ValueError:
            if strict:
                raise
            if parse:
                return parse(val)
            if nullable:
                return None
            if not isnull(default):
                return default
            # Nothing to fall back to: re-raise the strptime error so the
            # caller gets its diagnostic instead of an empty ValueError.
            raise
    return reader
@lru_cache()
def date_reader(format, nullable=False, default=None, strict=True):
    '''Return a date reader.

    This is similar to `datetime_reader`:func: but instead of returning a
    `datetime.datetime`:class: it returns a `datetime.date`.

    Actually this function delegates to `datetime_reader`:func: most of its
    functionality; the arguments are passed to it unchanged.

    :returns: A function that takes the raw value and returns the parsed
              date.  Null results and `default` itself are returned as they
              are.

    .. versionadded:: 1.6.8

    '''
    reader = datetime_reader(format, nullable=nullable, default=default,
                             strict=strict)

    def res(val):
        result = reader(val)
        # Null results and the `default` object itself are handed back
        # untouched; anything else is converted with its `date()` method.
        if isnull(result) or result is default:
            return result
        return result.date()
    return res
@lru_cache()
def boolean_reader(true=('1', ), nullable=False, default=None):
    '''Returns a boolean reader.

    :param true: A collection of raw values considered to be True.  Only the
                 values in this collection will be considered True values.

    :param nullable: Whether null values (as in `isnull`:func:) are
                     accepted.  If not, a null value makes the reader raise
                     a ValueError.

    :param default: The value the reader returns for an accepted null value.

    :returns: A function that takes the raw value and returns whether it is
              in `true`.

    '''
    def reader(val):
        if not check_nullable(val, nullable):
            return default
        return val in true
    return reader
@lru_cache()
def integer_reader(nullable=False, default=None):
    '''Returns an integer reader.

    :param nullable: Whether null values (as in `isnull`:func:) are
                     accepted.  If not, a null value makes the reader raise
                     a ValueError.

    :param default: The value the reader returns for an accepted null value.

    :returns: A function that takes the raw value and returns it converted
              with `int`.

    '''
    def reader(val):
        if not check_nullable(val, nullable):
            return default
        return int(val)
    return reader
@lru_cache()
def decimal_reader(nullable=False, default=None):
    '''Returns a Decimal reader.

    :param nullable: Whether null values (as in `isnull`:func:) are
                     accepted.  If not, a null value makes the reader raise
                     a ValueError.

    :param default: The value the reader returns for an accepted null value.

    :returns: A function that takes the raw value and returns it converted
              with `decimal.Decimal`:class:.

    '''
    # Imported once per reader instead of once per value read.
    from decimal import Decimal

    def reader(val):
        if not check_nullable(val, nullable):
            return default
        return Decimal(val)
    return reader
@lru_cache()
def float_reader(nullable=False, default=None):
    '''Returns a float reader.

    :param nullable: Whether null values (as in `isnull`:func:) are
                     accepted.  If not, a null value makes the reader raise
                     a ValueError.

    :param default: The value the reader returns for an accepted null value.

    :returns: A function that takes the raw value and returns it converted
              with `float`.

    '''
    def reader(val):
        if not check_nullable(val, nullable):
            return default
        return float(val)
    return reader
# `metaclass` and `lru_cache` are only used while the definitions above are
# being created (the `record` class statement and the decorators), so they
# are removed from the module namespace afterwards.
del metaclass, lru_cache