# -*- coding: utf-8 -*-
from __future__ import division, print_function, absolute_import
from petl.compat import next, string_types
from petl.util.base import iterpeek, ValuesView, Table
from petl.util.materialise import columns
def infer_dtype(table):
    """
    Infer a numpy structured dtype for `table` by letting numpy inspect
    every data row.

    The first row of `table` is taken as the header; each header item is
    converted with ``str`` and used as a field name of the returned dtype.
    All remaining rows are materialised into a tuple and handed to
    ``numpy.rec.array``, whose resulting dtype supplies the field formats.

    """
    import numpy as np

    row_iter = iter(table)
    header = next(row_iter)
    names = [str(item) for item in header]

    # numpy does the actual type inference on the materialised data rows
    inferred = np.rec.array(tuple(row_iter)).dtype

    # swap numpy's auto-generated field names for the table's own
    inferred.names = names
    return inferred
def construct_dtype(flds, peek, dtype):
    """
    Turn a user-supplied `dtype` argument into something numpy accepts as
    a structured dtype for the fields `flds`.

    `peek` is a sample of the source table (header row first); it is only
    consulted when types have to be inferred. The cases handled are:

    * ``None`` -- the dtype is inferred from `peek` via ``infer_dtype``.
    * a string -- comma-separated type strings, paired positionally with
      `flds` to give a list of ``(name, format)`` tuples.
    * a dict lacking a ``'names'`` or a ``'formats'`` key -- treated as a
      partial mapping from field name to format; fields missing from the
      mapping get the dtype numpy infers from that column of `peek`.
    * anything else -- returned unchanged.

    """
    import numpy as np

    if dtype is None:
        return infer_dtype(peek)

    if isinstance(dtype, string_types):
        # pair each type string with the field name at the same position
        formats = [part.strip() for part in dtype.split(',')]
        return list(zip(flds, formats))

    is_partial_spec = (isinstance(dtype, dict) and
                       not ('names' in dtype and 'formats' in dtype))
    if not is_partial_spec:
        return dtype

    # partial specification: fill in a format for every field
    sampled = columns(peek)
    names = []
    formats = []
    for name in flds:
        names.append(name)
        if name not in dtype:
            # nothing given for this field, infer from the sampled column
            formats.append(np.array(sampled[name]).dtype)
        elif isinstance(dtype[name], tuple):
            # tuple spec: its first item is used as the format
            formats.append(dtype[name][0])
        else:
            # format given directly
            formats.append(dtype[name])
    return {'names': names, 'formats': formats}
def toarray(table, dtype=None, count=-1, sample=1000):
    """
    Load data from the given `table` into a
    `numpy <http://www.numpy.org/>`_ structured array. E.g.::

        >>> import petl as etl
        >>> table = [('foo', 'bar', 'baz'),
        ...          ('apples', 1, 2.5),
        ...          ('oranges', 3, 4.4),
        ...          ('pears', 7, .1)]
        >>> a = etl.toarray(table)
        >>> a
        array([('apples', 1, 2.5), ('oranges', 3, 4.4), ('pears', 7, 0.1)],
              dtype=(numpy.record, [('foo', '<U7'), ('bar', '<i8'), ('baz', '<f8')]))
        >>> # the dtype can be specified as a string
        ... a = etl.toarray(table, dtype='S4, i2, f4')
        >>> a
        array([(b'appl', 1, 2.5), (b'oran', 3, 4.4), (b'pear', 7, 0.1)],
              dtype=[('foo', 'S4'), ('bar', '<i2'), ('baz', '<f4')])
        >>> # the dtype can also be partially specified
        ... a = etl.toarray(table, dtype={'foo': 'S4'})
        >>> a
        array([(b'appl', 1, 2.5), (b'oran', 3, 4.4), (b'pear', 7, 0.1)],
              dtype=[('foo', 'S4'), ('bar', '<i8'), ('baz', '<f8')])

    If the dtype is not completely specified, `sample` rows will be
    examined to infer an appropriate dtype.

    The `dtype` argument may be ``None`` (infer everything), a string of
    comma-separated type strings (one per field, in field order), a dict
    mapping some field names to formats, or any complete dtype
    specification understood by numpy. The `count` argument is passed
    through to ``numpy.fromiter``; the default of -1 reads all rows.

    """
    import numpy as np

    it = iter(table)
    # look ahead at a sample of rows (header included) without consuming them
    peek, it = iterpeek(it, sample)
    hdr = next(it)
    flds = list(map(str, hdr))
    dtype = construct_dtype(flds, peek, dtype)

    # numpy is fussy about having tuples, need to make sure
    rows = (tuple(row) for row in it)
    return np.fromiter(rows, dtype=dtype, count=count)
# attach toarray() to Table as a method, so it can be called as
# ``tbl.toarray(...)`` with the table passed as the first argument
Table.toarray = toarray
def torecarray(*args, **kwargs):
    """
    Convenient shorthand for ``toarray(*args, **kwargs).view(np.recarray)``.

    All positional and keyword arguments are forwarded unchanged to
    :func:`toarray`; the resulting structured array is returned as a
    ``numpy.recarray`` view.

    """
    import numpy as np

    return toarray(*args, **kwargs).view(np.recarray)
# attach torecarray() to Table as a method, so it can be called as
# ``tbl.torecarray(...)`` with the table passed as the first argument
Table.torecarray = torecarray
def fromarray(a):
    """
    Extract a table from a `numpy <http://www.numpy.org/>`_ structured array,
    e.g.::

        >>> import petl as etl
        >>> import numpy as np
        >>> a = np.array([('apples', 1, 2.5),
        ...               ('oranges', 3, 4.4),
        ...               ('pears', 7, 0.1)],
        ...              dtype='U8, i4,f4')
        >>> table = etl.fromarray(a)
        >>> table
        +-----------+----+-----+
        | f0        | f1 | f2  |
        +===========+====+=====+
        | 'apples'  |  1 | 2.5 |
        +-----------+----+-----+
        | 'oranges' |  3 | 4.4 |
        +-----------+----+-----+
        | 'pears'   |  7 | 0.1 |
        +-----------+----+-----+

    The array is wrapped in an :class:`ArrayView`, not copied; the header
    row is taken from the field names of the array's dtype.

    """
    return ArrayView(a)
class ArrayView(Table):
    """
    Table container presenting a numpy structured array as rows of tuples.

    Iterating yields the dtype's field names as the header row, followed
    by one tuple per element of the wrapped array.

    """

    def __init__(self, a):
        # only a reference is stored; rows are produced on iteration
        self.a = a

    def __iter__(self):
        # header row: the field names declared on the array's dtype
        field_names = self.a.dtype.names
        yield tuple(field_names)
        # data rows: each array element converted to a plain tuple
        for record in self.a:
            yield tuple(record)
def valuestoarray(vals, dtype=None, count=-1, sample=1000):
    """
    Load values from a table column into a `numpy <http://www.numpy.org/>`_
    array, e.g.::

        >>> import petl as etl
        >>> table = [('foo', 'bar', 'baz'),
        ...          ('apples', 1, 2.5),
        ...          ('oranges', 3, 4.4),
        ...          ('pears', 7, .1)]
        >>> table = etl.wrap(table)
        >>> table.values('bar').array()
        array([1, 3, 7])
        >>> # specify dtype
        ... table.values('bar').array(dtype='i4')
        array([1, 3, 7], dtype=int32)

    If `dtype` is not given, a sample of the values (controlled by
    `sample`) is examined and the dtype numpy infers for that sample is
    used. The `count` argument is passed through to ``numpy.fromiter``;
    the default of -1 reads all values.

    """
    import numpy as np

    it = iter(vals)
    if dtype is None:
        # look ahead at a sample of the values and let numpy pick the dtype
        peek, it = iterpeek(it, sample)
        dtype = np.array(peek).dtype
    return np.fromiter(it, dtype=dtype, count=count)
# attach valuestoarray() to ValuesView under two method names, so that
# ``values.toarray(...)`` and ``values.array(...)`` are equivalent
ValuesView.toarray = valuestoarray
ValuesView.array = valuestoarray