from __future__ import absolute_import, print_function, division
import operator
import re
from petl.compat import next, string_types, callable, text_type
from petl.comparison import Comparable
from petl.errors import ArgumentError
from petl.util.base import asindices, expr, Table, values, Record
[docs]def select(table, *args, **kwargs):
"""
Select rows meeting a condition. E.g.::
>>> import petl as etl
>>> table1 = [['foo', 'bar', 'baz'],
... ['a', 4, 9.3],
... ['a', 2, 88.2],
... ['b', 1, 23.3],
... ['c', 8, 42.0],
... ['d', 7, 100.9],
... ['c', 2]]
>>> # the second positional argument can be a function accepting
... # a row
... table2 = etl.select(table1,
... lambda rec: rec.foo == 'a' and rec.baz > 88.1)
>>> table2
+-----+-----+------+
| foo | bar | baz |
+=====+=====+======+
| 'a' | 2 | 88.2 |
+-----+-----+------+
>>> # the second positional argument can also be an expression
... # string, which will be converted to a function using petl.expr()
... table3 = etl.select(table1, "{foo} == 'a' and {baz} > 88.1")
>>> table3
+-----+-----+------+
| foo | bar | baz |
+=====+=====+======+
| 'a' | 2 | 88.2 |
+-----+-----+------+
>>> # the condition can also be applied to a single field
... table4 = etl.select(table1, 'foo', lambda v: v == 'a')
>>> table4
+-----+-----+------+
| foo | bar | baz |
+=====+=====+======+
| 'a' | 4 | 9.3 |
+-----+-----+------+
| 'a' | 2 | 88.2 |
+-----+-----+------+
The complement of the selection can be returned (i.e., the query can be
inverted) by providing `complement=True` as a keyword argument.
"""
missing = kwargs.get('missing', None)
complement = kwargs.get('complement', False)
if len(args) == 0:
raise ArgumentError('missing positional argument')
elif len(args) == 1:
where = args[0]
if isinstance(where, string_types):
where = expr(where)
else:
assert callable(where), 'second argument must be string or callable'
return RowSelectView(table, where, missing=missing,
complement=complement)
else:
field = args[0]
where = args[1]
assert callable(where), 'third argument must be callable'
return FieldSelectView(table, field, where, complement=complement,
missing=missing)
Table.select = select
class RowSelectView(Table):
def __init__(self, source, where, missing=None, complement=False):
self.source = source
self.where = where
self.missing = missing
self.complement = complement
def __iter__(self):
return iterrowselect(self.source, self.where, self.missing,
self.complement)
class FieldSelectView(Table):
def __init__(self, source, field, where, complement=False, missing=None):
self.source = source
self.field = field
self.where = where
self.complement = complement
self.missing = missing
def __iter__(self):
return iterfieldselect(self.source, self.field, self.where,
self.complement, self.missing)
def iterfieldselect(source, field, where, complement, missing):
it = iter(source)
hdr = next(it)
yield tuple(hdr)
indices = asindices(hdr, field)
getv = operator.itemgetter(*indices)
for row in it:
try:
v = getv(row)
except IndexError:
v = missing
if where(v) != complement: # XOR
yield tuple(row)
def iterrowselect(source, where, missing, complement):
it = iter(source)
hdr = next(it)
flds = list(map(text_type, hdr))
yield tuple(hdr)
it = (Record(row, flds, missing=missing) for row in it)
for row in it:
if where(row) != complement: # XOR
yield tuple(row) # need to convert back to tuple?
[docs]def rowlenselect(table, n, complement=False):
"""Select rows of length `n`."""
where = lambda row: len(row) == n
return select(table, where, complement=complement)
Table.rowlenselect = rowlenselect
[docs]def selectop(table, field, value, op, complement=False):
"""Select rows where the function `op` applied to the given field and
the given value returns `True`."""
return select(table, field, lambda v: op(v, value),
complement=complement)
Table.selectop = selectop
[docs]def selecteq(table, field, value, complement=False):
"""Select rows where the given field equals the given value."""
return selectop(table, field, value, operator.eq, complement=complement)
Table.selecteq = selecteq
Table.eq = selecteq
[docs]def selectne(table, field, value, complement=False):
"""Select rows where the given field does not equal the given value."""
return selectop(table, field, value, operator.ne, complement=complement)
Table.selectne = selectne
Table.ne = selectne
[docs]def selectlt(table, field, value, complement=False):
"""Select rows where the given field is less than the given value."""
value = Comparable(value)
return selectop(table, field, value, operator.lt, complement=complement)
Table.selectlt = selectlt
Table.lt = selectlt
[docs]def selectle(table, field, value, complement=False):
"""Select rows where the given field is less than or equal to the given
value."""
value = Comparable(value)
return selectop(table, field, value, operator.le, complement=complement)
Table.selectle = selectle
Table.le = selectle
[docs]def selectgt(table, field, value, complement=False):
"""Select rows where the given field is greater than the given value."""
value = Comparable(value)
return selectop(table, field, value, operator.gt, complement=complement)
Table.selectgt = selectgt
Table.gt = selectgt
[docs]def selectge(table, field, value, complement=False):
"""Select rows where the given field is greater than or equal to the given
value."""
value = Comparable(value)
return selectop(table, field, value, operator.ge, complement=complement)
Table.selectge = selectge
Table.ge = selectge
[docs]def selectcontains(table, field, value, complement=False):
"""Select rows where the given field contains the given value."""
return selectop(table, field, value, operator.contains,
complement=complement)
Table.selectcontains = selectcontains
[docs]def selectin(table, field, value, complement=False):
"""Select rows where the given field is a member of the given value."""
return select(table, field, lambda v: v in value,
complement=complement)
Table.selectin = selectin
[docs]def selectnotin(table, field, value, complement=False):
"""Select rows where the given field is not a member of the given value."""
return select(table, field, lambda v: v not in value,
complement=complement)
Table.selectnotin = selectnotin
[docs]def selectis(table, field, value, complement=False):
"""Select rows where the given field `is` the given value."""
return selectop(table, field, value, operator.is_, complement=complement)
Table.selectis = selectis
[docs]def selectisnot(table, field, value, complement=False):
"""Select rows where the given field `is not` the given value."""
return selectop(table, field, value, operator.is_not, complement=complement)
Table.selectisnot = selectisnot
[docs]def selectisinstance(table, field, value, complement=False):
"""Select rows where the given field is an instance of the given type."""
return selectop(table, field, value, isinstance, complement=complement)
Table.selectisinstance = selectisinstance
[docs]def selectrangeopenleft(table, field, minv, maxv, complement=False):
"""Select rows where the given field is greater than or equal to `minv` and
less than `maxv`."""
minv = Comparable(minv)
maxv = Comparable(maxv)
return select(table, field, lambda v: minv <= v < maxv,
complement=complement)
Table.selectrangeopenleft = selectrangeopenleft
[docs]def selectrangeopenright(table, field, minv, maxv, complement=False):
"""Select rows where the given field is greater than `minv` and
less than or equal to `maxv`."""
minv = Comparable(minv)
maxv = Comparable(maxv)
return select(table, field, lambda v: minv < v <= maxv,
complement=complement)
Table.selectrangeopenright = selectrangeopenright
[docs]def selectrangeopen(table, field, minv, maxv, complement=False):
"""Select rows where the given field is greater than or equal to `minv` and
less than or equal to `maxv`."""
minv = Comparable(minv)
maxv = Comparable(maxv)
return select(table, field, lambda v: minv <= v <= maxv,
complement=complement)
Table.selectrangeopen = selectrangeopen
[docs]def selectrangeclosed(table, field, minv, maxv, complement=False):
"""Select rows where the given field is greater than `minv` and
less than `maxv`."""
minv = Comparable(minv)
maxv = Comparable(maxv)
return select(table, field, lambda v: minv < Comparable(v) < maxv,
complement=complement)
Table.selectrangeclosed = selectrangeclosed
[docs]def selecttrue(table, field, complement=False):
"""Select rows where the given field evaluates `True`."""
return select(table, field, lambda v: bool(v), complement=complement)
Table.selecttrue = selecttrue
Table.true = selecttrue
[docs]def selectfalse(table, field, complement=False):
"""Select rows where the given field evaluates `False`."""
return select(table, field, lambda v: not bool(v),
complement=complement)
Table.selectfalse = selectfalse
Table.false = selectfalse
[docs]def selectnone(table, field, complement=False):
"""Select rows where the given field is `None`."""
return select(table, field, lambda v: v is None, complement=complement)
Table.selectnone = selectnone
Table.none = selectnone
[docs]def selectnotnone(table, field, complement=False):
"""Select rows where the given field is not `None`."""
return select(table, field, lambda v: v is not None,
complement=complement)
Table.selectnotnone = selectnotnone
Table.notnone = selectnotnone
[docs]def selectusingcontext(table, query):
"""
Select rows based on data in the current row and/or previous and
next row. E.g.::
>>> import petl as etl
>>> table1 = [['foo', 'bar'],
... ['A', 1],
... ['B', 4],
... ['C', 5],
... ['D', 9]]
>>> def query(prv, cur, nxt):
... return ((prv is not None and (cur.bar - prv.bar) < 2)
... or (nxt is not None and (nxt.bar - cur.bar) < 2))
...
>>> table2 = etl.selectusingcontext(table1, query)
>>> table2
+-----+-----+
| foo | bar |
+=====+=====+
| 'B' | 4 |
+-----+-----+
| 'C' | 5 |
+-----+-----+
The `query` function should accept three rows and return a boolean value.
"""
return SelectUsingContextView(table, query)
Table.selectusingcontext = selectusingcontext
class SelectUsingContextView(Table):
def __init__(self, table, query):
self.table = table
self.query = query
def __iter__(self):
return iterselectusingcontext(self.table, self.query)
def iterselectusingcontext(table, query):
it = iter(table)
hdr = tuple(next(it))
flds = list(map(text_type, hdr))
yield hdr
it = (Record(row, flds) for row in it)
prv = None
cur = next(it)
for nxt in it:
if query(prv, cur, nxt):
yield cur
prv = cur
cur = nxt
# handle last row
if query(prv, cur, None):
yield cur
[docs]def facet(table, key):
"""
Return a dictionary mapping field values to tables. E.g.::
>>> import petl as etl
>>> table1 = [['foo', 'bar', 'baz'],
... ['a', 4, 9.3],
... ['a', 2, 88.2],
... ['b', 1, 23.3],
... ['c', 8, 42.0],
... ['d', 7, 100.9],
... ['c', 2]]
>>> foo = etl.facet(table1, 'foo')
>>> sorted(foo.keys())
['a', 'b', 'c', 'd']
>>> foo['a']
+-----+-----+------+
| foo | bar | baz |
+=====+=====+======+
| 'a' | 4 | 9.3 |
+-----+-----+------+
| 'a' | 2 | 88.2 |
+-----+-----+------+
>>> foo['c']
+-----+-----+------+
| foo | bar | baz |
+=====+=====+======+
| 'c' | 8 | 42.0 |
+-----+-----+------+
| 'c' | 2 | |
+-----+-----+------+
See also :func:`petl.util.materialise.facetcolumns`.
"""
fct = dict()
for v in set(values(table, key)):
fct[v] = selecteq(table, key, v)
return fct
Table.facet = facet