Source code for petl.transform.basics

from __future__ import absolute_import, print_function, division


# standard library dependencies
from itertools import islice, chain
from collections import deque
from itertools import count
from petl.compat import izip, izip_longest, next, string_types, text_type


# internal dependencies
from petl.util.base import asindices, rowgetter, Record, Table


import logging
logger = logging.getLogger(__name__)
warning = logger.warning
info = logger.info
debug = logger.debug


def cut(table, *args, **kwargs):
    """
    Choose and/or re-order fields. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['A', 1, 2.7],
        ...           ['B', 2, 3.4],
        ...           ['B', 3, 7.8],
        ...           ['D', 42, 9.0],
        ...           ['E', 12]]
        >>> table2 = etl.cut(table1, 'foo', 'baz')
        >>> table2
        +-----+------+
        | foo | baz  |
        +=====+======+
        | 'A' | 2.7  |
        +-----+------+
        | 'B' | 3.4  |
        +-----+------+
        | 'B' | 7.8  |
        +-----+------+
        | 'D' | 9.0  |
        +-----+------+
        | 'E' | None |
        +-----+------+

        >>> # fields can also be specified by index, starting from zero
        ... table3 = etl.cut(table1, 0, 2)
        >>> table3
        +-----+------+
        | foo | baz  |
        +=====+======+
        | 'A' | 2.7  |
        +-----+------+
        | 'B' | 3.4  |
        +-----+------+
        | 'B' | 7.8  |
        +-----+------+
        | 'D' | 9.0  |
        +-----+------+
        | 'E' | None |
        +-----+------+

        >>> # field names and indices can be mixed
        ... table4 = etl.cut(table1, 'bar', 0)
        >>> table4
        +-----+-----+
        | bar | foo |
        +=====+=====+
        | 1   | 'A' |
        +-----+-----+
        | 2   | 'B' |
        +-----+-----+
        | 3   | 'B' |
        +-----+-----+
        | 42  | 'D' |
        +-----+-----+
        | 12  | 'E' |
        +-----+-----+

        >>> # select a range of fields
        ... table5 = etl.cut(table1, *range(0, 2))
        >>> table5
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'A' | 1   |
        +-----+-----+
        | 'B' | 2   |
        +-----+-----+
        | 'B' | 3   |
        +-----+-----+
        | 'D' | 42  |
        +-----+-----+
        | 'E' | 12  |
        +-----+-----+

    Note that any short rows will be padded with `None` values (or whatever is
    provided via the `missing` keyword argument).

    See also :func:`petl.transform.basics.cutout`.

    """

    # support passing a single list or tuple of fields
    if len(args) == 1 and isinstance(args[0], (list, tuple)):
        args = args[0]

    return CutView(table, args, **kwargs)


Table.cut = cut


class CutView(Table):

    def __init__(self, source, spec, missing=None):
        self.source = source
        self.spec = spec
        self.missing = missing

    def __iter__(self):
        return itercut(self.source, self.spec, self.missing)


def itercut(source, spec, missing=None):
    it = iter(source)
    spec = tuple(spec)  # make sure no-one can change midstream

    # convert field selection into field indices
    try:
        hdr = next(it)
    except StopIteration:
        hdr = []
    indices = asindices(hdr, spec)

    # define a function to transform each row in the source data
    # according to the field selection
    transform = rowgetter(*indices)

    # yield the transformed header
    yield transform(hdr)

    # construct the transformed data
    for row in it:
        try:
            yield transform(row)
        except IndexError:
            # row is short, let's be kind and fill in any missing fields
            yield tuple(row[i] if i < len(row) else missing
                        for i in indices)
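

# Usage sketch (illustrative only): `cut` forwards keyword arguments to
# CutView, so short rows can be padded with something other than None via the
# `missing` keyword argument, e.g.:
#
#     >>> import petl as etl
#     >>> table1 = [['foo', 'bar', 'baz'],
#     ...           ['A', 1, 2.7],
#     ...           ['E', 12]]
#     >>> etl.cut(table1, 'foo', 'baz', missing='NA')
#
# Here the short row ['E', 12] has no value for 'baz', so that cell would be
# filled with the string 'NA' rather than None.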


def cutout(table, *args, **kwargs):
    """
    Remove fields. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['A', 1, 2.7],
        ...           ['B', 2, 3.4],
        ...           ['B', 3, 7.8],
        ...           ['D', 42, 9.0],
        ...           ['E', 12]]
        >>> table2 = etl.cutout(table1, 'bar')
        >>> table2
        +-----+------+
        | foo | baz  |
        +=====+======+
        | 'A' | 2.7  |
        +-----+------+
        | 'B' | 3.4  |
        +-----+------+
        | 'B' | 7.8  |
        +-----+------+
        | 'D' | 9.0  |
        +-----+------+
        | 'E' | None |
        +-----+------+

    See also :func:`petl.transform.basics.cut`.

    """

    return CutOutView(table, args, **kwargs)


Table.cutout = cutout


class CutOutView(Table):

    def __init__(self, source, spec, missing=None):
        self.source = source
        self.spec = spec
        self.missing = missing

    def __iter__(self):
        return itercutout(self.source, self.spec, self.missing)


def itercutout(source, spec, missing=None):
    it = iter(source)
    spec = tuple(spec)  # make sure no-one can change midstream

    # convert field selection into field indices
    try:
        hdr = next(it)
    except StopIteration:
        hdr = []
    indicesout = asindices(hdr, spec)
    indices = [i for i in range(len(hdr)) if i not in indicesout]

    # define a function to transform each row in the source data
    # according to the field selection
    transform = rowgetter(*indices)

    # yield the transformed header
    yield transform(hdr)

    # construct the transformed data
    for row in it:
        try:
            yield transform(row)
        except IndexError:
            # row is short, let's be kind and fill in any missing fields
            yield tuple(row[i] if i < len(row) else missing
                        for i in indices)


def cat(*tables, **kwargs):
    """
    Concatenate tables. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           [1, 'A'],
        ...           [2, 'B']]
        >>> table2 = [['bar', 'baz'],
        ...           ['C', True],
        ...           ['D', False]]
        >>> table3 = etl.cat(table1, table2)
        >>> table3
        +------+-----+-------+
        | foo  | bar | baz   |
        +======+=====+=======+
        | 1    | 'A' | None  |
        +------+-----+-------+
        | 2    | 'B' | None  |
        +------+-----+-------+
        | None | 'C' | True  |
        +------+-----+-------+
        | None | 'D' | False |
        +------+-----+-------+

        >>> # can also be used to square up a single table with uneven rows
        ... table4 = [['foo', 'bar', 'baz'],
        ...           ['A', 1, 2],
        ...           ['B', '2', '3.4'],
        ...           [u'B', u'3', u'7.8', True],
        ...           ['D', 'xyz', 9.0],
        ...           ['E', None]]
        >>> table5 = etl.cat(table4)
        >>> table5
        +-----+-------+-------+
        | foo | bar   | baz   |
        +=====+=======+=======+
        | 'A' | 1     | 2     |
        +-----+-------+-------+
        | 'B' | '2'   | '3.4' |
        +-----+-------+-------+
        | 'B' | '3'   | '7.8' |
        +-----+-------+-------+
        | 'D' | 'xyz' | 9.0   |
        +-----+-------+-------+
        | 'E' | None  | None  |
        +-----+-------+-------+

        >>> # use the header keyword argument to specify a fixed set of fields
        ... table6 = [['bar', 'foo'],
        ...           ['A', 1],
        ...           ['B', 2]]
        >>> table7 = etl.cat(table6, header=['A', 'foo', 'B', 'bar', 'C'])
        >>> table7
        +------+-----+------+-----+------+
        | A    | foo | B    | bar | C    |
        +======+=====+======+=====+======+
        | None | 1   | None | 'A' | None |
        +------+-----+------+-----+------+
        | None | 2   | None | 'B' | None |
        +------+-----+------+-----+------+

        >>> # using the header keyword argument with two input tables
        ... table8 = [['bar', 'foo'],
        ...           ['A', 1],
        ...           ['B', 2]]
        >>> table9 = [['bar', 'baz'],
        ...           ['C', True],
        ...           ['D', False]]
        >>> table10 = etl.cat(table8, table9, header=['A', 'foo', 'B', 'bar', 'C'])
        >>> table10
        +------+------+------+-----+------+
        | A    | foo  | B    | bar | C    |
        +======+======+======+=====+======+
        | None | 1    | None | 'A' | None |
        +------+------+------+-----+------+
        | None | 2    | None | 'B' | None |
        +------+------+------+-----+------+
        | None | None | None | 'C' | None |
        +------+------+------+-----+------+
        | None | None | None | 'D' | None |
        +------+------+------+-----+------+

    Note that the tables do not need to share exactly the same fields; any
    missing fields will be padded with `None` or whatever is provided via the
    `missing` keyword argument.

    Note that this function can be used with a single table argument, in which
    case it has the effect of ensuring all data rows are the same length as
    the header row, truncating any long rows and padding any short rows with
    the value of the `missing` keyword argument.

    By default, the fields for the output table will be determined as the
    union of all fields found in the input tables. Use the `header` keyword
    argument to override this behaviour and specify a fixed set of fields for
    the output table.

    """

    return CatView(tables, **kwargs)


Table.cat = cat


class CatView(Table):

    def __init__(self, sources, missing=None, header=None):
        self.sources = sources
        self.missing = missing
        self.header = header

    def __iter__(self):
        return itercat(self.sources, self.missing, self.header)


def itercat(sources, missing, header):
    its = [iter(t) for t in sources]
    hdrs = []
    for it in its:
        try:
            hdrs.append(list(next(it)))
        except StopIteration:
            hdrs.append([])

    if header is None:
        # determine output fields by gathering all fields found in the sources
        outhdr = list(hdrs[0])
        for hdr in hdrs[1:]:
            for h in hdr:
                if h not in outhdr:
                    # add any new fields as we find them
                    outhdr.append(h)
    else:
        # predetermined output fields
        outhdr = header
    yield tuple(outhdr)

    # output data rows
    for hdr, it in zip(hdrs, its):

        # now construct and yield the data rows
        for row in it:
            outrow = list()
            for h in outhdr:
                val = missing
                try:
                    val = row[hdr.index(h)]
                except IndexError:
                    # short row
                    pass
                except ValueError:
                    # field not in table
                    pass
                outrow.append(val)
            yield tuple(outrow)


def stack(*tables, **kwargs):
    """
    Concatenate tables, without trying to match headers. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           [1, 'A'],
        ...           [2, 'B']]
        >>> table2 = [['bar', 'baz'],
        ...           ['C', True],
        ...           ['D', False]]
        >>> table3 = etl.stack(table1, table2)
        >>> table3
        +-----+-------+
        | foo | bar   |
        +=====+=======+
        | 1   | 'A'   |
        +-----+-------+
        | 2   | 'B'   |
        +-----+-------+
        | 'C' | True  |
        +-----+-------+
        | 'D' | False |
        +-----+-------+

        >>> # can also be used to square up a single table with uneven rows
        ... table4 = [['foo', 'bar', 'baz'],
        ...           ['A', 1, 2],
        ...           ['B', '2', '3.4'],
        ...           [u'B', u'3', u'7.8', True],
        ...           ['D', 'xyz', 9.0],
        ...           ['E', None]]
        >>> table5 = etl.stack(table4)
        >>> table5
        +-----+-------+-------+
        | foo | bar   | baz   |
        +=====+=======+=======+
        | 'A' | 1     | 2     |
        +-----+-------+-------+
        | 'B' | '2'   | '3.4' |
        +-----+-------+-------+
        | 'B' | '3'   | '7.8' |
        +-----+-------+-------+
        | 'D' | 'xyz' | 9.0   |
        +-----+-------+-------+
        | 'E' | None  | None  |
        +-----+-------+-------+

    Similar to :func:`petl.transform.basics.cat` except that no attempt is
    made to align fields from different tables. Data rows are simply emitted
    in order, trimmed or padded to the length of the header row from the
    first table.

    .. versionadded:: 1.1.0

    """

    return StackView(tables, **kwargs)


Table.stack = stack


class StackView(Table):

    def __init__(self, sources, missing=None, trim=True, pad=True):
        self.sources = sources
        self.missing = missing
        self.trim = trim
        self.pad = pad

    def __iter__(self):
        return iterstack(self.sources, self.missing, self.trim, self.pad)


def iterstack(sources, missing, trim, pad):
    its = [iter(t) for t in sources]
    hdrs = []
    for it in its:
        try:
            hdrs.append(next(it))
        except StopIteration:
            hdrs.append([])
    hdr = hdrs[0]
    n = len(hdr)
    yield tuple(hdr)
    for it in its:
        for row in it:
            outrow = tuple(row)
            if trim:
                outrow = outrow[:n]
            if pad and len(outrow) < n:
                outrow += (missing,) * (n - len(outrow))
            yield outrow
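

# Usage sketch (illustrative only): `stack` forwards keyword arguments to
# StackView, so trimming of long rows and padding of short rows can be
# switched off, e.g.:
#
#     >>> import petl as etl
#     >>> uneven = [['foo', 'bar'],
#     ...           ['A', 1, True],
#     ...           ['B']]
#     >>> etl.stack(uneven, trim=False, pad=False)
#
# With trim=False the extra value in the first data row is kept, and with
# pad=False the short row ['B'] is emitted as-is rather than padded with the
# `missing` value.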


def addfield(table, field, value=None, index=None, missing=None):
    """
    Add a field with a fixed or calculated value. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['M', 12],
        ...           ['F', 34],
        ...           ['-', 56]]
        >>> # using a fixed value
        ... table2 = etl.addfield(table1, 'baz', 42)
        >>> table2
        +-----+-----+-----+
        | foo | bar | baz |
        +=====+=====+=====+
        | 'M' | 12  | 42  |
        +-----+-----+-----+
        | 'F' | 34  | 42  |
        +-----+-----+-----+
        | '-' | 56  | 42  |
        +-----+-----+-----+

        >>> # calculating the value
        ... table2 = etl.addfield(table1, 'baz', lambda rec: rec['bar'] * 2)
        >>> table2
        +-----+-----+-----+
        | foo | bar | baz |
        +=====+=====+=====+
        | 'M' | 12  | 24  |
        +-----+-----+-----+
        | 'F' | 34  | 68  |
        +-----+-----+-----+
        | '-' | 56  | 112 |
        +-----+-----+-----+

    Use the `index` parameter to control the position of the inserted field.

    """

    return AddFieldView(table, field, value=value, index=index,
                        missing=missing)


Table.addfield = addfield


class AddFieldView(Table):

    def __init__(self, source, field, value=None, index=None, missing=None):
        # ensure rows are all the same length
        self.source = stack(source, missing=missing)
        self.field = field
        self.value = value
        self.index = index

    def __iter__(self):
        return iteraddfield(self.source, self.field, self.value, self.index)


def iteraddfield(source, field, value, index):
    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        hdr = []
    flds = list(map(text_type, hdr))

    # determine index of new field
    if index is None:
        index = len(hdr)

    # construct output fields
    outhdr = list(hdr)
    outhdr.insert(index, field)
    yield tuple(outhdr)

    if callable(value):
        # wrap rows as records if using calculated value
        it = (Record(row, flds) for row in it)
        for row in it:
            outrow = list(row)
            v = value(row)
            outrow.insert(index, v)
            yield tuple(outrow)
    else:
        for row in it:
            outrow = list(row)
            outrow.insert(index, value)
            yield tuple(outrow)
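

# Usage sketch (illustrative only): the `index` parameter of `addfield`
# controls where the new field is inserted, e.g. index=0 places it before all
# existing fields:
#
#     >>> import petl as etl
#     >>> table1 = [['foo', 'bar'],
#     ...           ['M', 12],
#     ...           ['F', 34]]
#     >>> etl.addfield(table1, 'baz', 42, index=0)
#
# This yields a table with fields ('baz', 'foo', 'bar').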


def addfields(table, field_defs, missing=None):
    """
    Add fields with fixed or calculated values. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['M', 12],
        ...           ['F', 34],
        ...           ['-', 56]]
        >>> # using a fixed value or a calculation
        ... table2 = etl.addfields(table1,
        ...                        [('baz', 42),
        ...                         ('luhrmann', lambda rec: rec['bar'] * 2)])
        >>> table2
        +-----+-----+-----+----------+
        | foo | bar | baz | luhrmann |
        +=====+=====+=====+==========+
        | 'M' | 12  | 42  | 24       |
        +-----+-----+-----+----------+
        | 'F' | 34  | 42  | 68       |
        +-----+-----+-----+----------+
        | '-' | 56  | 42  | 112      |
        +-----+-----+-----+----------+

        >>> # you can specify an index as a 3rd item in each tuple -- indices
        ... # are evaluated in order.
        ... table2 = etl.addfields(table1,
        ...                        [('baz', 42, 0),
        ...                         ('luhrmann', lambda rec: rec['bar'] * 2, 0)])
        >>> table2
        +----------+-----+-----+-----+
        | luhrmann | baz | foo | bar |
        +==========+=====+=====+=====+
        | 24       | 42  | 'M' | 12  |
        +----------+-----+-----+-----+
        | 68       | 42  | 'F' | 34  |
        +----------+-----+-----+-----+
        | 112      | 42  | '-' | 56  |
        +----------+-----+-----+-----+

    """

    return AddFieldsView(table, field_defs, missing=missing)


Table.addfields = addfields


class AddFieldsView(Table):

    def __init__(self, source, field_defs, missing=None):
        # ensure rows are all the same length
        self.source = stack(source, missing=missing)
        # convert tuples to FieldDefinitions, if necessary
        self.field_defs = field_defs

    def __iter__(self):
        return iteraddfields(self.source, self.field_defs)


def iteraddfields(source, field_defs):
    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        hdr = []
    flds = list(map(text_type, hdr))

    # initialize output fields and indices
    outhdr = list(hdr)
    value_indexes = []
    for fdef in field_defs:
        # determine the defined field index
        if len(fdef) == 2:
            name, value = fdef
            index = len(outhdr)
        else:
            name, value, index = fdef

        # insert the name into the header at the appropriate index
        outhdr.insert(index, name)

        # remember the value/index pairs for later
        value_indexes.append((value, index))
    yield tuple(outhdr)

    for row in it:
        outrow = list(row)

        # add each defined field into the row at the appropriate index
        for value, index in value_indexes:
            if callable(value):
                # wrap row as record if using calculated value
                row = Record(row, flds)
                v = value(row)
                outrow.insert(index, v)
            else:
                outrow.insert(index, value)
        yield tuple(outrow)


def rowslice(table, *sliceargs):
    """
    Choose a subsequence of data rows. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b', 2],
        ...           ['c', 5],
        ...           ['d', 7],
        ...           ['f', 42]]
        >>> table2 = etl.rowslice(table1, 2)
        >>> table2
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'a' | 1   |
        +-----+-----+
        | 'b' | 2   |
        +-----+-----+

        >>> table3 = etl.rowslice(table1, 1, 4)
        >>> table3
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'b' | 2   |
        +-----+-----+
        | 'c' | 5   |
        +-----+-----+
        | 'd' | 7   |
        +-----+-----+

        >>> table4 = etl.rowslice(table1, 0, 5, 2)
        >>> table4
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'a' | 1   |
        +-----+-----+
        | 'c' | 5   |
        +-----+-----+
        | 'f' | 42  |
        +-----+-----+

    Positional arguments are used to slice the data rows. The `sliceargs` are
    passed through to :func:`itertools.islice`.

    See also :func:`petl.transform.basics.head`,
    :func:`petl.transform.basics.tail`.

    """

    return RowSliceView(table, *sliceargs)


Table.rowslice = rowslice


class RowSliceView(Table):

    def __init__(self, source, *sliceargs):
        self.source = source
        if not sliceargs:
            self.sliceargs = (None,)
        else:
            self.sliceargs = sliceargs

    def __iter__(self):
        return iterrowslice(self.source, self.sliceargs)


def iterrowslice(source, sliceargs):
    it = iter(source)
    try:
        yield tuple(next(it))  # fields
    except StopIteration:
        return
    for row in islice(it, *sliceargs):
        yield tuple(row)


def head(table, n=5):
    """
    Select the first `n` data rows.

    See also :func:`petl.transform.basics.tail`,
    :func:`petl.transform.basics.rowslice`.

    """

    return rowslice(table, n)


Table.head = head


def tail(table, n=5):
    """
    Select the last `n` data rows. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b', 2],
        ...           ['c', 5],
        ...           ['d', 7],
        ...           ['f', 42],
        ...           ['f', 3],
        ...           ['h', 90],
        ...           ['k', 12],
        ...           ['l', 77],
        ...           ['q', 2]]
        >>> table2 = etl.tail(table1, 4)
        >>> table2
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'h' | 90  |
        +-----+-----+
        | 'k' | 12  |
        +-----+-----+
        | 'l' | 77  |
        +-----+-----+
        | 'q' | 2   |
        +-----+-----+

    See also :func:`petl.transform.basics.head`,
    :func:`petl.transform.basics.rowslice`.

    """

    return TailView(table, n)


Table.tail = tail


class TailView(Table):

    def __init__(self, source, n):
        self.source = source
        self.n = n

    def __iter__(self):
        return itertail(self.source, self.n)


def itertail(source, n):
    it = iter(source)
    try:
        yield tuple(next(it))  # fields
    except StopIteration:
        return  # stop generating
    cache = deque()
    for row in it:
        cache.append(row)
        if len(cache) > n:
            cache.popleft()
    for row in cache:
        yield tuple(row)


def skipcomments(table, prefix):
    """
    Skip any row where the first value is a string and starts with `prefix`.
    E.g.::

        >>> import petl as etl
        >>> table1 = [['##aaa', 'bbb', 'ccc'],
        ...           ['##mmm',],
        ...           ['#foo', 'bar'],
        ...           ['##nnn', 1],
        ...           ['a', 1],
        ...           ['b', 2]]
        >>> table2 = etl.skipcomments(table1, '##')
        >>> table2
        +------+-----+
        | #foo | bar |
        +======+=====+
        | 'a'  | 1   |
        +------+-----+
        | 'b'  | 2   |
        +------+-----+

    Use the `prefix` parameter to determine which string to consider as
    indicating a comment.

    """

    return SkipCommentsView(table, prefix)


Table.skipcomments = skipcomments


class SkipCommentsView(Table):

    def __init__(self, source, prefix):
        self.source = source
        self.prefix = prefix

    def __iter__(self):
        return iterskipcomments(self.source, self.prefix)


def iterskipcomments(source, prefix):
    return (row for row in source
            if (len(row) > 0
                and not(isinstance(row[0], string_types)
                        and row[0].startswith(prefix))))


def movefield(table, field, index):
    """
    Move a field to a new position.

    """

    return MoveFieldView(table, field, index)


Table.movefield = movefield


class MoveFieldView(Table):

    def __init__(self, table, field, index, missing=None):
        self.table = table
        self.field = field
        self.index = index
        self.missing = missing

    def __iter__(self):
        it = iter(self.table)

        # determine output fields
        try:
            hdr = next(it)
        except StopIteration:
            hdr = []
        outhdr = [f for f in hdr if f != self.field]
        outhdr.insert(self.index, self.field)
        yield tuple(outhdr)

        # define a function to transform each row in the source data
        # according to the field selection
        outflds = list(map(str, outhdr))
        indices = asindices(hdr, outflds)
        transform = rowgetter(*indices)

        # construct the transformed data
        for row in it:
            try:
                yield transform(row)
            except IndexError:
                # row is short, let's be kind and fill in any missing fields
                yield tuple(row[i] if i < len(row) else self.missing
                            for i in indices)
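

# Usage sketch (illustrative only): `movefield` removes the named field from
# its current position and re-inserts it at the given index, e.g.:
#
#     >>> import petl as etl
#     >>> table1 = [['foo', 'bar', 'baz'],
#     ...           ['A', 1, True],
#     ...           ['B', 2, False]]
#     >>> etl.movefield(table1, 'baz', 0)
#
# This yields a table with fields ('baz', 'foo', 'bar').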


def annex(*tables, **kwargs):
    """
    Join two or more tables by row order. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['A', 9],
        ...           ['C', 2],
        ...           ['F', 1]]
        >>> table2 = [['foo', 'baz'],
        ...           ['B', 3],
        ...           ['D', 10]]
        >>> table3 = etl.annex(table1, table2)
        >>> table3
        +-----+-----+------+------+
        | foo | bar | foo  | baz  |
        +=====+=====+======+======+
        | 'A' | 9   | 'B'  | 3    |
        +-----+-----+------+------+
        | 'C' | 2   | 'D'  | 10   |
        +-----+-----+------+------+
        | 'F' | 1   | None | None |
        +-----+-----+------+------+

    See also :func:`petl.transform.joins.join`.

    """

    return AnnexView(tables, **kwargs)


Table.annex = annex


class AnnexView(Table):

    def __init__(self, tables, missing=None):
        self.tables = tables
        self.missing = missing

    def __iter__(self):
        return iterannex(self.tables, self.missing)


def iterannex(tables, missing):
    its = [iter(t) for t in tables]
    hdrs = []
    for it in its:
        try:
            hdrs.append(next(it))
        except StopIteration:
            hdrs.append([])
    outhdr = tuple(chain(*hdrs))
    yield outhdr
    for rows in izip_longest(*its):
        outrow = list()
        for i, row in enumerate(rows):
            lh = len(hdrs[i])
            if row is None:  # handle uneven length tables
                row = [missing] * len(hdrs[i])
            else:
                lr = len(row)
                if lr < lh:  # handle short rows
                    row = list(row)
                    row.extend([missing] * (lh-lr))
                elif lr > lh:  # handle long rows
                    row = row[:lh]
            outrow.extend(row)
        yield tuple(outrow)
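

# Usage sketch (illustrative only): `annex` forwards keyword arguments to
# AnnexView, so the value used to pad out the shorter table can be customised
# via `missing`, e.g.:
#
#     >>> import petl as etl
#     >>> table1 = [['foo', 'bar'],
#     ...           ['A', 9],
#     ...           ['C', 2],
#     ...           ['F', 1]]
#     >>> table2 = [['foo', 'baz'],
#     ...           ['B', 3],
#     ...           ['D', 10]]
#     >>> etl.annex(table1, table2, missing='NA')
#
# The third output row would then be ('F', 1, 'NA', 'NA') rather than being
# padded with None.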


def addrownumbers(table, start=1, step=1, field='row'):
    """
    Add a field of row numbers. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['A', 9],
        ...           ['C', 2],
        ...           ['F', 1]]
        >>> table2 = etl.addrownumbers(table1)
        >>> table2
        +-----+-----+-----+
        | row | foo | bar |
        +=====+=====+=====+
        | 1   | 'A' | 9   |
        +-----+-----+-----+
        | 2   | 'C' | 2   |
        +-----+-----+-----+
        | 3   | 'F' | 1   |
        +-----+-----+-----+

    Parameters `start` and `step` control the numbering.

    """

    return AddRowNumbersView(table, start, step, field)


Table.addrownumbers = addrownumbers


class AddRowNumbersView(Table):

    def __init__(self, table, start=1, step=1, field='row'):
        self.table = table
        self.start = start
        self.step = step
        self.field = field

    def __iter__(self):
        return iteraddrownumbers(self.table, self.start, self.step,
                                 self.field)


def iteraddrownumbers(table, start, step, field):
    it = iter(table)
    try:
        hdr = next(it)
    except StopIteration:
        hdr = []
    outhdr = [field]
    outhdr.extend(hdr)
    yield tuple(outhdr)
    for row, n in izip(it, count(start, step)):
        outrow = [n]
        outrow.extend(row)
        yield tuple(outrow)
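

# Usage sketch (illustrative only): `start` and `step` are passed through to
# itertools.count, so the numbering need not begin at 1 or increase by 1,
# e.g.:
#
#     >>> import petl as etl
#     >>> table1 = [['foo', 'bar'],
#     ...           ['A', 9],
#     ...           ['C', 2]]
#     >>> etl.addrownumbers(table1, start=100, step=10)
#
# This numbers the data rows 100, 110, ... in a new 'row' field.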


def addcolumn(table, field, col, index=None, missing=None):
    """
    Add a column of data to the table. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['A', 1],
        ...           ['B', 2]]
        >>> col = [True, False]
        >>> table2 = etl.addcolumn(table1, 'baz', col)
        >>> table2
        +-----+-----+-------+
        | foo | bar | baz   |
        +=====+=====+=======+
        | 'A' | 1   | True  |
        +-----+-----+-------+
        | 'B' | 2   | False |
        +-----+-----+-------+

    Use the `index` parameter to control the position of the new column.

    """

    return AddColumnView(table, field, col, index=index, missing=missing)


Table.addcolumn = addcolumn


class AddColumnView(Table):

    def __init__(self, table, field, col, index=None, missing=None):
        self._table = table
        self._field = field
        self._col = col
        self._index = index
        self._missing = missing

    def __iter__(self):
        return iteraddcolumn(self._table, self._field, self._col,
                             self._index, self._missing)


def iteraddcolumn(table, field, col, index, missing):
    it = iter(table)
    try:
        hdr = next(it)
    except StopIteration:
        hdr = []

    # determine position of new column
    if index is None:
        index = len(hdr)

    # construct output header
    outhdr = list(hdr)
    outhdr.insert(index, field)
    yield tuple(outhdr)

    # construct output data
    for row, val in izip_longest(it, col, fillvalue=missing):
        # run out of rows?
        if row == missing:
            row = [missing] * len(hdr)
        outrow = list(row)
        outrow.insert(index, val)
        yield tuple(outrow)


class TransformError(Exception):
    pass


def addfieldusingcontext(table, field, query):
    """
    Like :func:`petl.transform.basics.addfield` but the `query` function is
    passed the previous, current and next rows, so values may be calculated
    based on data in adjacent rows. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['A', 1],
        ...           ['B', 4],
        ...           ['C', 5],
        ...           ['D', 9]]
        >>> def upstream(prv, cur, nxt):
        ...     if prv is None:
        ...         return None
        ...     else:
        ...         return cur.bar - prv.bar
        ...
        >>> def downstream(prv, cur, nxt):
        ...     if nxt is None:
        ...         return None
        ...     else:
        ...         return nxt.bar - cur.bar
        ...
        >>> table2 = etl.addfieldusingcontext(table1, 'baz', upstream)
        >>> table3 = etl.addfieldusingcontext(table2, 'quux', downstream)
        >>> table3
        +-----+-----+------+------+
        | foo | bar | baz  | quux |
        +=====+=====+======+======+
        | 'A' | 1   | None | 3    |
        +-----+-----+------+------+
        | 'B' | 4   | 3    | 1    |
        +-----+-----+------+------+
        | 'C' | 5   | 1    | 4    |
        +-----+-----+------+------+
        | 'D' | 9   | 4    | None |
        +-----+-----+------+------+

    The `field` parameter is the name of the field to be added. The `query`
    parameter is a function operating on the previous, current and next rows
    and returning the value.

    """

    return AddFieldUsingContextView(table, field, query)


Table.addfieldusingcontext = addfieldusingcontext


class AddFieldUsingContextView(Table):

    def __init__(self, table, field, query):
        self.table = table
        self.field = field
        self.query = query

    def __iter__(self):
        return iteraddfieldusingcontext(self.table, self.field, self.query)


def iteraddfieldusingcontext(table, field, query):
    it = iter(table)
    try:
        hdr = tuple(next(it))
    except StopIteration:
        hdr = ()
    flds = list(map(text_type, hdr))
    yield hdr + (field,)
    flds.append(field)
    it = (Record(row, flds) for row in it)
    prv = None
    try:
        cur = next(it)
    except StopIteration:
        return  # no more items
    for nxt in it:
        v = query(prv, cur, nxt)
        yield tuple(cur) + (v,)
        prv = Record(tuple(cur) + (v,), flds)
        cur = nxt
    # handle last row
    v = query(prv, cur, None)
    yield tuple(cur) + (v,)