# Copyright 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Filters for Google Cloud Bigtable Row classes."""
from google.cloud._helpers import _microseconds_from_datetime
from google.cloud._helpers import _to_bytes
from google.cloud.bigtable._generated import (
data_pb2 as data_v2_pb2)
[docs]class RowFilter(object):
"""Basic filter to apply to cells in a row.
These values can be combined via :class:`RowFilterChain`,
:class:`RowFilterUnion` and :class:`ConditionalRowFilter`.
.. note::
This class is a do-nothing base class for all row filters.
"""
def __ne__(self, other):
return not self.__eq__(other)
class _BoolFilter(RowFilter):
"""Row filter that uses a boolean flag.
:type flag: bool
:param flag: An indicator if a setting is turned on or off.
"""
def __init__(self, flag):
self.flag = flag
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return other.flag == self.flag
[docs]class SinkFilter(_BoolFilter):
"""Advanced row filter to skip parent filters.
:type flag: bool
:param flag: ADVANCED USE ONLY. Hook for introspection into the row filter.
Outputs all cells directly to the output of the read rather
than to any parent filter. Cannot be used within the
``predicate_filter``, ``true_filter``, or ``false_filter``
of a :class:`ConditionalRowFilter`.
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(sink=self.flag)
[docs]class PassAllFilter(_BoolFilter):
"""Row filter equivalent to not filtering at all.
:type flag: bool
:param flag: Matches all cells, regardless of input. Functionally
equivalent to leaving ``filter`` unset, but included for
completeness.
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(pass_all_filter=self.flag)
[docs]class BlockAllFilter(_BoolFilter):
"""Row filter that doesn't match any cells.
:type flag: bool
:param flag: Does not match any cells, regardless of input. Useful for
temporarily disabling just part of a filter.
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(block_all_filter=self.flag)
class _RegexFilter(RowFilter):
"""Row filter that uses a regular expression.
The ``regex`` must be valid RE2 patterns. See Google's
`RE2 reference`_ for the accepted syntax.
.. _RE2 reference: https://github.com/google/re2/wiki/Syntax
:type regex: bytes or str
:param regex: A regular expression (RE2) for some row filter.
"""
def __init__(self, regex):
self.regex = _to_bytes(regex)
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return other.regex == self.regex
[docs]class RowKeyRegexFilter(_RegexFilter):
"""Row filter for a row key regular expression.
The ``regex`` must be valid RE2 patterns. See Google's
`RE2 reference`_ for the accepted syntax.
.. _RE2 reference: https://github.com/google/re2/wiki/Syntax
.. note::
Special care need be used with the expression used. Since
each of these properties can contain arbitrary bytes, the ``\\C``
escape sequence must be used if a true wildcard is desired. The ``.``
character will not match the new line character ``\\n``, which may be
present in a binary value.
:type regex: bytes
:param regex: A regular expression (RE2) to match cells from rows with row
keys that satisfy this regex. For a
``CheckAndMutateRowRequest``, this filter is unnecessary
since the row key is already specified.
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(row_key_regex_filter=self.regex)
[docs]class RowSampleFilter(RowFilter):
"""Matches all cells from a row with probability p.
:type sample: float
:param sample: The probability of matching a cell (must be in the
interval ``[0, 1]``).
"""
def __init__(self, sample):
self.sample = sample
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return other.sample == self.sample
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(row_sample_filter=self.sample)
[docs]class FamilyNameRegexFilter(_RegexFilter):
"""Row filter for a family name regular expression.
The ``regex`` must be valid RE2 patterns. See Google's
`RE2 reference`_ for the accepted syntax.
.. _RE2 reference: https://github.com/google/re2/wiki/Syntax
:type regex: str
:param regex: A regular expression (RE2) to match cells from columns in a
given column family. For technical reasons, the regex must
not contain the ``':'`` character, even if it is not being
used as a literal.
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(family_name_regex_filter=self.regex)
[docs]class ColumnQualifierRegexFilter(_RegexFilter):
"""Row filter for a column qualifier regular expression.
The ``regex`` must be valid RE2 patterns. See Google's
`RE2 reference`_ for the accepted syntax.
.. _RE2 reference: https://github.com/google/re2/wiki/Syntax
.. note::
Special care need be used with the expression used. Since
each of these properties can contain arbitrary bytes, the ``\\C``
escape sequence must be used if a true wildcard is desired. The ``.``
character will not match the new line character ``\\n``, which may be
present in a binary value.
:type regex: bytes
:param regex: A regular expression (RE2) to match cells from column that
match this regex (irrespective of column family).
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(column_qualifier_regex_filter=self.regex)
[docs]class TimestampRange(object):
"""Range of time with inclusive lower and exclusive upper bounds.
:type start: :class:`datetime.datetime`
:param start: (Optional) The (inclusive) lower bound of the timestamp
range. If omitted, defaults to Unix epoch.
:type end: :class:`datetime.datetime`
:param end: (Optional) The (exclusive) upper bound of the timestamp
range. If omitted, no upper bound is used.
"""
def __init__(self, start=None, end=None):
self.start = start
self.end = end
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return (other.start == self.start and
other.end == self.end)
def __ne__(self, other):
return not self.__eq__(other)
[docs] def to_pb(self):
"""Converts the :class:`TimestampRange` to a protobuf.
:rtype: :class:`.data_v2_pb2.TimestampRange`
:returns: The converted current object.
"""
timestamp_range_kwargs = {}
if self.start is not None:
timestamp_range_kwargs['start_timestamp_micros'] = (
_microseconds_from_datetime(self.start))
if self.end is not None:
timestamp_range_kwargs['end_timestamp_micros'] = (
_microseconds_from_datetime(self.end))
return data_v2_pb2.TimestampRange(**timestamp_range_kwargs)
[docs]class TimestampRangeFilter(RowFilter):
"""Row filter that limits cells to a range of time.
:type range_: :class:`TimestampRange`
:param range_: Range of time that cells should match against.
"""
def __init__(self, range_):
self.range_ = range_
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return other.range_ == self.range_
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
First converts the ``range_`` on the current object to a protobuf and
then uses it in the ``timestamp_range_filter`` field.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(
timestamp_range_filter=self.range_.to_pb())
[docs]class ColumnRangeFilter(RowFilter):
"""A row filter to restrict to a range of columns.
Both the start and end column can be included or excluded in the range.
By default, we include them both, but this can be changed with optional
flags.
:type column_family_id: str
:param column_family_id: The column family that contains the columns. Must
be of the form ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.
:type start_column: bytes
:param start_column: The start of the range of columns. If no value is
used, the backend applies no upper bound to the
values.
:type end_column: bytes
:param end_column: The end of the range of columns. If no value is used,
the backend applies no upper bound to the values.
:type inclusive_start: bool
:param inclusive_start: Boolean indicating if the start column should be
included in the range (or excluded). Defaults
to :data:`True` if ``start_column`` is passed and
no ``inclusive_start`` was given.
:type inclusive_end: bool
:param inclusive_end: Boolean indicating if the end column should be
included in the range (or excluded). Defaults
to :data:`True` if ``end_column`` is passed and
no ``inclusive_end`` was given.
:raises: :class:`ValueError <exceptions.ValueError>` if ``inclusive_start``
is set but no ``start_column`` is given or if ``inclusive_end``
is set but no ``end_column`` is given
"""
def __init__(self, column_family_id, start_column=None, end_column=None,
inclusive_start=None, inclusive_end=None):
self.column_family_id = column_family_id
if inclusive_start is None:
inclusive_start = True
elif start_column is None:
raise ValueError('Inclusive start was specified but no '
'start column was given.')
self.start_column = start_column
self.inclusive_start = inclusive_start
if inclusive_end is None:
inclusive_end = True
elif end_column is None:
raise ValueError('Inclusive end was specified but no '
'end column was given.')
self.end_column = end_column
self.inclusive_end = inclusive_end
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return (other.column_family_id == self.column_family_id and
other.start_column == self.start_column and
other.end_column == self.end_column and
other.inclusive_start == self.inclusive_start and
other.inclusive_end == self.inclusive_end)
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
First converts to a :class:`.data_v2_pb2.ColumnRange` and then uses it
in the ``column_range_filter`` field.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
column_range_kwargs = {'family_name': self.column_family_id}
if self.start_column is not None:
if self.inclusive_start:
key = 'start_qualifier_closed'
else:
key = 'start_qualifier_open'
column_range_kwargs[key] = _to_bytes(self.start_column)
if self.end_column is not None:
if self.inclusive_end:
key = 'end_qualifier_closed'
else:
key = 'end_qualifier_open'
column_range_kwargs[key] = _to_bytes(self.end_column)
column_range = data_v2_pb2.ColumnRange(**column_range_kwargs)
return data_v2_pb2.RowFilter(column_range_filter=column_range)
[docs]class ValueRegexFilter(_RegexFilter):
"""Row filter for a value regular expression.
The ``regex`` must be valid RE2 patterns. See Google's
`RE2 reference`_ for the accepted syntax.
.. _RE2 reference: https://github.com/google/re2/wiki/Syntax
.. note::
Special care need be used with the expression used. Since
each of these properties can contain arbitrary bytes, the ``\\C``
escape sequence must be used if a true wildcard is desired. The ``.``
character will not match the new line character ``\\n``, which may be
present in a binary value.
:type regex: bytes
:param regex: A regular expression (RE2) to match cells with values that
match this regex.
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(value_regex_filter=self.regex)
[docs]class ValueRangeFilter(RowFilter):
"""A range of values to restrict to in a row filter.
Will only match cells that have values in this range.
Both the start and end value can be included or excluded in the range.
By default, we include them both, but this can be changed with optional
flags.
:type start_value: bytes
:param start_value: The start of the range of values. If no value is used,
the backend applies no lower bound to the values.
:type end_value: bytes
:param end_value: The end of the range of values. If no value is used,
the backend applies no upper bound to the values.
:type inclusive_start: bool
:param inclusive_start: Boolean indicating if the start value should be
included in the range (or excluded). Defaults
to :data:`True` if ``start_value`` is passed and
no ``inclusive_start`` was given.
:type inclusive_end: bool
:param inclusive_end: Boolean indicating if the end value should be
included in the range (or excluded). Defaults
to :data:`True` if ``end_value`` is passed and
no ``inclusive_end`` was given.
:raises: :class:`ValueError <exceptions.ValueError>` if ``inclusive_start``
is set but no ``start_value`` is given or if ``inclusive_end``
is set but no ``end_value`` is given
"""
def __init__(self, start_value=None, end_value=None,
inclusive_start=None, inclusive_end=None):
if inclusive_start is None:
inclusive_start = True
elif start_value is None:
raise ValueError('Inclusive start was specified but no '
'start value was given.')
self.start_value = start_value
self.inclusive_start = inclusive_start
if inclusive_end is None:
inclusive_end = True
elif end_value is None:
raise ValueError('Inclusive end was specified but no '
'end value was given.')
self.end_value = end_value
self.inclusive_end = inclusive_end
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return (other.start_value == self.start_value and
other.end_value == self.end_value and
other.inclusive_start == self.inclusive_start and
other.inclusive_end == self.inclusive_end)
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
First converts to a :class:`.data_v2_pb2.ValueRange` and then uses
it to create a row filter protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
value_range_kwargs = {}
if self.start_value is not None:
if self.inclusive_start:
key = 'start_value_closed'
else:
key = 'start_value_open'
value_range_kwargs[key] = _to_bytes(self.start_value)
if self.end_value is not None:
if self.inclusive_end:
key = 'end_value_closed'
else:
key = 'end_value_open'
value_range_kwargs[key] = _to_bytes(self.end_value)
value_range = data_v2_pb2.ValueRange(**value_range_kwargs)
return data_v2_pb2.RowFilter(value_range_filter=value_range)
class _CellCountFilter(RowFilter):
"""Row filter that uses an integer count of cells.
The cell count is used as an offset or a limit for the number
of results returned.
:type num_cells: int
:param num_cells: An integer count / offset / limit.
"""
def __init__(self, num_cells):
self.num_cells = num_cells
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return other.num_cells == self.num_cells
[docs]class CellsRowOffsetFilter(_CellCountFilter):
"""Row filter to skip cells in a row.
:type num_cells: int
:param num_cells: Skips the first N cells of the row.
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(
cells_per_row_offset_filter=self.num_cells)
[docs]class CellsRowLimitFilter(_CellCountFilter):
"""Row filter to limit cells in a row.
:type num_cells: int
:param num_cells: Matches only the first N cells of the row.
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(cells_per_row_limit_filter=self.num_cells)
[docs]class CellsColumnLimitFilter(_CellCountFilter):
"""Row filter to limit cells in a column.
:type num_cells: int
:param num_cells: Matches only the most recent N cells within each column.
This filters a (family name, column) pair, based on
timestamps of each cell.
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(
cells_per_column_limit_filter=self.num_cells)
[docs]class ApplyLabelFilter(RowFilter):
"""Filter to apply labels to cells.
Intended to be used as an intermediate filter on a pre-existing filtered
result set. This way if two sets are combined, the label can tell where
the cell(s) originated.This allows the client to determine which results
were produced from which part of the filter.
.. note::
Due to a technical limitation of the backend, it is not currently
possible to apply multiple labels to a cell.
:type label: str
:param label: Label to apply to cells in the output row. Values must be
at most 15 characters long, and match the pattern
``[a-z0-9\\-]+``.
"""
def __init__(self, label):
self.label = label
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return other.label == self.label
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
return data_v2_pb2.RowFilter(apply_label_transformer=self.label)
class _FilterCombination(RowFilter):
"""Chain of row filters.
Sends rows through several filters in sequence. The filters are "chained"
together to process a row. After the first filter is applied, the second
is applied to the filtered output and so on for subsequent filters.
:type filters: list
:param filters: List of :class:`RowFilter`
"""
def __init__(self, filters=None):
if filters is None:
filters = []
self.filters = filters
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return other.filters == self.filters
[docs]class RowFilterChain(_FilterCombination):
"""Chain of row filters.
Sends rows through several filters in sequence. The filters are "chained"
together to process a row. After the first filter is applied, the second
is applied to the filtered output and so on for subsequent filters.
:type filters: list
:param filters: List of :class:`RowFilter`
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
chain = data_v2_pb2.RowFilter.Chain(
filters=[row_filter.to_pb() for row_filter in self.filters])
return data_v2_pb2.RowFilter(chain=chain)
[docs]class RowFilterUnion(_FilterCombination):
"""Union of row filters.
Sends rows through several filters simultaneously, then
merges / interleaves all the filtered results together.
If multiple cells are produced with the same column and timestamp,
they will all appear in the output row in an unspecified mutual order.
:type filters: list
:param filters: List of :class:`RowFilter`
"""
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
interleave = data_v2_pb2.RowFilter.Interleave(
filters=[row_filter.to_pb() for row_filter in self.filters])
return data_v2_pb2.RowFilter(interleave=interleave)
[docs]class ConditionalRowFilter(RowFilter):
"""Conditional row filter which exhibits ternary behavior.
Executes one of two filters based on another filter. If the ``base_filter``
returns any cells in the row, then ``true_filter`` is executed. If not,
then ``false_filter`` is executed.
.. note::
The ``base_filter`` does not execute atomically with the true and false
filters, which may lead to inconsistent or unexpected results.
Additionally, executing a :class:`ConditionalRowFilter` has poor
performance on the server, especially when ``false_filter`` is set.
:type base_filter: :class:`RowFilter`
:param base_filter: The filter to condition on before executing the
true/false filters.
:type true_filter: :class:`RowFilter`
:param true_filter: (Optional) The filter to execute if there are any cells
matching ``base_filter``. If not provided, no results
will be returned in the true case.
:type false_filter: :class:`RowFilter`
:param false_filter: (Optional) The filter to execute if there are no cells
matching ``base_filter``. If not provided, no results
will be returned in the false case.
"""
def __init__(self, base_filter, true_filter=None, false_filter=None):
self.base_filter = base_filter
self.true_filter = true_filter
self.false_filter = false_filter
def __eq__(self, other):
if not isinstance(other, self.__class__):
return False
return (other.base_filter == self.base_filter and
other.true_filter == self.true_filter and
other.false_filter == self.false_filter)
[docs] def to_pb(self):
"""Converts the row filter to a protobuf.
:rtype: :class:`.data_v2_pb2.RowFilter`
:returns: The converted current object.
"""
condition_kwargs = {'predicate_filter': self.base_filter.to_pb()}
if self.true_filter is not None:
condition_kwargs['true_filter'] = self.true_filter.to_pb()
if self.false_filter is not None:
condition_kwargs['false_filter'] = self.false_filter.to_pb()
condition = data_v2_pb2.RowFilter.Condition(**condition_kwargs)
return data_v2_pb2.RowFilter(condition=condition)