Source code for google.cloud.bigtable.row_filters

# Copyright 2016 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Filters for Google Cloud Bigtable Row classes."""


from google.cloud._helpers import _microseconds_from_datetime
from google.cloud._helpers import _to_bytes
from google.cloud.bigtable._generated import (
    data_pb2 as data_v2_pb2)


class RowFilter(object):
    """Basic filter to apply to cells in a row.

    These values can be combined via :class:`RowFilterChain`,
    :class:`RowFilterUnion` and :class:`ConditionalRowFilter`.

    .. note::

        This class is a do-nothing base class for all row filters.
    """

    def __ne__(self, other):
        """Report inequality as the negation of ``__eq__``.

        :type other: object
        :param other: The value to compare against.

        :rtype: bool
        :returns: The negated result of ``__eq__``, or ``NotImplemented``
                  when ``__eq__`` itself could not decide.
        """
        equal = self.__eq__(other)
        # The inherited ``object.__eq__`` answers ``NotImplemented`` for two
        # distinct objects. Negating that sentinel would claim the objects
        # are equal (and is rejected outright by newer Python versions), so
        # hand it back and let Python fall back to its default comparison.
        if equal is NotImplemented:
            return NotImplemented
        return not equal
class _BoolFilter(RowFilter):
    """Base for row filters configured by a single boolean flag.

    :type flag: bool
    :param flag: An indicator if a setting is turned on or off.
    """

    def __init__(self, flag):
        self.flag = flag

    def __eq__(self, other):
        """Compare by ``flag``; other types are never equal."""
        if isinstance(other, self.__class__):
            return other.flag == self.flag
        return False
class SinkFilter(_BoolFilter):
    """Advanced row filter that bypasses parent filters.

    :type flag: bool
    :param flag: ADVANCED USE ONLY. Hook for introspection into the row
                 filter. Outputs all cells directly to the output of the
                 read rather than to any parent filter. Cannot be used
                 within the ``predicate_filter``, ``true_filter``, or
                 ``false_filter`` of a :class:`ConditionalRowFilter`.
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose ``sink`` field is ``flag``.
        """
        sink_pb = data_v2_pb2.RowFilter(sink=self.flag)
        return sink_pb
class PassAllFilter(_BoolFilter):
    """Row filter equivalent to applying no filter.

    :type flag: bool
    :param flag: Matches all cells, regardless of input. Functionally
                 equivalent to leaving ``filter`` unset, but included for
                 completeness.
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose ``pass_all_filter`` field is
                  ``flag``.
        """
        pass_all_pb = data_v2_pb2.RowFilter(pass_all_filter=self.flag)
        return pass_all_pb
class BlockAllFilter(_BoolFilter):
    """Row filter that matches no cells.

    :type flag: bool
    :param flag: Does not match any cells, regardless of input. Useful for
                 temporarily disabling just part of a filter.
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose ``block_all_filter`` field is
                  ``flag``.
        """
        block_all_pb = data_v2_pb2.RowFilter(block_all_filter=self.flag)
        return block_all_pb
class _RegexFilter(RowFilter):
    """Base for row filters driven by a regular expression.

    The ``regex`` must be valid RE2 patterns. See Google's
    `RE2 reference`_ for the accepted syntax.

    .. _RE2 reference: https://github.com/google/re2/wiki/Syntax

    :type regex: bytes or str
    :param regex: A regular expression (RE2) for some row filter.
    """

    def __init__(self, regex):
        # The pattern is stored after passing through ``_to_bytes``.
        self.regex = _to_bytes(regex)

    def __eq__(self, other):
        """Compare by stored ``regex``; other types are never equal."""
        if isinstance(other, self.__class__):
            return other.regex == self.regex
        return False
class RowKeyRegexFilter(_RegexFilter):
    """Row filter that matches row keys against a regular expression.

    The ``regex`` must be valid RE2 patterns. See Google's
    `RE2 reference`_ for the accepted syntax.

    .. _RE2 reference: https://github.com/google/re2/wiki/Syntax

    .. note::

        Special care must be taken with the expression used. Since
        each of these properties can contain arbitrary bytes, the ``\\C``
        escape sequence must be used if a true wildcard is desired. The
        ``.`` character will not match the new line character ``\\n``,
        which may be present in a binary value.

    :type regex: bytes
    :param regex: A regular expression (RE2) to match cells from rows with
                  row keys that satisfy this regex. For a
                  ``CheckAndMutateRowRequest``, this filter is unnecessary
                  since the row key is already specified.
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose ``row_key_regex_filter`` field
                  is the stored ``regex``.
        """
        row_key_pb = data_v2_pb2.RowFilter(row_key_regex_filter=self.regex)
        return row_key_pb
class RowSampleFilter(RowFilter):
    """Matches all cells from a row with probability p.

    :type sample: float
    :param sample: The probability of matching a cell (must be in the
                   interval ``[0, 1]``).
    """

    def __init__(self, sample):
        self.sample = sample

    def __eq__(self, other):
        """Compare by ``sample``; other types are never equal."""
        if isinstance(other, self.__class__):
            return other.sample == self.sample
        return False

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose ``row_sample_filter`` field is
                  ``sample``.
        """
        sample_pb = data_v2_pb2.RowFilter(row_sample_filter=self.sample)
        return sample_pb
class FamilyNameRegexFilter(_RegexFilter):
    """Row filter that matches column family names against a regex.

    The ``regex`` must be valid RE2 patterns. See Google's
    `RE2 reference`_ for the accepted syntax.

    .. _RE2 reference: https://github.com/google/re2/wiki/Syntax

    :type regex: str
    :param regex: A regular expression (RE2) to match cells from columns in
                  a given column family. For technical reasons, the regex
                  must not contain the ``':'`` character, even if it is not
                  being used as a literal.
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose ``family_name_regex_filter``
                  field is the stored ``regex``.
        """
        family_pb = data_v2_pb2.RowFilter(
            family_name_regex_filter=self.regex)
        return family_pb
class ColumnQualifierRegexFilter(_RegexFilter):
    """Row filter that matches column qualifiers against a regex.

    The ``regex`` must be valid RE2 patterns. See Google's
    `RE2 reference`_ for the accepted syntax.

    .. _RE2 reference: https://github.com/google/re2/wiki/Syntax

    .. note::

        Special care must be taken with the expression used. Since
        each of these properties can contain arbitrary bytes, the ``\\C``
        escape sequence must be used if a true wildcard is desired. The
        ``.`` character will not match the new line character ``\\n``,
        which may be present in a binary value.

    :type regex: bytes
    :param regex: A regular expression (RE2) to match cells from column that
                  match this regex (irrespective of column family).
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose
                  ``column_qualifier_regex_filter`` field is the stored
                  ``regex``.
        """
        qualifier_pb = data_v2_pb2.RowFilter(
            column_qualifier_regex_filter=self.regex)
        return qualifier_pb
class TimestampRange(object):
    """Range of time with inclusive lower and exclusive upper bounds.

    :type start: :class:`datetime.datetime`
    :param start: (Optional) The (inclusive) lower bound of the timestamp
                  range. If omitted, defaults to Unix epoch.

    :type end: :class:`datetime.datetime`
    :param end: (Optional) The (exclusive) upper bound of the timestamp
                range. If omitted, no upper bound is used.
    """

    def __init__(self, start=None, end=None):
        self.start = start
        self.end = end

    def __eq__(self, other):
        """Compare both bounds; other types are never equal."""
        if isinstance(other, self.__class__):
            return (other.start == self.start and
                    other.end == self.end)
        return False

    def __ne__(self, other):
        """Report inequality as the negation of ``__eq__``."""
        equal = self.__eq__(other)
        return not equal

    def to_pb(self):
        """Converts the :class:`TimestampRange` to a protobuf.

        Only the bounds that are set are passed to the message.

        :rtype: :class:`.data_v2_pb2.TimestampRange`
        :returns: The converted current object.
        """
        bounds = (
            ('start_timestamp_micros', self.start),
            ('end_timestamp_micros', self.end),
        )
        timestamp_range_kwargs = {}
        for field_name, moment in bounds:
            if moment is not None:
                timestamp_range_kwargs[field_name] = (
                    _microseconds_from_datetime(moment))
        return data_v2_pb2.TimestampRange(**timestamp_range_kwargs)
class TimestampRangeFilter(RowFilter):
    """Row filter that restricts cells to a range of time.

    :type range_: :class:`TimestampRange`
    :param range_: Range of time that cells should match against.
    """

    def __init__(self, range_):
        self.range_ = range_

    def __eq__(self, other):
        """Compare by ``range_``; other types are never equal."""
        if isinstance(other, self.__class__):
            return other.range_ == self.range_
        return False

    def to_pb(self):
        """Build the protobuf form of this filter.

        The ``range_`` held by this object is converted with its own
        ``to_pb`` and placed in the ``timestamp_range_filter`` field.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: The converted current object.
        """
        range_pb = self.range_.to_pb()
        return data_v2_pb2.RowFilter(timestamp_range_filter=range_pb)
class ColumnRangeFilter(RowFilter):
    """A row filter to restrict to a range of columns.

    Both the start and end column can be included or excluded in the range.
    By default, we include them both, but this can be changed with optional
    flags.

    :type column_family_id: str
    :param column_family_id: The column family that contains the columns.
                             Must be of the form
                             ``[_a-zA-Z0-9][-_.a-zA-Z0-9]*``.

    :type start_column: bytes
    :param start_column: The start of the range of columns. If no value is
                         used, the backend applies no lower bound to the
                         values.

    :type end_column: bytes
    :param end_column: The end of the range of columns. If no value is used,
                       the backend applies no upper bound to the values.

    :type inclusive_start: bool
    :param inclusive_start: Boolean indicating if the start column should be
                            included in the range (or excluded). Defaults
                            to :data:`True` when omitted. May only be
                            given together with ``start_column``.

    :type inclusive_end: bool
    :param inclusive_end: Boolean indicating if the end column should be
                          included in the range (or excluded). Defaults
                          to :data:`True` when omitted. May only be given
                          together with ``end_column``.

    :raises: :class:`ValueError <exceptions.ValueError>` if
             ``inclusive_start`` is set but no ``start_column`` is given
             or if ``inclusive_end`` is set but no ``end_column`` is given
    """

    def __init__(self, column_family_id, start_column=None, end_column=None,
                 inclusive_start=None, inclusive_end=None):
        self.column_family_id = column_family_id

        # An explicit inclusivity flag is meaningless without the bound it
        # qualifies, so that combination is rejected up front.
        if inclusive_start is None:
            inclusive_start = True
        elif start_column is None:
            raise ValueError('Inclusive start was specified but no '
                             'start column was given.')
        self.start_column = start_column
        self.inclusive_start = inclusive_start

        if inclusive_end is None:
            inclusive_end = True
        elif end_column is None:
            raise ValueError('Inclusive end was specified but no '
                             'end column was given.')
        self.end_column = end_column
        self.inclusive_end = inclusive_end

    def __eq__(self, other):
        """Compare family, both bounds and both inclusivity flags.

        Instances of other types are never equal.
        """
        if not isinstance(other, self.__class__):
            return False
        return (other.column_family_id == self.column_family_id and
                other.start_column == self.start_column and
                other.end_column == self.end_column and
                other.inclusive_start == self.inclusive_start and
                other.inclusive_end == self.inclusive_end)

    def to_pb(self):
        """Converts the row filter to a protobuf.

        First converts to a :class:`.data_v2_pb2.ColumnRange` and then uses
        it in the ``column_range_filter`` field. A bound that is not set is
        left out of the message entirely.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: The converted current object.
        """
        column_range_kwargs = {'family_name': self.column_family_id}
        if self.start_column is not None:
            # The inclusivity flag selects which start field is populated.
            if self.inclusive_start:
                key = 'start_qualifier_closed'
            else:
                key = 'start_qualifier_open'
            column_range_kwargs[key] = _to_bytes(self.start_column)
        if self.end_column is not None:
            # Likewise for the end field.
            if self.inclusive_end:
                key = 'end_qualifier_closed'
            else:
                key = 'end_qualifier_open'
            column_range_kwargs[key] = _to_bytes(self.end_column)

        column_range = data_v2_pb2.ColumnRange(**column_range_kwargs)
        return data_v2_pb2.RowFilter(column_range_filter=column_range)
class ValueRegexFilter(_RegexFilter):
    """Row filter that matches cell values against a regular expression.

    The ``regex`` must be valid RE2 patterns. See Google's
    `RE2 reference`_ for the accepted syntax.

    .. _RE2 reference: https://github.com/google/re2/wiki/Syntax

    .. note::

        Special care must be taken with the expression used. Since
        each of these properties can contain arbitrary bytes, the ``\\C``
        escape sequence must be used if a true wildcard is desired. The
        ``.`` character will not match the new line character ``\\n``,
        which may be present in a binary value.

    :type regex: bytes
    :param regex: A regular expression (RE2) to match cells with values that
                  match this regex.
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose ``value_regex_filter`` field is
                  the stored ``regex``.
        """
        value_regex_pb = data_v2_pb2.RowFilter(value_regex_filter=self.regex)
        return value_regex_pb
class ValueRangeFilter(RowFilter):
    """A range of values to restrict to in a row filter.

    Will only match cells that have values in this range.

    Both the start and end value can be included or excluded in the range.
    By default, we include them both, but this can be changed with optional
    flags.

    :type start_value: bytes
    :param start_value: The start of the range of values. If no value is
                        used, the backend applies no lower bound to the
                        values.

    :type end_value: bytes
    :param end_value: The end of the range of values. If no value is used,
                      the backend applies no upper bound to the values.

    :type inclusive_start: bool
    :param inclusive_start: Boolean indicating if the start value should be
                            included in the range (or excluded). Defaults
                            to :data:`True` if ``start_value`` is passed and
                            no ``inclusive_start`` was given.

    :type inclusive_end: bool
    :param inclusive_end: Boolean indicating if the end value should be
                          included in the range (or excluded). Defaults
                          to :data:`True` if ``end_value`` is passed and
                          no ``inclusive_end`` was given.

    :raises: :class:`ValueError <exceptions.ValueError>` if
             ``inclusive_start`` is set but no ``start_value`` is given
             or if ``inclusive_end`` is set but no ``end_value`` is given
    """

    def __init__(self, start_value=None, end_value=None,
                 inclusive_start=None, inclusive_end=None):
        # Start bound: reject an explicit flag that has no value to qualify.
        if inclusive_start is not None:
            if start_value is None:
                raise ValueError('Inclusive start was specified but no '
                                 'start value was given.')
        else:
            inclusive_start = True
        self.start_value = start_value
        self.inclusive_start = inclusive_start

        # End bound: same rule.
        if inclusive_end is not None:
            if end_value is None:
                raise ValueError('Inclusive end was specified but no '
                                 'end value was given.')
        else:
            inclusive_end = True
        self.end_value = end_value
        self.inclusive_end = inclusive_end

    def __eq__(self, other):
        """Compare both bounds and both inclusivity flags.

        Instances of other types are never equal.
        """
        if isinstance(other, self.__class__):
            return (other.start_value == self.start_value and
                    other.end_value == self.end_value and
                    other.inclusive_start == self.inclusive_start and
                    other.inclusive_end == self.inclusive_end)
        return False

    def to_pb(self):
        """Converts the row filter to a protobuf.

        First converts to a :class:`.data_v2_pb2.ValueRange` and then uses
        it to create a row filter protobuf. Bounds that are not set are
        omitted from the message.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: The converted current object.
        """
        value_range_kwargs = {}

        lower = self.start_value
        if lower is not None:
            field_name = ('start_value_closed' if self.inclusive_start
                          else 'start_value_open')
            value_range_kwargs[field_name] = _to_bytes(lower)

        upper = self.end_value
        if upper is not None:
            field_name = ('end_value_closed' if self.inclusive_end
                          else 'end_value_open')
            value_range_kwargs[field_name] = _to_bytes(upper)

        value_range = data_v2_pb2.ValueRange(**value_range_kwargs)
        return data_v2_pb2.RowFilter(value_range_filter=value_range)
class _CellCountFilter(RowFilter):
    """Base for row filters parameterized by an integer count of cells.

    The cell count is used as an offset or a limit for the number
    of results returned.

    :type num_cells: int
    :param num_cells: An integer count / offset / limit.
    """

    def __init__(self, num_cells):
        self.num_cells = num_cells

    def __eq__(self, other):
        """Compare by ``num_cells``; other types are never equal."""
        if isinstance(other, self.__class__):
            return other.num_cells == self.num_cells
        return False
class CellsRowOffsetFilter(_CellCountFilter):
    """Row filter to skip cells in a row.

    :type num_cells: int
    :param num_cells: Skips the first N cells of the row.
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose
                  ``cells_per_row_offset_filter`` field is ``num_cells``.
        """
        offset = self.num_cells
        return data_v2_pb2.RowFilter(cells_per_row_offset_filter=offset)
class CellsRowLimitFilter(_CellCountFilter):
    """Row filter to limit cells in a row.

    :type num_cells: int
    :param num_cells: Matches only the first N cells of the row.
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose
                  ``cells_per_row_limit_filter`` field is ``num_cells``.
        """
        limit = self.num_cells
        return data_v2_pb2.RowFilter(cells_per_row_limit_filter=limit)
class CellsColumnLimitFilter(_CellCountFilter):
    """Row filter to limit cells in a column.

    :type num_cells: int
    :param num_cells: Matches only the most recent N cells within each
                      column. This filters a (family name, column) pair,
                      based on timestamps of each cell.
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose
                  ``cells_per_column_limit_filter`` field is ``num_cells``.
        """
        limit = self.num_cells
        return data_v2_pb2.RowFilter(cells_per_column_limit_filter=limit)
class StripValueTransformerFilter(_BoolFilter):
    """Row filter that transforms cells into empty string (0 bytes).

    :type flag: bool
    :param flag: If :data:`True`, replaces each cell's value with the empty
                 string. As the name indicates, this is more useful as a
                 transformer than a generic query / filter.
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose ``strip_value_transformer``
                  field is ``flag``.
        """
        strip_pb = data_v2_pb2.RowFilter(strip_value_transformer=self.flag)
        return strip_pb
class ApplyLabelFilter(RowFilter):
    """Filter to apply labels to cells.

    Intended to be used as an intermediate filter on a pre-existing filtered
    result set. This way if two sets are combined, the label can tell where
    the cell(s) originated. This allows the client to determine which
    results were produced from which part of the filter.

    .. note::

        Due to a technical limitation of the backend, it is not currently
        possible to apply multiple labels to a cell.

    :type label: str
    :param label: Label to apply to cells in the output row. Values must be
                  at most 15 characters long, and match the pattern
                  ``[a-z0-9\\-]+``.
    """

    def __init__(self, label):
        self.label = label

    def __eq__(self, other):
        """Compare by ``label``; other types are never equal."""
        if isinstance(other, self.__class__):
            return other.label == self.label
        return False

    def to_pb(self):
        """Build the protobuf form of this filter.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: A row filter message whose ``apply_label_transformer``
                  field is ``label``.
        """
        label_pb = data_v2_pb2.RowFilter(apply_label_transformer=self.label)
        return label_pb
class _FilterCombination(RowFilter):
    """Base for row filters that are built from a list of other filters.

    This class only stores and compares the list. How the member filters
    are combined is decided by the subclass that converts them to a
    protobuf (see :class:`RowFilterChain` and :class:`RowFilterUnion`).

    :type filters: list
    :param filters: (Optional) List of :class:`RowFilter`. A new empty list
                    is used when omitted.
    """

    def __init__(self, filters=None):
        # ``None`` stands in for "no filters" so that each instance gets
        # its own fresh list rather than a shared default.
        if filters is None:
            filters = []
        self.filters = filters

    def __eq__(self, other):
        """Compare by ``filters``; other types are never equal."""
        if not isinstance(other, self.__class__):
            return False
        return other.filters == self.filters
class RowFilterChain(_FilterCombination):
    """Chain of row filters.

    Sends rows through several filters in sequence. The filters are
    "chained" together to process a row. After the first filter is
    applied, the second is applied to the filtered output and so on for
    subsequent filters.

    :type filters: list
    :param filters: List of :class:`RowFilter`
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        Every member filter is converted with its own ``to_pb`` and the
        results are wrapped, in order, in a ``Chain`` message.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: The converted current object.
        """
        member_pbs = []
        for member in self.filters:
            member_pbs.append(member.to_pb())
        chain = data_v2_pb2.RowFilter.Chain(filters=member_pbs)
        return data_v2_pb2.RowFilter(chain=chain)
class RowFilterUnion(_FilterCombination):
    """Union of row filters.

    Sends rows through several filters simultaneously, then
    merges / interleaves all the filtered results together.

    If multiple cells are produced with the same column and timestamp,
    they will all appear in the output row in an unspecified mutual order.

    :type filters: list
    :param filters: List of :class:`RowFilter`
    """

    def to_pb(self):
        """Build the protobuf form of this filter.

        Every member filter is converted with its own ``to_pb`` and the
        results are wrapped, in order, in an ``Interleave`` message.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: The converted current object.
        """
        member_pbs = []
        for member in self.filters:
            member_pbs.append(member.to_pb())
        interleave = data_v2_pb2.RowFilter.Interleave(filters=member_pbs)
        return data_v2_pb2.RowFilter(interleave=interleave)
class ConditionalRowFilter(RowFilter):
    """Conditional row filter which exhibits ternary behavior.

    Executes one of two filters based on another filter. If the
    ``base_filter`` returns any cells in the row, then ``true_filter`` is
    executed. If not, then ``false_filter`` is executed.

    .. note::

        The ``base_filter`` does not execute atomically with the true and
        false filters, which may lead to inconsistent or unexpected results.

        Additionally, executing a :class:`ConditionalRowFilter` has poor
        performance on the server, especially when ``false_filter`` is set.

    :type base_filter: :class:`RowFilter`
    :param base_filter: The filter to condition on before executing the
                        true/false filters.

    :type true_filter: :class:`RowFilter`
    :param true_filter: (Optional) The filter to execute if there are any
                        cells matching ``base_filter``. If not provided, no
                        results will be returned in the true case.

    :type false_filter: :class:`RowFilter`
    :param false_filter: (Optional) The filter to execute if there are no
                         cells matching ``base_filter``. If not provided, no
                         results will be returned in the false case.
    """

    def __init__(self, base_filter, true_filter=None, false_filter=None):
        self.base_filter = base_filter
        self.true_filter = true_filter
        self.false_filter = false_filter

    def __eq__(self, other):
        """Compare all three member filters.

        Instances of other types are never equal.
        """
        if isinstance(other, self.__class__):
            return (other.base_filter == self.base_filter and
                    other.true_filter == self.true_filter and
                    other.false_filter == self.false_filter)
        return False

    def to_pb(self):
        """Build the protobuf form of this filter.

        The base filter always becomes ``predicate_filter``. The true and
        false branches are included only when they are set.

        :rtype: :class:`.data_v2_pb2.RowFilter`
        :returns: The converted current object.
        """
        condition_kwargs = {'predicate_filter': self.base_filter.to_pb()}
        optional_branches = (
            ('true_filter', self.true_filter),
            ('false_filter', self.false_filter),
        )
        for field_name, branch in optional_branches:
            if branch is not None:
                condition_kwargs[field_name] = branch.to_pb()
        condition = data_v2_pb2.RowFilter.Condition(**condition_kwargs)
        return data_v2_pb2.RowFilter(condition=condition)