Source code for s3vectorm.metadata

# -*- coding: utf-8 -*-

"""
S3 Vector Metadata Query Framework

This module provides a framework for building type-safe query expressions
for S3 vector metadata filtering. It supports AWS S3 vector search metadata
filtering operators and allows building complex nested queries using Python
operators.

The framework is designed with the following principles:
- Type safety through dataclasses and type hints
- Pythonic query building using operator overloading (&, |)
- Support for inheritance to create hierarchical metadata models
- Clean separation between data models and query logic

Example:
    >>> class DocumentMeta(BaseMetadata):
    ...     document_id = MetaKey()
    ...     chunk_seq = MetaKey()
    ...
    >>> meta = DocumentMeta()
    >>> query = meta.document_id.eq("doc-1") & meta.chunk_seq.gt(5)
    >>> query.to_doc()
    {'$and': [{'document_id': {'$eq': 'doc-1'}}, {'chunk_seq': {'$gt': 5}}]}
"""

import typing as T
import enum
import dataclasses


class OperatorEnum(str, enum.Enum):
    """
    Operator tokens accepted in S3 vector metadata filter documents.

    Every member is also a ``str``; its value is the literal operator key
    that appears in the rendered filter (for example ``"$eq"``).

    Reference:
        https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors-metadata-filtering.html#s3-vectors-metadata-filtering-filterable
    """

    # Comparison operators
    eq = "$eq"
    ne = "$ne"
    gt = "$gt"
    gte = "$gte"
    lt = "$lt"
    lte = "$lte"

    # Membership operators (trailing underscore avoids the ``in`` keyword)
    in_ = "$in"
    nin = "$nin"

    # Field presence operator
    exists = "$exists"

    # Logical combinators (trailing underscore avoids ``and`` / ``or``)
    and_ = "$and"
    or_ = "$or"


@dataclasses.dataclass
class Expr:
    """
    A single metadata filter condition: one field, one operator, one value.

    Expressions are combined with ``&`` and ``|`` into ``CompoundExpr``
    trees and rendered to a filter document with :meth:`to_doc`.

    Attributes:
        field: The metadata field name to filter on.
        operator: The filtering operator string (e.g. "$eq", "$gt", "$in").
        value: The value to compare against.

    Example:
        >>> expr = Expr(field="status", operator="$eq", value="active")
        >>> expr.to_doc()
        {'status': {'$eq': 'active'}}
    """

    field: str
    operator: str
    value: T.Any

    @staticmethod
    def _is_expression(candidate: T.Any) -> bool:
        """Return True when ``candidate`` exposes a callable ``to_doc``."""
        return callable(getattr(candidate, "to_doc", None))

    def __and__(self, other: T.Union["Expr", "CompoundExpr"]) -> "CompoundExpr":
        """
        Combine this expression with another using AND logic.

        Args:
            other: Another Expr or CompoundExpr to combine with this one.

        Returns:
            A CompoundExpr representing the AND operation, or
            ``NotImplemented`` when ``other`` has no ``to_doc`` method, so
            that ``expr & 5`` raises ``TypeError`` immediately instead of
            failing later inside ``to_doc``.
        """
        if not self._is_expression(other):
            return NotImplemented
        return CompoundExpr(
            left=self,
            operator=OperatorEnum.and_.value,
            right=other,
        )

    def __or__(self, other: T.Union["Expr", "CompoundExpr"]) -> "CompoundExpr":
        """
        Combine this expression with another using OR logic.

        Args:
            other: Another Expr or CompoundExpr to combine with this one.

        Returns:
            A CompoundExpr representing the OR operation, or
            ``NotImplemented`` when ``other`` has no ``to_doc`` method, so
            that ``expr | 5`` raises ``TypeError`` immediately instead of
            failing later inside ``to_doc``.
        """
        if not self._is_expression(other):
            return NotImplemented
        return CompoundExpr(
            left=self,
            operator=OperatorEnum.or_.value,
            right=other,
        )

    def to_doc(self) -> dict:
        """
        Convert the expression to a dictionary suitable for S3 filtering.

        Returns:
            ``{field: {operator: value}}``.
        """
        return {self.field: {self.operator: self.value}}


@dataclasses.dataclass
class CompoundExpr:
    """
    Two sub-expressions joined by a logical operator.

    Either side may itself be a compound expression, which is how nested
    queries are represented.

    Attributes:
        left: The left-side expression (Expr or CompoundExpr).
        operator: The logical operator string ("$and" or "$or").
        right: The right-side expression (Expr or CompoundExpr).

    Example:
        >>> expr1 = Expr(field="status", operator="$eq", value="active")
        >>> expr2 = Expr(field="priority", operator="$gt", value=5)
        >>> compound = CompoundExpr(left=expr1, operator="$and", right=expr2)
        >>> compound.to_doc()
        {'$and': [{'status': {'$eq': 'active'}}, {'priority': {'$gt': 5}}]}
    """

    left: T.Union["Expr", "CompoundExpr"]
    operator: str  # "$and" or "$or"
    right: T.Union["Expr", "CompoundExpr"]

    @staticmethod
    def _is_expression(candidate: T.Any) -> bool:
        """Return True when ``candidate`` exposes a callable ``to_doc``."""
        return callable(getattr(candidate, "to_doc", None))

    def __and__(self, other: T.Union["Expr", "CompoundExpr"]) -> "CompoundExpr":
        """
        Chain this compound expression with another using AND logic.

        Args:
            other: Another Expr or CompoundExpr to combine with this one.

        Returns:
            A new CompoundExpr representing the AND operation, or
            ``NotImplemented`` when ``other`` has no ``to_doc`` method, so
            that an invalid operand raises ``TypeError`` immediately
            instead of failing later inside ``to_doc``.
        """
        if not self._is_expression(other):
            return NotImplemented
        return CompoundExpr(
            left=self,
            operator=OperatorEnum.and_.value,
            right=other,
        )

    def __or__(self, other: T.Union["Expr", "CompoundExpr"]) -> "CompoundExpr":
        """
        Chain this compound expression with another using OR logic.

        Args:
            other: Another Expr or CompoundExpr to combine with this one.

        Returns:
            A new CompoundExpr representing the OR operation, or
            ``NotImplemented`` when ``other`` has no ``to_doc`` method, so
            that an invalid operand raises ``TypeError`` immediately
            instead of failing later inside ``to_doc``.
        """
        if not self._is_expression(other):
            return NotImplemented
        return CompoundExpr(
            left=self,
            operator=OperatorEnum.or_.value,
            right=other,
        )

    def to_doc(self) -> dict:
        """
        Convert the compound expression to a dictionary for S3 filtering.

        Returns:
            ``{operator: [left_doc, right_doc]}`` where each side is the
            result of calling ``to_doc()`` on the corresponding operand.
        """
        return {self.operator: [self.left.to_doc(), self.right.to_doc()]}


@dataclasses.dataclass
class MetaKey:
    """
    A named metadata field from which filter expressions are built.

    Each operator method returns an ``Expr`` whose ``field`` is this key's
    ``name``; the resulting expressions can then be combined into larger
    queries.

    Attributes:
        name: The field name used in query expressions. Defaults to "".

    Example:
        >>> field = MetaKey(name="status")
        >>> expr = field.eq("active")
        >>> expr.to_doc()
        {'status': {'$eq': 'active'}}
    """

    name: str = ""

    def _to_expr(self, op: "OperatorEnum", other: T.Any) -> "Expr":
        """
        Build the Expr for this field with the given operator and value.

        Args:
            op: The operator enum member; its ``value`` string is stored.
            other: The value to compare against.

        Returns:
            An Expr for ``(self.name, op.value, other)``.
        """
        operator_token = op.value
        return Expr(self.name, operator_token, other)

    def eq(self, other: T.Any) -> "Expr":
        """Build a ``$eq`` expression (field == value)."""
        return self._to_expr(OperatorEnum.eq, other)

    def ne(self, other: T.Any) -> "Expr":
        """Build a ``$ne`` expression (field != value)."""
        return self._to_expr(OperatorEnum.ne, other)

    def gt(self, other: T.Any) -> "Expr":
        """Build a ``$gt`` expression (field > value)."""
        return self._to_expr(OperatorEnum.gt, other)

    def gte(self, other: T.Any) -> "Expr":
        """Build a ``$gte`` expression (field >= value)."""
        return self._to_expr(OperatorEnum.gte, other)

    def lt(self, other: T.Any) -> "Expr":
        """Build a ``$lt`` expression (field < value)."""
        return self._to_expr(OperatorEnum.lt, other)

    def lte(self, other: T.Any) -> "Expr":
        """Build a ``$lte`` expression (field <= value)."""
        return self._to_expr(OperatorEnum.lte, other)

    def in_(self, other: T.Any) -> "Expr":
        """Build a ``$in`` expression (field in [values])."""
        return self._to_expr(OperatorEnum.in_, other)

    def nin(self, other: T.Any) -> "Expr":
        """Build a ``$nin`` expression (field not in [values])."""
        return self._to_expr(OperatorEnum.nin, other)

    def exists(self, other: bool) -> "Expr":
        """Build a ``$exists`` expression (field exists / doesn't exist)."""
        return self._to_expr(OperatorEnum.exists, other)


class MetaClass(type):
    """
    Metaclass that collects MetaKey class attributes into ``_model_fields``.

    When a class using this metaclass is created, it:

    1. Copies the ``_model_fields`` entries of every base class that has
       them, so fields are inherited.
    2. Scans the new class's own namespace for MetaKey instances.
    3. Gives any MetaKey with an empty ``name`` the attribute name it was
       assigned to.
    4. Stores the resulting ``{attribute_name: MetaKey}`` mapping on the
       class as ``_model_fields``.
    """

    def __new__(mcs, name, bases, namespace, **kwargs):
        """
        Create a new metadata class with registered MetaKey fields.

        Args:
            mcs: The metaclass.
            name: The name of the class being created.
            bases: Base classes.
            namespace: Class namespace containing attributes and methods.
            **kwargs: Additional keyword arguments forwarded to ``type``.

        Returns:
            The newly created class with a ``_model_fields`` attribute.
        """
        fields = {}

        # Bases are visited right-to-left so that, on a name clash, the
        # leftmost base's field is the one that remains in the mapping.
        for base in reversed(bases):
            fields.update(getattr(base, "_model_fields", {}))

        # A MetaKey defined on this class always replaces an inherited
        # field of the same name. Skipping such overrides would leave the
        # new MetaKey with an empty ``name`` (rendering filters keyed by
        # "") while ``_model_fields`` kept pointing at the parent's key.
        for field_name, field_value in namespace.items():
            if not isinstance(field_value, MetaKey):
                continue
            if not field_value.name:
                # The instance is updated in place so the class attribute
                # and the registry entry are the same named object.
                field_value.name = field_name
            fields[field_name] = field_value

        cls = super().__new__(mcs, name, bases, namespace, **kwargs)
        cls._model_fields = fields
        return cls


class BaseMetadata(metaclass=MetaClass):
    """
    Root class for metadata models with queryable fields.

    Subclasses declare their fields as MetaKey class attributes. Because
    this class is built by ``MetaClass``, those attributes are collected
    (together with the ones declared on parent classes) when the subclass
    is defined, and can be used directly on the class to build queries.

    Example:
        >>> class DocumentMeta(BaseMetadata):
        ...     document_id = MetaKey()
        ...     status = MetaKey()
        ...
        >>> query = DocumentMeta.document_id.eq("doc-1") & DocumentMeta.status.eq("active")
    """