diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ddc5b2f..a7961683 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,15 @@ ## Unreleased Changes None +## [2.1.0](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.13...v2.1.0) +- Fix "Connection Reset by peer" error after long idle periods +- Add typing and explicit names for arguments in all client operations +- Add docstrings to all client operations +- Support batch upsert by passing `batch_size` to `upsert` method +- Improve gRPC query results parsing performance -## [2.0.13](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.13...v2.0.12) + +## [2.0.13](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.12...v2.0.13) - Added support for collections - Users can manage collections using ``create_collection`` , ``describe_collection`` and ``delete_collection`` calls. - Users can specify additional ``source_collection`` parameter during index creation to create index from a collection @@ -11,13 +18,13 @@ None - Added support for vertical scaling. This can be done by changing ```pod_type ``` via the ```configure_index``` call or during index creation. - Updated dependency requirements for grpc client. -## [2.0.12](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.12...v2.0.11) +## [2.0.12](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.11...v2.0.12) - Changed grpcio verison to be > 1.44.1 - Sanitized repo by removing leftover files from old versions. - Added more info to ```describe_index_stats``` call. The call now gives a namespace wise vector count breakdown. -## [2.0.11](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.11...v2.0.10) +## [2.0.11](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.10...v2.0.11) ### Changed - Added support of querying by a single vector. - This is a step in deprecating batch queries. 
diff --git a/pinecone/__version__ b/pinecone/__version__ index 82bd22f9..7ec1d6db 100644 --- a/pinecone/__version__ +++ b/pinecone/__version__ @@ -1 +1 @@ -2.0.13 +2.1.0 diff --git a/pinecone/config.py b/pinecone/config.py index 876b34d1..84b5793a 100644 --- a/pinecone/config.py +++ b/pinecone/config.py @@ -2,17 +2,22 @@ # Copyright (c) 2020-2021 Pinecone Systems Inc. All right reserved. # import logging -from typing import NamedTuple +import sys +from typing import NamedTuple, List import os import certifi import requests import configparser +import socket + +from urllib3.connection import HTTPConnection from pinecone.core.client.exceptions import ApiKeyError from pinecone.core.api_action import ActionAPI, WhoAmIResponse from pinecone.core.utils import warn_deprecated -from pinecone.core.utils.constants import CLIENT_VERSION, PARENT_LOGGER_NAME, DEFAULT_PARENT_LOGGER_LEVEL +from pinecone.core.utils.constants import CLIENT_VERSION, PARENT_LOGGER_NAME, DEFAULT_PARENT_LOGGER_LEVEL, \ + TCP_KEEPIDLE, TCP_KEEPINTVL, TCP_KEEPCNT from pinecone.core.client.configuration import Configuration as OpenApiConfiguration __all__ = [ @@ -37,7 +42,7 @@ class _CONFIG: Order of configs to load: - - configs specified explictly in reset + - configs specified explicitly in reset - environment variables - configs specified in the INI file - default configs @@ -109,6 +114,8 @@ def reset(self, config_file=None, **kwargs): or default_openapi_config ) + openapi_config.socket_options = self._get_socket_options() + config = config._replace(openapi_config=openapi_config) self._config = config @@ -144,6 +151,54 @@ def _load_config_file(self, config_file: str) -> dict: config_obj = {**parser["default"]} return config_obj + @staticmethod + def _get_socket_options(do_keep_alive: bool = True, + keep_alive_idle_sec: int = TCP_KEEPIDLE, + keep_alive_interval_sec: int = TCP_KEEPINTVL, + keep_alive_tries: int = TCP_KEEPCNT + ) -> List[tuple]: + """ + Returns the socket options to pass to OpenAPI's Rest 
client + Args: + do_keep_alive: Whether to enable TCP keep alive mechanism + keep_alive_idle_sec: Time in seconds of connection idleness before starting to send keep alive probes + keep_alive_interval_sec: Interval time in seconds between keep alive probe messages + keep_alive_tries: Number of failed keep alive tries (unanswered KA messages) before terminating the connection + + Returns: + A list of socket options for the Rest client's connection pool + """ + # Source: https://www.finbourne.com/blog/the-mysterious-hanging-client-tcp-keep-alives + + socket_params = HTTPConnection.default_socket_options + if not do_keep_alive: + return socket_params + + socket_params += [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)] + + # TCP Keep Alive Probes for different platforms + platform = sys.platform + # TCP Keep Alive Probes for Linux + if platform == 'linux' and hasattr(socket, "TCP_KEEPIDLE") and hasattr(socket, "TCP_KEEPINTVL") \ + and hasattr(socket, "TCP_KEEPCNT"): + socket_params += [(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, keep_alive_idle_sec)] + socket_params += [(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, keep_alive_interval_sec)] + socket_params += [(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, keep_alive_tries)] + + # TCP Keep Alive Probes for Windows OS + # NOTE: Changing TCP KA params on windows is done via a different mechanism which OpenAPI's Rest client doesn't expose. + # Since the default values work well, it seems setting `(socket.SO_KEEPALIVE, 1)` is sufficient. + # Leaving this code here for future reference. 
+ # elif platform == 'win32' and hasattr(socket, "SIO_KEEPALIVE_VALS"): + # socket.ioctl((socket.SIO_KEEPALIVE_VALS, (1, keep_alive_idle_sec * 1000, keep_alive_interval_sec * 1000))) + + # TCP Keep Alive Probes for Mac OS + elif platform == 'darwin': + TCP_KEEPALIVE = 0x10 + socket_params += [(socket.IPPROTO_TCP, TCP_KEEPALIVE, keep_alive_interval_sec)] + + return socket_params + @property def ENVIRONMENT(self): return self._config.environment diff --git a/pinecone/core/client/model/query_request.py b/pinecone/core/client/model/query_request.py index b90f3395..dee6245f 100644 --- a/pinecone/core/client/model/query_request.py +++ b/pinecone/core/client/model/query_request.py @@ -308,4 +308,4 @@ def __init__(self, top_k, *args, **kwargs): # noqa: E501 setattr(self, var_name, var_value) if var_name in self.read_only_vars: raise ApiAttributeError(f"`{var_name}` is a read-only attribute. Use `from_openapi_data` to instantiate " - f"class with read only attributes.") + f"class with read only attributes.") diff --git a/pinecone/core/grpc/index_grpc.py b/pinecone/core/grpc/index_grpc.py index b7704f52..7136d131 100644 --- a/pinecone/core/grpc/index_grpc.py +++ b/pinecone/core/grpc/index_grpc.py @@ -4,20 +4,21 @@ import logging from abc import ABC, abstractmethod from functools import wraps -from typing import NamedTuple, Optional, Dict, Iterable +from typing import NamedTuple, Optional, Dict, Iterable, Union, List, Tuple, Any import certifi import grpc from google.protobuf import json_format from grpc._channel import _InactiveRpcError, _MultiThreadedRendezvous -from pinecone import FetchResponse, QueryResponse, ScoredVector, SingleQueryResults, \ - UpsertResponse, DescribeIndexStatsResponse +from tqdm import tqdm + +from pinecone import FetchResponse, QueryResponse, ScoredVector, SingleQueryResults, DescribeIndexStatsResponse from pinecone.config import Config from pinecone.core.client.model.namespace_summary import NamespaceSummary from pinecone.core.client.model.vector 
import Vector as _Vector from pinecone.core.grpc.protos.vector_service_pb2 import Vector as GRPCVector, \ - QueryVector as GRPCQueryVector, UpsertRequest, DeleteRequest, QueryRequest, \ - FetchRequest, UpdateRequest, DescribeIndexStatsRequest + QueryVector as GRPCQueryVector, UpsertRequest, UpsertResponse, DeleteRequest, QueryRequest, \ + FetchRequest, UpdateRequest, DescribeIndexStatsRequest, DeleteResponse, UpdateResponse from pinecone.core.grpc.protos.vector_service_pb2_grpc import VectorServiceStub from pinecone.core.grpc.retry import RetryOnRpcErrorClientInterceptor, RetryConfig from pinecone.core.utils import _generate_request_id, dict_to_proto_struct, fix_tuple_length @@ -171,9 +172,10 @@ def parse_fetch_response(response: dict): return FetchResponse(vectors=vd, namespace=namespace, _check_type=False) -def parse_query_response(response: dict, unary_query: bool): +def parse_query_response(response: dict, unary_query: bool, _check_type: bool = False): res = [] + # TODO: consider deleting this deprecated case for match in response.get('results', []): namespace = match.get('namespace', '') m = [] @@ -187,10 +189,10 @@ def parse_query_response(response: dict, unary_query: bool): m = [] for item in response.get('matches', []): sc = ScoredVector(id=item['id'], score=item.get('score', 0.0), values=item.get('values', []), - metadata=item.get('metadata', {})) + metadata=item.get('metadata', {}), _check_type=_check_type) m.append(sc) - kwargs = {'check_type': False} + kwargs = {'_check_type': _check_type} if unary_query: kwargs['namespace'] = response.get('namespace', '') kwargs['matches'] = m @@ -199,11 +201,6 @@ def parse_query_response(response: dict, unary_query: bool): return QueryResponse(**kwargs) -def parse_upsert_response(response): - response = json_format.MessageToDict(response) - return UpsertResponse(upserted_count=response['upsertedCount'], _check_type=False) - - def parse_stats_response(response: dict): fullness = response.get('indexFullness', 0.0) 
total_vector_count = response.get('totalVectorCount', 0) @@ -251,11 +248,60 @@ def traceback(self, timeout=None): class GRPCIndex(GRPCIndexBase): + """A client for interacting with a Pinecone index via GRPC API.""" + @property def stub_class(self): return VectorServiceStub - def upsert(self, vectors, async_req=False, **kwargs): + def upsert(self, + vectors: Union[List[GRPCVector], List[Tuple]], + async_req: bool = False, + namespace: Optional[str] = None, + batch_size: Optional[int] = None, + show_progress: bool = True, + **kwargs) -> Union[UpsertResponse, PineconeGrpcFuture]: + """ + The upsert operation writes vectors into a namespace. + If a new value is upserted for an existing vector id, it will overwrite the previous value. + + Examples: + >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])], namespace='ns1') + >>> index.upsert([GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), + >>> GRPCVector(id='id2', values=[1.0, 2.0, 3.0])], async_req=True) + + Args: + vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert. + + A vector can be represented by a 1) GRPCVector object or a 2) tuple. + 1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values). + where id is a string, vector is a list of floats, and metadata is a dict. + Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) + + 2) if a GRPCVector object is used, a GRPCVector object must be of the form + GRPCVector(id, values, metadata), where metadata is an optional argument of type + Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]] + Examples: GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), + GRPCVector(id='id2', values=[1.0, 2.0, 3.0]) + + Note: the dimension of each vector must match the dimension of the index. + async_req (bool): If True, the upsert operation will be performed asynchronously. + Cannot be used with batch_size. 
+ Defaults to False. See: https://docs.pinecone.io/docs/performance-tuning [optional] + namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional] + batch_size (int): The number of vectors to upsert in each batch. + Cannot be used with async_req=Ture. + If not specified, all vectors will be upserted in a single batch. [optional] + show_progress (bool): Whether to show a progress bar using tqdm. + Applied only if batch_size is provided. Default is True. + + Returns: UpsertResponse, contains the number of vectors upserted + """ + if async_req and batch_size is not None: + raise ValueError('async_req is not supported when batch_size is provided.' + 'To upsert in parallel, please follow: ' + 'https://docs.pinecone.io/docs/performance-tuning') + def _vector_transform(item): if isinstance(item, GRPCVector): return item @@ -264,37 +310,167 @@ def _vector_transform(item): return GRPCVector(id=id, values=values, metadata=dict_to_proto_struct(metadata) or {}) raise ValueError(f"Invalid vector value passed: cannot interpret type {type(item)}") - request = UpsertRequest(vectors=list(map(_vector_transform, vectors)), **kwargs) timeout = kwargs.pop('timeout', None) + + vectors = list(map(_vector_transform, vectors)) if async_req: + args_dict = self._parse_non_empty_args([('namespace', namespace)]) + request = UpsertRequest(vectors=vectors, **args_dict, **kwargs) future = self._wrap_grpc_call(self.stub.Upsert.future, request, timeout=timeout) return PineconeGrpcFuture(future) - else: - return self._wrap_grpc_call(self.stub.Upsert, request, timeout=timeout) - - def delete(self, *args, async_req=False, **kwargs): - _filter = dict_to_proto_struct(kwargs.pop('filter', None)) - filter_param = {} - if _filter: - filter_param['filter'] = _filter - request = DeleteRequest(*args, **kwargs, **filter_param) + + if batch_size is None: + return self._upsert_batch(vectors, namespace, timeout=timeout, **kwargs) + + if not isinstance(batch_size, int) 
or batch_size <= 0: + raise ValueError('batch_size must be a positive integer') + + pbar = tqdm(total=len(vectors), disable=not show_progress, desc='Upserted vectors') + total_upserted = 0 + for i in range(0, len(vectors), batch_size): + batch_result = self._upsert_batch(vectors[i:i + batch_size], namespace, timeout=timeout, **kwargs) + pbar.update(batch_result.upserted_count) + # we can't use here pbar.n for the case show_progress=False + total_upserted += batch_result.upserted_count + + return UpsertResponse(upserted_count=total_upserted) + + def _upsert_batch(self, + vectors: List[GRPCVector], + namespace: Optional[str], + timeout: Optional[float], + **kwargs) -> UpsertResponse: + args_dict = self._parse_non_empty_args([('namespace', namespace)]) + request = UpsertRequest(vectors=vectors, **args_dict) + return self._wrap_grpc_call(self.stub.Upsert, request, timeout=timeout, **kwargs) + + def delete(self, + ids: Optional[List[str]] = None, + delete_all: Optional[bool] = None, + namespace: Optional[str] = None, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + async_req: bool = False, + **kwargs) -> Union[DeleteResponse, PineconeGrpcFuture]: + """ + The Delete operation deletes vectors from the index, from a single namespace. + No error raised if the vector id does not exist. + Note: for any delete call, if namespace is not specified, the default namespace is used. + + Delete can occur in the following mutual exclusive ways: + 1. Delete by ids from a single namespace + 2. Delete all vectors from a single namespace by setting delete_all to True + 3. 
Delete all vectors from a single namespace by specifying a metadata filter + (note that for this option delete all must be set to False) + + Examples: + >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') + >>> index.delete(delete_all=True, namespace='my_namespace') + >>> index.delete(filter={'key': 'value'}, namespace='my_namespace', async_req=True) + + Args: + ids (List[str]): Vector ids to delete [optional] + delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] + Default is False. + namespace (str): The namespace to delete vectors from [optional] + If not specified, the default namespace is used. + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + If specified, the metadata filter here will be used to select the vectors to delete. + This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + async_req (bool): If True, the delete operation will be performed asynchronously. + Defaults to False. [optional] + + Returns: DeleteResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. + """ + + if filter is not None: + filter = dict_to_proto_struct(filter) + + args_dict = self._parse_non_empty_args([('ids', ids), + ('delete_all', delete_all), + ('namespace', namespace), + ('filter', filter)]) timeout = kwargs.pop('timeout', None) + + request = DeleteRequest(**args_dict, **kwargs) if async_req: future = self._wrap_grpc_call(self.stub.Delete.future, request, timeout=timeout) return PineconeGrpcFuture(future) else: return self._wrap_grpc_call(self.stub.Delete, request, timeout=timeout) - def fetch(self, *args, **kwargs): + def fetch(self, + ids: Optional[List[str]], + namespace: Optional[str] = None, + **kwargs) -> FetchResponse: + """ + The fetch operation looks up and returns vectors, by ID, from a single namespace. 
+ The returned vectors include the vector data and/or metadata. + + Examples: + >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') + >>> index.fetch(ids=['id1', 'id2']) + + Args: + ids (List[str]): The vector IDs to fetch. + namespace (str): The namespace to fetch vectors from. + If not specified, the default namespace is used. [optional] + + Returns: FetchResponse object which contains the list of Vector objects, and namespace name. + """ timeout = kwargs.pop('timeout', None) - request = FetchRequest(*args, **kwargs) + + args_dict = self._parse_non_empty_args([('namespace', namespace)]) + + request = FetchRequest(ids=ids, **args_dict, **kwargs) response = self._wrap_grpc_call(self.stub.Fetch, request, timeout=timeout) json_response = json_format.MessageToDict(response) return parse_fetch_response(json_response) - def query(self, vector=[], id='', queries=[], **kwargs): - timeout = kwargs.pop('timeout', None) - + def query(self, + vector: Optional[List[float]] = None, + id: Optional[str] = None, + queries: Optional[Union[List[GRPCQueryVector], List[Tuple]]] = None, + namespace: Optional[str] = None, + top_k: Optional[int] = None, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + include_values: Optional[bool] = None, + include_metadata: Optional[bool] = None, + **kwargs) -> QueryResponse: + """ + The Query operation searches a namespace, using a query vector. + It retrieves the ids of the most similar items in a namespace, along with their similarity scores. + + Examples: + >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') + >>> index.query(id='id1', top_k=10, namespace='my_namespace') + >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) + >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) + + Args: + vector (List[float]): The query vector. 
This should be the same length as the dimension of the index + being queried. Each `query()` request can contain only one of the parameters + `queries`, `id` or `vector`.. [optional] + id (str): The unique ID of the vector to be used as a query vector. + Each `query()` request can contain only one of the parameters + `queries`, `vector`, or `id`.. [optional] + queries ([GRPCQueryVector]): DEPRECATED. The query vectors. + Each `query()` request can contain only one of the parameters + `queries`, `vector`, or `id`.. [optional] + top_k (int): The number of results to return for each query. Must be an integer greater than 1. + namespace (str): The namespace to fetch vectors from. + If not specified, the default namespace is used. [optional] + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + The filter to apply. You can use vector metadata to limit your search. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + include_values (bool): Indicates whether vector values are included in the response. + If omitted the server will use the default value of False [optional] + include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. + If omitted the server will use the default value of False [optional] + + Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, + and namespace name. 
+ """ def _query_transform(item): if isinstance(item, GRPCQueryVector): return item @@ -306,36 +482,99 @@ def _query_transform(item): return GRPCQueryVector(values=item) raise ValueError(f"Invalid query vector value passed: cannot interpret type {type(item)}") - _QUERY_ARGS = ['namespace', 'top_k', 'filter', 'include_values', 'include_metadata'] - if 'filter' in kwargs: - kwargs['filter'] = dict_to_proto_struct(kwargs['filter']) - request = QueryRequest(queries=list(map(_query_transform, queries)), - vector=vector, - id=id, - **{k: v for k, v in kwargs.items() if k in _QUERY_ARGS}) + queries = list(map(_query_transform, queries)) if queries is not None else None + + if filter is not None: + filter = dict_to_proto_struct(filter) + + args_dict = self._parse_non_empty_args([('vector', vector), + ('id', id), + ('queries', queries), + ('namespace', namespace), + ('top_k', top_k), + ('filter', filter), + ('include_values', include_values), + ('include_metadata', include_metadata)]) + + request = QueryRequest(**args_dict) + + timeout = kwargs.pop('timeout', None) response = self._wrap_grpc_call(self.stub.Query, request, timeout=timeout) json_response = json_format.MessageToDict(response) - return parse_query_response(json_response, vector or id) - - def update(self, id, async_req=False, **kwargs): - _UPDATE_ARGS = ['values', 'set_metadata', 'namespace'] - if 'set_metadata' in kwargs: - kwargs['set_metadata'] = dict_to_proto_struct(kwargs['set_metadata']) - request = UpdateRequest(id=id, **{k: v for k, v in kwargs.items() if k in _UPDATE_ARGS}) + return parse_query_response(json_response, vector is not None or id, _check_type=False) + + def update(self, + id: str, + async_req: bool = False, + values: Optional[List[float]] = None, + set_metadata: Optional[Dict[str, + Union[str, float, int, bool, List[int], List[float], List[str]]]] = None, + namespace: Optional[str] = None, + **kwargs) -> Union[UpdateResponse, PineconeGrpcFuture]: + """ + The Update operation updates vector 
in a namespace. + If a value is included, it will overwrite the previous value. + If a set_metadata is included, + the values of the fields specified in it will be added or overwrite the previous value. + + Examples: + >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') + >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace', async_req=True) + + Args: + id (str): Vector's unique id. + async_req (bool): If True, the update operation will be performed asynchronously. + Defaults to False. [optional] + values (List[float]): vector values to set. [optional] + set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): + metadata to set for vector. [optional] + namespace (str): Namespace name where to update the vector.. [optional] + + Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. + """ + if set_metadata is not None: + set_metadata = dict_to_proto_struct(set_metadata) timeout = kwargs.pop('timeout', None) + + args_dict = self._parse_non_empty_args([('values', values), + ('set_metadata', set_metadata), + ('namespace', namespace)]) + + request = UpdateRequest(id=id, **args_dict) if async_req: future = self._wrap_grpc_call(self.stub.Update.future, request, timeout=timeout) return PineconeGrpcFuture(future) else: return self._wrap_grpc_call(self.stub.Update, request, timeout=timeout) - def describe_index_stats(self, **kwargs): - _filter = dict_to_proto_struct(kwargs.pop('filter', None)) - filter_param = {} - if _filter: - filter_param['filter'] = _filter + def describe_index_stats(self, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + **kwargs) -> DescribeIndexStatsResponse: + """ + The DescribeIndexStats operation returns statistics about the index's contents. + For example: The vector count per namespace and the number of dimensions. 
+ + Examples: + >>> index.describe_index_stats() + >>> index.describe_index_stats(filter={'key': 'value'}) + + Args: + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + + Returns: DescribeIndexStatsResponse object which contains stats about the index. + """ + if filter is not None: + filter = dict_to_proto_struct(filter) + args_dict = self._parse_non_empty_args([('filter', filter)]) timeout = kwargs.pop('timeout', None) - request = DescribeIndexStatsRequest(**filter_param) + + request = DescribeIndexStatsRequest(**args_dict) response = self._wrap_grpc_call(self.stub.DescribeIndexStats, request, timeout=timeout) json_response = json_format.MessageToDict(response) return parse_stats_response(json_response) + + @staticmethod + def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]: + return {arg_name: val for arg_name, val in args if val is not None} diff --git a/pinecone/core/utils/constants.py b/pinecone/core/utils/constants.py index e7ed81b5..d03f1a71 100644 --- a/pinecone/core/utils/constants.py +++ b/pinecone/core/utils/constants.py @@ -33,3 +33,7 @@ class NodeType(str, enum.Enum): PACKAGE_ENVIRONMENT = get_environment() or "development" CLIENT_VERSION = get_version() CLIENT_ID = f'python-client-{CLIENT_VERSION}' + +TCP_KEEPINTVL = 60 # Sec +TCP_KEEPIDLE = 300 # Sec +TCP_KEEPCNT = 4 diff --git a/pinecone/index.py b/pinecone/index.py index 945130e2..b14b39e7 100644 --- a/pinecone/index.py +++ b/pinecone/index.py @@ -1,11 +1,12 @@ # # Copyright (c) 2020-2021 Pinecone Systems Inc. All right reserved. 
# - +from tqdm import tqdm from collections.abc import Iterable +from typing import Union, List, Tuple, Optional, Dict, Any from pinecone import Config -from pinecone.core.client import ApiClient, Configuration +from pinecone.core.client import ApiClient from .core.client.models import FetchResponse, ProtobufAny, QueryRequest, QueryResponse, QueryVector, RpcStatus, \ ScoredVector, SingleQueryResults, DescribeIndexStatsResponse, UpsertRequest, UpsertResponse, UpdateRequest, \ Vector, DeleteRequest, UpdateRequest, DescribeIndexStatsRequest @@ -38,6 +39,10 @@ def parse_query_response(response: QueryResponse, unary_query: bool): class Index(ApiClient): + """ + A client for interacting with a Pinecone index via REST API. + For improved performance, use the Pinecone GRPC index client. + """ def __init__(self, index_name: str, pool_threads=1): openapi_client_config = copy.deepcopy(Config.OPENAPI_CONFIG) openapi_client_config.api_key = openapi_client_config.api_key or {} @@ -56,10 +61,83 @@ def __init__(self, index_name: str, pool_threads=1): self._vector_api = VectorOperationsApi(self) @validate_and_convert_errors - def upsert(self, vectors, **kwargs): + def upsert(self, + vectors: Union[List[Vector], List[Tuple]], + namespace: Optional[str] = None, + batch_size: Optional[int] = None, + show_progress: bool = True, + **kwargs) -> UpsertResponse: + """ + The upsert operation writes vectors into a namespace. + If a new value is upserted for an existing vector id, it will overwrite the previous value. 
+ + API reference: https://docs.pinecone.io/reference/upsert + + To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel + + Examples: + >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])]) + >>> index.upsert([Vector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), + >>> Vector(id='id2', values=[1.0, 2.0, 3.0])]) + + Args: + vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert. + + A vector can be represented by a 1) Vector object or a 2) tuple. + 1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values). + where id is a string, vector is a list of floats, and metadata is a dict. + Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) + + 2) if a Vector object is used, a Vector object must be of the form Vector(id, values, metadata), + where metadata is an optional argument of the type + Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]. + Examples: Vector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), + Vector(id='id2', values=[1.0, 2.0, 3.0]) + + Note: the dimension of each vector must match the dimension of the index. + + namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional] + batch_size (int): The number of vectors to upsert in each batch. + If not specified, all vectors will be upserted in a single batch. [optional] + show_progress (bool): Whether to show a progress bar using tqdm. + Applied only if batch_size is provided. Default is True. + Keyword Args: + Supports OpenAPI client keyword arguments. See pinecone.core.client.models.UpsertRequest for more details. + + Returns: UpsertResponse, includes the number of vectors upserted. 
+ """ _check_type = kwargs.pop('_check_type', False) - def _vector_transform(item): + if kwargs.get('async_req', False) and batch_size is not None: + raise ValueError('async_req is not supported when batch_size is provided.' + 'To upsert in parallel, please follow: ' + 'https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel') + + if batch_size is None: + return self._upsert_batch(vectors, namespace, _check_type, **kwargs) + + if not isinstance(batch_size, int) or batch_size <= 0: + raise ValueError('batch_size must be a positive integer') + + pbar = tqdm(total=len(vectors), disable=not show_progress, desc='Upserted vectors') + total_upserted = 0 + for i in range(0, len(vectors), batch_size): + batch_result = self._upsert_batch(vectors[i:i + batch_size], namespace, _check_type, **kwargs) + pbar.update(batch_result.upserted_count) + # we can't use here pbar.n for the case show_progress=False + total_upserted += batch_result.upserted_count + + return UpsertResponse(upserted_count=total_upserted) + + def _upsert_batch(self, + vectors: List[Vector], + namespace: Optional[str], + _check_type: bool, + **kwargs) -> UpsertResponse: + + args_dict = self._parse_non_empty_args([('namespace', namespace)]) + + def _vector_transform(item: Union[Vector, Tuple]): if isinstance(item, Vector): return item if isinstance(item, tuple): @@ -70,6 +148,7 @@ def _vector_transform(item): return self._vector_api.upsert( UpsertRequest( vectors=list(map(_vector_transform, vectors)), + **args_dict, _check_type=_check_type, **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS} ), @@ -77,24 +156,163 @@ def _vector_transform(item): ) @validate_and_convert_errors - def delete(self, *args, **kwargs): + def delete(self, + ids: Optional[List[str]] = None, + delete_all: Optional[bool] = None, + namespace: Optional[str] = None, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + **kwargs) -> Dict[str, Any]: + """ + The Delete operation 
deletes vectors from the index, from a single namespace. + No error raised if the vector id does not exist. + Note: for any delete call, if namespace is not specified, the default namespace is used. + + Delete can occur in the following mutual exclusive ways: + 1. Delete by ids from a single namespace + 2. Delete all vectors from a single namespace by setting delete_all to True + 3. Delete all vectors from a single namespace by specifying a metadata filter + (note that for this option delete all must be set to False) + + API reference: https://docs.pinecone.io/reference/delete_post + + Examples: + >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') + >>> index.delete(delete_all=True, namespace='my_namespace') + >>> index.delete(filter={'key': 'value'}, namespace='my_namespace') + + Args: + ids (List[str]): Vector ids to delete [optional] + delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] + Default is False. + namespace (str): The namespace to delete vectors from [optional] + If not specified, the default namespace is used. + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + If specified, the metadata filter here will be used to select the vectors to delete. + This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + + Keyword Args: + Supports OpenAPI client keyword arguments. See pinecone.core.client.models.DeleteRequest for more details. + + + Returns: An empty dictionary if the delete operation was successful. 
+ """ _check_type = kwargs.pop('_check_type', False) + args_dict = self._parse_non_empty_args([('ids', ids), + ('delete_all', delete_all), + ('namespace', namespace), + ('filter', filter)]) + return self._vector_api.delete( DeleteRequest( - *args, - **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS}, + **args_dict, + **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS and v is not None}, _check_type=_check_type ), **{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS} ) @validate_and_convert_errors - def fetch(self, *args, **kwargs): - return self._vector_api.fetch(*args, **kwargs) + def fetch(self, + ids: List[str], + namespace: Optional[str] = None, + **kwargs) -> FetchResponse: + """ + The fetch operation looks up and returns vectors, by ID, from a single namespace. + The returned vectors include the vector data and/or metadata. + + API reference: https://docs.pinecone.io/reference/fetch + + Examples: + >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') + >>> index.fetch(ids=['id1', 'id2']) + + Args: + ids (List[str]): The vector IDs to fetch. + namespace (str): The namespace to fetch vectors from. + If not specified, the default namespace is used. [optional] + Keyword Args: + Supports OpenAPI client keyword arguments. See pinecone.core.client.models.FetchResponse for more details. + + + Returns: FetchResponse object which contains the list of Vector objects, and namespace name. 
+ """ + args_dict = self._parse_non_empty_args([('namespace', namespace)]) + return self._vector_api.fetch(ids=ids, **args_dict, **kwargs) @validate_and_convert_errors - def query(self, vector=[], id='', queries=[], **kwargs): + def query(self, + vector: Optional[List[float]] = None, + id: Optional[str] = None, + queries: Optional[Union[List[QueryVector], List[Tuple]]] = None, + top_k: Optional[int] = None, + namespace: Optional[str] = None, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + include_values: Optional[bool] = None, + include_metadata: Optional[bool] = None, + **kwargs) -> QueryResponse: + """ + The Query operation searches a namespace, using a query vector. + It retrieves the ids of the most similar items in a namespace, along with their similarity scores. + + API reference: https://docs.pinecone.io/reference/query + + Examples: + >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') + >>> index.query(id='id1', top_k=10, namespace='my_namespace') + >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) + >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) + + Args: + vector (List[float]): The query vector. This should be the same length as the dimension of the index + being queried. Each `query()` request can contain only one of the parameters + `queries`, `id` or `vector`.. [optional] + id (str): The unique ID of the vector to be used as a query vector. + Each `query()` request can contain only one of the parameters + `queries`, `vector`, or `id`.. [optional] + queries ([QueryVector]): DEPRECATED. The query vectors. + Each `query()` request can contain only one of the parameters + `queries`, `vector`, or `id`.. [optional] + top_k (int): The number of results to return for each query. Must be an integer greater than 1. + namespace (str): The namespace to fetch vectors from. 
+ If not specified, the default namespace is used. [optional] + filter (Dict[str, Union[str, float, int, bool, List, dict]): + The filter to apply. You can use vector metadata to limit your search. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + include_values (bool): Indicates whether vector values are included in the response. + If omitted the server will use the default value of False [optional] + include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. + If omitted the server will use the default value of False [optional] + + Keyword Args: + Supports OpenAPI client keyword arguments. See pinecone.core.client.models.QueryRequest for more details. + + Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, + and namespace name. + """ + def _query_transform(item): + if isinstance(item, QueryVector): + return item + if isinstance(item, tuple): + values, filter = fix_tuple_length(item, 2) + if filter is None: + return QueryVector(values=values, _check_type=_check_type) + else: + return QueryVector(values=values, filter=filter, _check_type=_check_type) + if isinstance(item, Iterable): + return QueryVector(values=item, _check_type=_check_type) + raise ValueError(f"Invalid query vector value passed: cannot interpret type {type(item)}") + _check_type = kwargs.pop('_check_type', False) + queries = list(map(_query_transform, queries)) if queries is not None else None + args_dict = self._parse_non_empty_args([('vector', vector), + ('id', id), + ('queries', queries), + ('top_k', top_k), + ('namespace', namespace), + ('filter', filter), + ('include_values', include_values), + ('include_metadata', include_metadata)]) def _query_transform(item): if isinstance(item, QueryVector): @@ -108,34 +326,91 @@ def _query_transform(item): response = self._vector_api.query( QueryRequest( - queries=list(map(_query_transform, queries)), - vector=vector, - id=id, + **args_dict, 
_check_type=_check_type, **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS} ), **{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS} ) - return parse_query_response(response, vector or id) + return parse_query_response(response, vector is not None or id) @validate_and_convert_errors - def update(self, id, **kwargs): + def update(self, + id: str, + values: Optional[List[float]] = None, + set_metadata: Optional[Dict[str, + Union[str, float, int, bool, List[int], List[float], List[str]]]] = None, + namespace: Optional[str] = None, + **kwargs) -> Dict[str, Any]: + """ + The Update operation updates vector in a namespace. + If a value is included, it will overwrite the previous value. + If a set_metadata is included, + the values of the fields specified in it will be added or overwrite the previous value. + + API reference: https://docs.pinecone.io/reference/update + + Examples: + >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') + >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace') + + Args: + id (str): Vector's unique id. + values (List[float]): vector values to set. [optional] + set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): + metadata to set for vector. [optional] + namespace (str): Namespace name where to update the vector.. [optional] + + Keyword Args: + Supports OpenAPI client keyword arguments. See pinecone.core.client.models.UpdateRequest for more details. + + Returns: An empty dictionary if the update was successful. 
+ """ _check_type = kwargs.pop('_check_type', False) + args_dict = self._parse_non_empty_args([('values', values), + ('set_metadata', set_metadata), + ('namespace', namespace)]) return self._vector_api.update(UpdateRequest( id=id, + **args_dict, _check_type=_check_type, **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS} ), **{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS}) @validate_and_convert_errors - def describe_index_stats(self, *args, **kwargs): + def describe_index_stats(self, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + **kwargs) -> DescribeIndexStatsResponse: + """ + The DescribeIndexStats operation returns statistics about the index's contents. + For example: The vector count per namespace and the number of dimensions. + + API reference: https://docs.pinecone.io/reference/describe_index_stats_post + + Examples: + >>> index.describe_index_stats() + >>> index.describe_index_stats(filter={'key': 'value'}) + + Args: + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + + Returns: DescribeIndexStatsResponse object which contains stats about the index. 
+ """ _check_type = kwargs.pop('_check_type', False) + args_dict = self._parse_non_empty_args([('filter', filter)]) + return self._vector_api.describe_index_stats( DescribeIndexStatsRequest( - *args, + **args_dict, **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS}, _check_type=_check_type ), **{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS} ) + + @staticmethod + def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]: + return {arg_name: val for arg_name, val in args if val is not None} diff --git a/requirements.txt b/requirements.txt index 05b55a2a..82331727 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ typing-extensions>=3.7.4 dnspython>=2.0.0 # openapi generated client: python_dateutil >= 2.5.3 -urllib3 >= 1.21.1 \ No newline at end of file +urllib3 >= 1.21.1 +tqdm >= 4.64.1 \ No newline at end of file diff --git a/tests/unit/test_grpc_index.py b/tests/unit/test_grpc_index.py new file mode 100644 index 00000000..3d192167 --- /dev/null +++ b/tests/unit/test_grpc_index.py @@ -0,0 +1,378 @@ +import pytest + +import pinecone +from core.utils import dict_to_proto_struct +from pinecone import DescribeIndexStatsRequest +from pinecone.core.grpc.protos.vector_service_pb2 import Vector, DescribeIndexStatsRequest, UpdateRequest, \ + UpsertRequest, FetchRequest, QueryRequest, DeleteRequest, QueryVector, UpsertResponse + + +class TestGrpcIndex: + + def setup_method(self): + self.vector_dim = 8 + self.vals1 = [0.1] * self.vector_dim + self.vals2 = [0.2] * self.vector_dim + self.md1 = {'genre': 'action', 'year': 2021} + self.md2 = {'genre': 'documentary', 'year': 2020} + self.filter1 = {'genre': {'$in': ['action']}} + self.filter2 = {'year': {'$eq': 2020}} + + pinecone.init(api_key='example-key') + self.index = pinecone.GRPCIndex('example-name') + + # region: upsert tests + + def test_upsert_tuplesOfIdVec_UpserWithoutMD(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', 
autospec=True) + self.index.upsert([('vec1', self.vals1), ('vec2', self.vals2)], namespace='ns') + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata={}), + Vector(id='vec2', values=self.vals2, metadata={})], + namespace='ns'), + timeout=None + ) + + def test_upsert_tuplesOfIdVecMD_UpsertVectorsWithMD(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.upsert([('vec1', self.vals1, self.md1), ('vec2', self.vals2, self.md2)], namespace='ns') + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + def test_upsert_vectors_upsertInputVectors(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns') + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + def test_upsert_async_upsertInputVectorsAsync(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns', + async_req=True) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Upsert.future, + UpsertRequest( + 
vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + def test_upsert_vectorListIsMultiplyOfBatchSize_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True, + side_effect=lambda stub, upsert_request, timeout: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns', + batch_size=1, + show_progress=False) + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns'), + timeout=None) + + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + UpsertRequest( + vectors=[Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + assert result.upserted_count == 2 + + def test_upsert_vectorListNotMultiplyOfBatchSize_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True, + side_effect=lambda stub, upsert_request, timeout: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2)), + Vector(id='vec3', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns', + batch_size=2) + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', 
values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + UpsertRequest( + vectors=[Vector(id='vec3', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns'), + timeout=None) + + assert result.upserted_count == 3 + + def test_upsert_vectorListSmallerThanBatchSize_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True, + side_effect=lambda stub, upsert_request, timeout: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns', + batch_size=5) + + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + assert result.upserted_count == 2 + + def test_upsert_tuplesList_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True, + side_effect=lambda stub, upsert_request, timeout: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert([('vec1', self.vals1, self.md1), + ('vec2', self.vals2, self.md2), + ('vec3', self.vals1, self.md1)], + namespace='ns', + batch_size=2) + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + 
UpsertRequest( + vectors=[Vector(id='vec3', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns'), + timeout=None) + + assert result.upserted_count == 3 + + def test_upsert_batchSizeIsNotPositive_errorIsRaised(self): + with pytest.raises(ValueError, match='batch_size must be a positive integer'): + self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns', + batch_size=0) + + with pytest.raises(ValueError, match='batch_size must be a positive integer'): + self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns', + batch_size=-1) + + def test_upsert_useBatchSizeAndAsyncReq_valueErrorRaised(self): + with pytest.raises(ValueError, match='async_req is not supported when batch_size is provided.'): + self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns', + batch_size=2, + async_req=True) + + # endregion + + # region: query tests + + def test_query_byVectorNoFilter_queryVectorNoFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.query(top_k=10, vector=self.vals1) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Query, + QueryRequest(top_k=10, vector=self.vals1), + timeout=None, + ) + + def test_query_byVectorWithFilter_queryVectorWithFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.query(top_k=10, vector=self.vals1, filter=self.filter1, namespace='ns', timeout=10) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Query, + QueryRequest(top_k=10, vector=self.vals1, filter=dict_to_proto_struct(self.filter1), namespace='ns'), + timeout=10, + ) + + def test_query_byTuplesNoFilter_queryVectorsNoFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.query(top_k=10, queries=[ + 
(self.vals1,), + (self.vals2,) + ]) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Query, + QueryRequest(queries=[ + QueryVector(values=self.vals1, filter={}), + QueryVector(values=self.vals2, filter={}) + ], top_k=10), + timeout=None, + ) + + def test_query_byTuplesWithFilter_queryVectorsWithFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.query(top_k=10, queries=[ + (self.vals1, self.filter1), + (self.vals2, self.filter2) + ]) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Query, + QueryRequest(queries=[ + QueryVector(values=self.vals1, filter=dict_to_proto_struct(self.filter1)), + QueryVector(values=self.vals2, filter=dict_to_proto_struct(self.filter2)) + ], top_k=10), + timeout=None, + ) + + def test_query_byVecId_queryByVecId(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.query(top_k=10, id='vec1', include_metadata=True, include_values=False) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Query, + QueryRequest(top_k=10, id='vec1', include_metadata=True, include_values=False), + timeout=None, + ) + + # endregion + + # region: delete tests + + def test_delete_byIds_deleteByIds(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.delete(ids=['vec1', 'vec2']) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Delete, + DeleteRequest(ids=['vec1', 'vec2']), + timeout=None, + ) + + def test_delete_byIdsAsync_deleteByIdsAsync(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.delete(ids=['vec1', 'vec2'], async_req=True) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Delete.future, + DeleteRequest(ids=['vec1', 'vec2']), + timeout=None, + ) + + def test_delete_deleteAllByFilter_deleteAllByFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', 
autospec=True) + self.index.delete(delete_all=True, filter=self.filter1, namespace='ns', timeout=30) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Delete, + DeleteRequest(delete_all=True, filter=dict_to_proto_struct(self.filter1), namespace='ns'), + timeout=30, + ) + + def test_delete_deleteAllNoFilter_deleteNoFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.delete(delete_all=True) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Delete, + DeleteRequest(delete_all=True), + timeout=None, + ) + + # endregion + + # region: fetch tests + + def test_fetch_byIds_fetchByIds(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.fetch(['vec1', 'vec2']) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Fetch, + FetchRequest(ids=['vec1', 'vec2']), + timeout=None, + ) + + def test_fetch_byIdsAndNS_fetchByIdsAndNS(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.fetch(['vec1', 'vec2'], namespace='ns', timeout=30) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Fetch, + FetchRequest(ids=['vec1', 'vec2'], namespace='ns'), + timeout=30, + ) + + # endregion + + # region: update tests + + def test_update_byIdAnValues_updateByIdAndValues(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.update(id='vec1', values=self.vals1, namespace='ns', timeout=30) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Update, + UpdateRequest(id='vec1', values=self.vals1, namespace='ns'), + timeout=30, + ) + + def test_update_byIdAnValuesAsync_updateByIdAndValuesAsync(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.update(id='vec1', values=self.vals1, namespace='ns', timeout=30, async_req=True) + self.index._wrap_grpc_call.assert_called_once_with( + 
self.index.stub.Update.future, + UpdateRequest(id='vec1', values=self.vals1, namespace='ns'), + timeout=30, + ) + + def test_update_byIdAnValuesAndMetadata_updateByIdAndValuesAndMetadata(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.update('vec1', values=self.vals1, set_metadata=self.md1) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Update, + UpdateRequest(id='vec1', values=self.vals1, set_metadata=dict_to_proto_struct(self.md1)), + timeout=None, + ) + + # endregion + + # region: describe index tests + + def test_describeIndexStats_callWithoutFilter_CalledWithoutFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.describe_index_stats() + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.DescribeIndexStats, + DescribeIndexStatsRequest(), + timeout=None, + ) + + def test_describeIndexStats_callWithFilter_CalledWithFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.describe_index_stats(filter=self.filter1) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.DescribeIndexStats, + DescribeIndexStatsRequest(filter=dict_to_proto_struct(self.filter1)), + timeout=None, + ) + + # endregion diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 142f1ed7..6c89467a 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,64 +1,365 @@ -vector_dim = 8 -vals1 = [0.1] * vector_dim -vals2 = [0.2] * vector_dim -md1 = {'genre': 'action', 'year': 2021} -md2 = {'genre': 'documentary', 'year': 2020} -filter1 = {'genre': {'$in': ['action']}} -filter2 = {'year': {'$eq': 2020}} - - -def test_upsert_request_tuples_id_data(mocker): - import pinecone - pinecone.init(api_key='example-key') - index = pinecone.Index('example-name') - mocker.patch.object(index._vector_api, 'upsert', autospec=True) - index.upsert([('vec1', vals1), ('vec2', vals2)]) 
- index._vector_api.upsert.assert_called_once_with( - pinecone.UpsertRequest(vectors=[ - pinecone.Vector(id='vec1', values=vals1, metadata={}), - pinecone.Vector(id='vec2', values=vals2, metadata={}) - ]) - ) - - -def test_upsert_request_tuples_id_data_metadata(mocker): - import pinecone - pinecone.init(api_key='example-key') - index = pinecone.Index('example-name') - mocker.patch.object(index._vector_api, 'upsert', autospec=True) - index.upsert([('vec1', vals1, md1), - ('vec2', vals2, md2)]) - index._vector_api.upsert.assert_called_once_with( - pinecone.UpsertRequest(vectors=[ - pinecone.Vector(id='vec1', values=vals1, metadata=md1), - pinecone.Vector(id='vec2', values=vals2, metadata=md2) +import pytest + +from pinecone.core.client.api_client import Endpoint + +import pinecone +from pinecone import DescribeIndexStatsRequest, ScoredVector, QueryResponse, UpsertResponse + + +class TestRestIndex: + + def setup_method(self): + self.vector_dim = 8 + self.vals1 = [0.1] * self.vector_dim + self.vals2 = [0.2] * self.vector_dim + self.md1 = {'genre': 'action', 'year': 2021} + self.md2 = {'genre': 'documentary', 'year': 2020} + self.filter1 = {'genre': {'$in': ['action']}} + self.filter2 = {'year': {'$eq': 2020}} + + pinecone.init(api_key='example-key') + self.index = pinecone.Index('example-name') + + # region: upsert tests + + def test_upsert_tuplesOfIdVec_UpserWithoutMD(self, mocker): + mocker.patch.object(self.index._vector_api, 'upsert', autospec=True) + self.index.upsert([('vec1', self.vals1), ('vec2', self.vals2)], namespace='ns') + self.index._vector_api.upsert.assert_called_once_with( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata={}), + pinecone.Vector(id='vec2', values=self.vals2, metadata={}) + ], namespace='ns') + ) + + def test_upsert_tuplesOfIdVecMD_UpsertVectorsWithMD(self, mocker): + mocker.patch.object(self.index._vector_api, 'upsert', autospec=True) + self.index.upsert([('vec1', self.vals1, self.md1), + 
('vec2', self.vals2, self.md2)]) + self.index._vector_api.upsert.assert_called_once_with( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2) + ]) + ) + + def test_upsert_vectors_upsertInputVectors(self, mocker): + mocker.patch.object(self.index._vector_api, 'upsert', autospec=True) + self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2)], + namespace='ns') + self.index._vector_api.upsert.assert_called_once_with( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2) + ], namespace='ns') + ) + + def test_upsert_parallelUpsert_callUpsertParallel(self, mocker): + mocker.patch.object(Endpoint, '__call__', autospec=True) + chunks = [[pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1)], + [pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2)]] + with pinecone.Index('example-index', pool_threads=30) as index: + # Send requests in parallel + async_results = [ + index.upsert(vectors=ids_vectors_chunk, namespace="ns", async_req=True) + for ids_vectors_chunk in chunks + ] + # Wait for and retrieve responses (this raises in case of error) + [async_result.get() for async_result in async_results] + + Endpoint.__call__.assert_any_call( + index._vector_api.upsert, + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + ], + namespace='ns'), + async_req=True + ) + + Endpoint.__call__.assert_any_call( + index._vector_api.upsert, + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + ], + namespace='ns'), + async_req=True + ) + + def test_upsert_vectorListIsMultiplyOfBatchSize_vectorsUpsertedInBatches(self, mocker): + 
mocker.patch.object(self.index._vector_api, + 'upsert', + autospec=True, + side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2)], + namespace='ns', + batch_size=1, + show_progress=False) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + ], namespace='ns') + ) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + ], namespace='ns') + ) + + assert result.upserted_count == 2 + + def test_upsert_vectorListNotMultiplyOfBatchSize_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index._vector_api, + 'upsert', + autospec=True, + side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + pinecone.Vector(id='vec3', values=self.vals1, metadata=self.md1)], + namespace='ns', + batch_size=2) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + ], namespace='ns') + ) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec3', values=self.vals1, metadata=self.md1), + ], namespace='ns') + ) + + assert result.upserted_count == 3 + + def test_upsert_vectorListSmallerThanBatchSize_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index._vector_api, + 'upsert', + autospec=True, + 
side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + pinecone.Vector(id='vec3', values=self.vals1, metadata=self.md1)], + namespace='ns', + batch_size=5) + + self.index._vector_api.upsert.assert_called_once_with( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + pinecone.Vector(id='vec3', values=self.vals1, metadata=self.md1), + ], namespace='ns') + ) + + assert result.upserted_count == 3 + + def test_upsert_tuplesList_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index._vector_api, + 'upsert', + autospec=True, + side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert(vectors= + [('vec1', self.vals1, self.md1), + ('vec2', self.vals2, self.md2), + ('vec3', self.vals1, self.md1)], + namespace='ns', + batch_size=2) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + ], namespace='ns') + ) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec3', values=self.vals1, metadata=self.md1), + ], namespace='ns') + ) + + assert result.upserted_count == 3 + + def test_upsert_batchSizeIsNotPositive_errorIsRaised(self): + with pytest.raises(ValueError, match='batch_size must be a positive integer'): + self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1)], + namespace='ns', + batch_size=0) + + with pytest.raises(ValueError, match='batch_size must be a positive integer'): + 
self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1)], + namespace='ns', + batch_size=-1) + + def test_upsert_useBatchSizeAndAsyncReq_valueErrorRaised(self): + with pytest.raises(ValueError, match='async_req is not supported when batch_size is provided.'): + self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1)], + namespace='ns', + batch_size=1, + async_req=True) + + # endregion + + # region: query tests + + def test_query_byVectorNoFilter_queryVectorNoFilter(self, mocker): + response = QueryResponse(results=[], + matches=[ScoredVector(id="1", + score=0.9, + values=[0.0], + metadata={"a": 2})], + namespace="test") + + mocker.patch.object(self.index._vector_api, 'query', autospec=True, return_value=response) + + actual = self.index.query(top_k=10, vector=self.vals1) + + self.index._vector_api.query.assert_called_once_with( + pinecone.QueryRequest(top_k=10, vector=self.vals1) + ) + expected = QueryResponse(matches=[ScoredVector(id="1", + score=0.9, + values=[0.0], + metadata={"a": 2})], + namespace="test") + expected._data_store.pop('results', None) + assert actual == expected + + def test_query_byVectorWithFilter_queryVectorWithFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'query', autospec=True) + self.index.query(top_k=10, vector=self.vals1, filter=self.filter1, namespace='ns') + self.index._vector_api.query.assert_called_once_with( + pinecone.QueryRequest(top_k=10, vector=self.vals1, filter=self.filter1, namespace='ns') + ) + + def test_query_byTuplesNoFilter_queryVectorsNoFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'query', autospec=True) + self.index.query(top_k=10, queries=[ + (self.vals1,), + (self.vals2,) ]) - ) - - -def test_query_request_tuples_query_only(mocker): - import pinecone - pinecone.init(api_key='example-key') - index = pinecone.Index('example-name') - mocker.patch.object(index._vector_api, 'query', autospec=True) - 
index.query(top_k=10, vector=vals1) - index._vector_api.query.assert_called_once_with( - pinecone.QueryRequest(top_k=10, vector=vals1, id='', queries=[]) - ) - - -def test_query_request_tuples_query_filter(mocker): - import pinecone - pinecone.init(api_key='example-key') - index = pinecone.Index('example-name') - mocker.patch.object(index._vector_api, 'query', autospec=True) - index.query(top_k=10, queries=[ - (vals1, filter1), - (vals2, filter2) - ]) - index._vector_api.query.assert_called_once_with( - pinecone.QueryRequest(top_k=10, vector=[], id='', queries=[ - pinecone.QueryVector(values=vals1, filter=filter1), - pinecone.QueryVector(values=vals2, filter=filter2) + self.index._vector_api.query.assert_called_once_with( + pinecone.QueryRequest(top_k=10, queries=[ + pinecone.QueryVector(values=self.vals1), + pinecone.QueryVector(values=self.vals2) + ]) + ) + + def test_query_byTuplesWithFilter_queryVectorsWithFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'query', autospec=True) + self.index.query(top_k=10, queries=[ + (self.vals1, self.filter1), + (self.vals2, self.filter2) ]) - ) + self.index._vector_api.query.assert_called_once_with( + pinecone.QueryRequest(top_k=10, queries=[ + pinecone.QueryVector(values=self.vals1, filter=self.filter1), + pinecone.QueryVector(values=self.vals2, filter=self.filter2) + ]) + ) + + def test_query_byVecId_queryByVecId(self, mocker): + mocker.patch.object(self.index._vector_api, 'query', autospec=True) + self.index.query(top_k=10, id='vec1', include_metadata=True, include_values=False) + self.index._vector_api.query.assert_called_once_with( + pinecone.QueryRequest(top_k=10, id='vec1', include_metadata=True, include_values=False) + ) + + # endregion + + # region: delete tests + + def test_delete_byIds_deleteByIds(self, mocker): + mocker.patch.object(self.index._vector_api, 'delete', autospec=True) + self.index.delete(ids=['vec1', 'vec2']) + self.index._vector_api.delete.assert_called_once_with( + 
pinecone.DeleteRequest(ids=['vec1', 'vec2']) + ) + + def test_delete_deleteAllByFilter_deleteAllByFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'delete', autospec=True) + self.index.delete(delete_all=True, filter=self.filter1, namespace='ns') + self.index._vector_api.delete.assert_called_once_with( + pinecone.DeleteRequest(delete_all=True, filter=self.filter1, namespace='ns') + ) + + def test_delete_deleteAllNoFilter_deleteNoFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'delete', autospec=True) + self.index.delete(delete_all=True) + self.index._vector_api.delete.assert_called_once_with( + pinecone.DeleteRequest(delete_all=True) + ) + + # endregion + + # region: fetch tests + + def test_fetch_byIds_fetchByIds(self, mocker): + mocker.patch.object(self.index._vector_api, 'fetch', autospec=True) + self.index.fetch(ids=['vec1', 'vec2']) + self.index._vector_api.fetch.assert_called_once_with( + ids=['vec1', 'vec2'] + ) + + def test_fetch_byIdsAndNS_fetchByIdsAndNS(self, mocker): + mocker.patch.object(self.index._vector_api, 'fetch', autospec=True) + self.index.fetch(ids=['vec1', 'vec2'], namespace='ns') + self.index._vector_api.fetch.assert_called_once_with( + ids=['vec1', 'vec2'], namespace='ns' + ) + + # endregion + + # region: update tests + + def test_update_byIdAndValues_updateByIdAndValues(self, mocker): + mocker.patch.object(self.index._vector_api, 'update', autospec=True) + self.index.update(id='vec1', values=self.vals1, namespace='ns') + self.index._vector_api.update.assert_called_once_with( + pinecone.UpdateRequest(id='vec1', values=self.vals1, namespace='ns') + ) + + def test_update_byIdAndValuesAndMetadata_updateByIdAndValuesAndMetadata(self, mocker): + mocker.patch.object(self.index._vector_api, 'update', autospec=True) + self.index.update('vec1', values=self.vals1, metadata=self.md1) + self.index._vector_api.update.assert_called_once_with( + pinecone.UpdateRequest(id='vec1', values=self.vals1, metadata=self.md1) + ) 
+ + # endregion + + # region: describe index tests + + def test_describeIndexStats_callWithoutFilter_CalledWithoutFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'describe_index_stats', autospec=True) + self.index.describe_index_stats() + self.index._vector_api.describe_index_stats.assert_called_once_with( + DescribeIndexStatsRequest()) + + def test_describeIndexStats_callWithFilter_CalledWithFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'describe_index_stats', autospec=True) + self.index.describe_index_stats(filter=self.filter1) + self.index._vector_api.describe_index_stats.assert_called_once_with( + DescribeIndexStatsRequest(filter=self.filter1)) + + # endregion diff --git a/tox.ini b/tox.ini index 8e3d6c25..c38a0d1d 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{36,37,38,39}-pcgrpc_{yes,no}-old_deps_{yes,no},flake8,docs +envlist = flake8,docs skip_missing_interpreters = true [testenv] @@ -15,7 +15,7 @@ deps= old_deps_yes: urllib3==1.21.1 -r {toxinidir}/requirements.txt -r {toxinidir}/test-requirements.txt - pcgrpc_yes: -r {toxinidir}/requirements-grpc.txt + -r {toxinidir}/requirements-grpc.txt commands = pytest --cov=pinecone --timeout=120 tests/unit {posargs}