From 1c49b2998f19c1ecd978a1bb3b7e5dc63fc6b890 Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Mon, 16 Oct 2023 19:34:55 -0400 Subject: [PATCH 1/3] overhaul docstrings for config, and manage modules, remove core and info from generated reference, fix config issues with ini file --- .github/actions/build-docs/action.yml | 2 +- .gitignore | 4 + .pinecone.example | 4 + README.md | 10 +- pinecone/config.py | 74 ++++++- pinecone/index.py | 57 ++++-- pinecone/manage.py | 266 +++++++++++++++++++------- 7 files changed, 317 insertions(+), 100 deletions(-) create mode 100644 .pinecone.example diff --git a/.github/actions/build-docs/action.yml b/.github/actions/build-docs/action.yml index 63181c44..79689834 100644 --- a/.github/actions/build-docs/action.yml +++ b/.github/actions/build-docs/action.yml @@ -14,4 +14,4 @@ runs: - name: Build html documentation shell: bash run: | - poetry run pdoc pinecone/ --favicon ./favicon-32x32.png --docformat google -o ./docs + poetry run pdoc pinecone/ '!pinecone.core' '!pinecone.info' --favicon ./favicon-32x32.png --docformat google -o ./docs diff --git a/.gitignore b/.gitignore index 3f605053..2b70ec06 100644 --- a/.gitignore +++ b/.gitignore @@ -154,3 +154,7 @@ dmypy.json # Datasets *.hdf5 *~ + +# INI files +.pinecone +*.ini diff --git a/.pinecone.example b/.pinecone.example new file mode 100644 index 00000000..00fb4078 --- /dev/null +++ b/.pinecone.example @@ -0,0 +1,4 @@ +# For testing purposes only +[default] +api_key= +environment= \ No newline at end of file diff --git a/README.md b/README.md index bc9d9071..9a66d248 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ -# pinecone-client +# pinecone-python-client + The Pinecone python client For more information, see the docs at https://www.pinecone.io/docs/ @@ -6,6 +7,7 @@ For more information, see the docs at https://www.pinecone.io/docs/ ## Installation Install a released version from pip: + ```shell pip3 install pinecone-client ``` @@ -17,11 +19,13 @@ pip3 install 
"pinecone-client[grpc]" ``` Or the latest development version: + ```shell pip3 install git+https://git@github.com/pinecone-io/pinecone-python-client.git ``` Or a specific development version: + ```shell pip3 install git+https://git@github.com/pinecone-io/pinecone-python-client.git pip3 install git+https://git@github.com/pinecone-io/pinecone-python-client.git@example-branch-name @@ -120,7 +124,6 @@ index = pinecone.Index("example-index") index_stats_response = index.describe_index_stats() ``` - ## Upsert vectors The following example upserts vectors to `example-index`. @@ -189,7 +192,6 @@ index = pinecone.Index("example-index") fetch_response = index.fetch(ids=["vec1", "vec2"], namespace="example-namespace") ``` - ## Update vectors The following example updates vectors by ID. @@ -259,6 +261,6 @@ pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") pinecone.delete_collection("example-collection") ``` -# Contributing +# Contributing If you'd like to make a contribution, or get setup locally to develop the Pinecone python client, please see our [contributing guide](./CONTRIBUTING.md) diff --git a/pinecone/config.py b/pinecone/config.py index dfde4b97..c0278208 100644 --- a/pinecone/config.py +++ b/pinecone/config.py @@ -79,6 +79,7 @@ def reset(self, config_file=None, **kwargs): # Set INI file config config = config._replace(**self._preprocess_and_validate_config(file_config)) + print("post init config: ", config) # Set environment config env_config = ConfigBase( @@ -242,18 +243,71 @@ def init( project_name: str = None, log_level: str = None, openapi_config: OpenApiConfiguration = None, - config: str = "~/.pinecone", + config: str = "./.pinecone", **kwargs ): - """Initializes the Pinecone client. - - :param api_key: Required if not set in config file or by environment variable ``PINECONE_API_KEY``. - :param host: Optional. Controller host. - :param environment: Optional. Deployment environment. - :param project_name: Optional. Pinecone project name. 
Overrides the value that is otherwise looked up and used from the Pinecone backend. - :param openapi_config: Optional. Set OpenAPI client configuration. - :param config: Optional. An INI configuration file. - :param log_level: Deprecated since v2.0.2 [Will be removed in v3.0.0]; use the standard logging module to manage logger "pinecone" instead. + """Initializes configuration for the Pinecone client. + + The `pinecone` module is the main entrypoint to this sdk. You will use instances of it to create and manage indexes as well as + perform data operations on those indexes after they are created. + + **Initializing the client** + + There are two pieces of configuration required to use the Pinecone client: an API key and environment value. These values can + be passed using environment variables, an INI configuration file, or explicitly as arguments to the ``init`` function. Find + your configuration values in the console dashboard at [https://app.pinecone.io](https://app.pinecone.io). + + **Using environment variables** + + The environment variables used to configure the client are the following: + + ```python + export PINECONE_API_KEY="your_api_key" + export PINECONE_ENVIRONMENT="your_environment" + export PINECONE_PROJECT_NAME="your_project_name" + export PINECONE_CONTROLLER_HOST="your_controller_host" + ``` + + **Using an INI configuration file** + + You can use an INI configuration file to configure the client. The default location for this file is `./.pinecone`. 
+ You must place configuration values in the `default` group, and the keys must have the following format: + + ```ini + [default] + api_key=your_api_key + environment=your_environment + project_name=your_project_name + controller_host=your_controller_host + ``` + + When environment variables or a config file are provided, you do not need to initialize the client explicitly: + + ```python + import pinecone + pinecone.list_indexes() + ``` + + **Passing configuration values** + + If you prefer to pass configuration in code, the ``init`` function accepts the following arguments. This could be useful if + your application needs to interact with multiple projects, each with a different configuration. Explicitly passed values + will override any existing environment or configuration file values. + + ```python + pinecone.init(api_key="my-api-key", environment="my-environment") + ``` + + Args: + api_key (str, optional): The API key for your Pinecone project. Required if not set in environment variables or the config file. + You can find this in the [Pinecone console](https://app.pinecone.io). + host (str, optional): Custom controller host which will be used for API calls involving index operations. + environment (str, optional): The environment for your Pinecone project. Required if not set in environment variables or the config file. + You can find this in the [Pinecone console](https://app.pinecone.io). + project_name (str, optional): The Pinecone project name. Overrides the value that is otherwise looked up and used from the Pinecone backend. + openapi_config (`pinecone.core.client.configuration.Configuration`, optional): Sets a custom OpenAPI client configuration. + config (str, optional): The path to an INI configuration file. Defaults to `./.pinecone`. + log_level (str, optional): Deprecated since v2.0.2 [Will be removed in v3.0.0]; use the standard logging module to manage logger "pinecone" instead.
""" check_kwargs(init, kwargs) Config.reset( diff --git a/pinecone/index.py b/pinecone/index.py index 64d66d67..3ed9de16 100644 --- a/pinecone/index.py +++ b/pinecone/index.py @@ -82,10 +82,20 @@ def upsert_numpy_deprecation_notice(context): class Index(ApiClient): - - """ - A client for interacting with a Pinecone index via REST API. - For improved performance, use the Pinecone GRPC index client. + """ A client for interacting with a Pinecone index via REST API. + + The ``Index`` class is used to perform data operations (upsert, query, etc) against Pinecone indexes. Usually it will + be instantiated using the `pinecone` module after the required configuration values have been initialized. + + ```python + import pinecone + pinecone.init(api_key="my-api-key", environment="my-environment") + index = pinecone.Index("my-index") + ``` + For improved performance, use the Pinecone GRPCIndex client. For more details, see (Performance tuning)[https://docs.pinecone.io/docs/performance-tuning]. + + Args: + index_name (str): The name of the index to interact with. """ def __init__(self, index_name: str, pool_threads=1): @@ -98,7 +108,9 @@ def __init__(self, index_name: str, pool_threads=1): **openapi_client_config.server_variables, } super().__init__(configuration=openapi_client_config, pool_threads=pool_threads) + self.user_agent = get_user_agent() + """@private""" self._vector_api = VectorOperationsApi(self) @validate_and_convert_errors @@ -116,25 +128,29 @@ def upsert( To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel - A vector can be represented by a 1) Vector object, a 2) tuple or 3) a dictionary + A vector can be represented by a 1) ``Vector`` object, a 2) tuple or 3) a dictionary If a tuple is used, it must be of the form `(id, values, metadata)` or `(id, values)`. 
where id is a string, vector is a list of floats, metadata is a dict, and sparse_values is a dict of the form `{'indices': List[int], 'values': List[float]}`. Examples: - >>> ('id1', [1.0, 2.0, 3.0], {'key': 'value'}, {'indices': [1, 2], 'values': [0.2, 0.4]}) - >>> ('id1', [1.0, 2.0, 3.0], None, {'indices': [1, 2], 'values': [0.2, 0.4]}) - >>> ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) + ```python + ('id1', [1.0, 2.0, 3.0], {'key': 'value'}, {'indices': [1, 2], 'values': [0.2, 0.4]}) + ('id1', [1.0, 2.0, 3.0], None, {'indices': [1, 2], 'values': [0.2, 0.4]}) + ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) + ``` If a Vector object is used, a Vector object must be of the form `Vector(id, values, metadata, sparse_values)`, where metadata and sparse_values are optional arguments. Examples: - >>> Vector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}) - >>> Vector(id='id2', values=[1.0, 2.0, 3.0]) - >>> Vector(id='id3', values=[1.0, 2.0, 3.0], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4])) + ```python + Vector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}) + Vector(id='id2', values=[1.0, 2.0, 3.0]) + Vector(id='id3', values=[1.0, 2.0, 3.0], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4])) + ``` **Note:** the dimension of each vector must match the dimension of the index. @@ -145,6 +161,7 @@ def upsert( >>> >>> index.upsert([{'id': 'id1', 'values': [1.0, 2.0, 3.0], 'metadata': {'key': 'value'}}, >>> {'id': 'id2', 'values': [1.0, 2.0, 3.0], 'sparse_values': {'indices': [1, 8], 'values': [0.2, 0.4]}]) + >>> >>> index.upsert([Vector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), >>> Vector(id='id2', values=[1.0, 2.0, 3.0], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]))]) @@ -158,7 +175,7 @@ def upsert( show_progress (bool): Whether to show a progress bar using tqdm. Applied only if batch_size is provided. Default is True. 
Keyword Args: - Supports OpenAPI client keyword arguments. See pinecone.core.client.models.UpsertRequest for more details. + Supports OpenAPI client keyword arguments. See `UpsertRequest` for more details. Returns: UpsertResponse, includes the number of vectors upserted. """ @@ -450,9 +467,9 @@ def query( Expected to be either a SparseValues object or a dict of the form: {'indices': List[int], 'values': List[float]}, where the lists each have the same length. Keyword Args: - Supports OpenAPI client keyword arguments. See pinecone.core.client.models.QueryRequest for more details. + Supports OpenAPI client keyword arguments. See `QueryRequest` for more details. - Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, + Returns: `QueryResponse` object which contains the list of the closest vectors as ScoredVector objects, and namespace name. """ @@ -515,12 +532,12 @@ def update( API reference: https://docs.pinecone.io/reference/update Examples: - >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') - >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace') - >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, - >>> namespace='my_namespace') - >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]), - >>> namespace='my_namespace') + >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') + >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace') + >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, + >>> namespace='my_namespace') + >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]), + >>> namespace='my_namespace') Args: id (str): Vector's unique id. 
diff --git a/pinecone/manage.py b/pinecone/manage.py index cc470148..15e78c4a 100644 --- a/pinecone/manage.py +++ b/pinecone/manage.py @@ -1,5 +1,5 @@ import time -from typing import NamedTuple, Optional +from typing import List, NamedTuple, Optional import pinecone from pinecone.config import Config @@ -27,6 +27,9 @@ class IndexDescription(NamedTuple): + """ + Represents the description of an index. + """ name: str metric: str replicas: int @@ -40,6 +43,9 @@ class IndexDescription(NamedTuple): class CollectionDescription(object): + """ + Represents the description of a collection. + """ def __init__(self, keys, values): for k, v in zip(keys, values): self.__dict__[k] = v @@ -79,40 +85,72 @@ def create_index( metadata_config: dict = None, source_collection: str = "", ): - """Creates a Pinecone index. - - :param name: the name of the index. - :type name: str - :param dimension: the dimension of vectors that would be inserted in the index - :param index_type: type of index, one of `{"approximated", "exact"}`, defaults to "approximated". - The "approximated" index uses fast approximate search algorithms developed by Pinecone. - The "exact" index uses accurate exact search algorithms. - It performs exhaustive searches and thus it is usually slower than the "approximated" index. - :type index_type: str, optional - :param metric: type of metric used in the vector index, one of `{"cosine", "dotproduct", "euclidean"}`, defaults to "cosine". - Use "cosine" for cosine similarity, - "dotproduct" for dot-product, - and "euclidean" for euclidean distance. - :type metric: str, optional - :param replicas: the number of replicas, defaults to 1. - Use at least 2 replicas if you need high availability (99.99% uptime) for querying. - For additional throughput (QPS) your index needs to support, provision additional replicas. - :type replicas: int, optional - :param shards: the number of shards per index, defaults to 1. 
- Use 1 shard per 1GB of vectors - :type shards: int,optional - :param pods: Total number of pods to be used by the index. pods = shard*replicas - :type pods: int,optional - :param pod_type: the pod type to be used for the index. can be one of p1 or s1. - :type pod_type: str,optional - :param index_config: Advanced configuration options for the index - :param metadata_config: Configuration related to the metadata index - :type metadata_config: dict, optional - :param source_collection: Collection name to create the index from - :type metadata_config: str, optional - :type timeout: int, optional - :param timeout: Timeout for wait until index gets ready. If None, wait indefinitely; if >=0, time out after this many seconds; - if -1, return immediately and do not wait. Default: None + """Creates a new index. + + Note that the index is not immediately ready to use. You can use the `timeout` parameter to control how long the ``create_index`` + call waits to return. You can use the ``describe_index`` function to check the status of an index. + + The minimum required configuration to create an index is the index name and dimension: + + ```python + pinecone.create_index(name="my-index", dimension=128) + ``` + + In a more expansive example, you can specify the metric, number of pods, number of replicas, and pod type: + + ```python + pinecone.create_index( + name="my-index", + dimension=1536, + metric="cosine", + pods=1, + replicas=2, + pod_type="p1.x1", + ) + ``` + + If you plan to begin upserting immediately after index creation is complete, you will want to leave `timeout` as the default `None`. 
+ In this case, the ``create_index`` call will block until the index is ready to use: + + ```python + pinecone.init(api_key="YOUR_API_KEY", environment="us-west1-gcp") + index = pinecone.Index("example-index") + + upsert_response = index.upsert( + vectors=[ + ("vec1", [0.1, 0.2, 0.3, 0.4], {"genre": "drama"}), + ("vec2", [0.2, 0.3, 0.4, 0.5], {"genre": "action"}), + ], + namespace="example-namespace" + ) + ``` + + Args: + name (str): The name of the index. Must be unique within the project and contain only alphanumeric and hyphen characters. + The name must start and end with alphanumeric characters. + dimension (int): The dimension of the index. Must be a positive integer. The dimension of your index should match the + output dimension of your embedding model. For example, if you are using a model that outputs 128-dimensional vectors, + you should set the dimension to 128. + timeout (int, optional): Timeout in seconds to wait until an index is ready. If `None`, wait indefinitely until index is created; + if >=0, time out after this many seconds; if -1, return immediately and do not wait. Default: `None` + index_type (str, optional): type of index, one of `{"approximated", "exact"}`, defaults to "approximated". + The "approximated" index uses fast approximate search algorithms developed by Pinecone. + The "exact" index uses accurate exact search algorithms. + It performs exhaustive searches and thus it is usually slower than the "approximated" index. + metric (str, optional): The metric specifies how similarity is calculated in the index when querying. The default + metric is `'cosine'`. Supported metrics include `'cosine'`, `'dotproduct'`, and `'euclidean'`. To learn more + about these options, see [Distance metrics](https://docs.pinecone.io/docs/indexes#distance-metrics). + replicas (int, optional): The number of replicas in the index. The default number of replicas is 1. 
For more information + see [Replicas](https://docs.pinecone.io/docs/manage-indexes/#replicas). + shards (int, optional): The number of shards in the index. The default number of shards is 1. + pods (int, optional): The number of pods in the index. The default number of pods is 1. + pod_type (str, optional): The type of pod in the index. This string should combine a base pod type (`s1`, `p1`, or `p2`) with a + size (`x1`, `x2`, `x4`, `x8`) into a string such as `p1.x1` or `s1.x4`. The default pod type is `p1.x1`. For more + information on these, see this guide on [pod types and sizes](https://docs.pinecone.io/docs/indexes#pods-pod-types-and-pod-sizes). + index_config (dict, optional): Advanced configuration options for the index + metadata_config (dict, optional): Configuration for the behavior of Pinecone's internal metadata index. By default, + all metadata is indexed; when a `metadata_config` is present, only metadata fields specified are indexed. + source_collection (str, optional): If creating an index from a collection, you can specify the name of the collection here. """ api_instance = _get_api_instance() @@ -157,13 +195,20 @@ def is_ready(): def delete_index(name: str, timeout: int = None): - """Deletes a Pinecone index. - - :param name: the name of the index. - :type name: str - :param timeout: Timeout for wait until index gets ready. If None, wait indefinitely; if >=0, time out after this many seconds; - if -1, return immediately and do not wait. Default: None - :type timeout: int, optional + """Deletes an index. + + Note that the index is not immediately deleted. You can use the `timeout` parameter to control how long the ``delete_index`` + call waits to return. You can use the ``list_indexes`` function to determine if an index has been deleted. + + Example: + ```python + pinecone.delete_index(name="my-index") + ``` + + Args: + name (str): The name of the index to delete. + timeout (int, optional): Timeout in seconds to wait until an index is deleted. 
If `None` wait indefinitely until index is deleted; + if >=0, time out after this many seconds; if -1, return immediately and do not wait. Default: `None` """ api_instance = _get_api_instance() api_instance.delete_index(name) @@ -192,17 +237,48 @@ def get_remaining(): def list_indexes(): - """Lists all indexes.""" + """Lists all Pinecone indexes. + + Example: + ```python + indexes = pinecone.list_indexes() + print(indexes) + # ["my-index", "my-other-index"] + ``` + + Returns: + A list of index names. + """ api_instance = _get_api_instance() response = api_instance.list_indexes() return response def describe_index(name: str): - """Describes a Pinecone index. - - :param name: the name of the index to describe. - :return: Returns an `IndexDescription` object + """Describe a Pinecone index. + + Example: + ```python + pinecone.describe_index("my-index") + # { + # name='my-index', + # metric='cosine', + # replicas=1, + # dimension=128, + # shards=1, + # pods=1, + # pod_type='p1', + # status={'ready': True, 'state': 'RUNNING'}, + # metadata_config=None, + # source_collection=None + # } + ``` + + Args: + name(str): The name of the index to describe. + + Returns: + An ``IndexDescription`` object. """ api_instance = _get_api_instance() response = api_instance.describe_index(name) @@ -224,45 +300,93 @@ def describe_index(name: str): def scale_index(name: str, replicas: int): - """Increases number of replicas for the index. + """Changes the number of replicas for the index, lowest value is 0. + + Example: + ```python + pinecone.scale_index(name="my-index", replicas=2) + ``` - :param name: the name of the Index - :type name: str - :param replicas: the number of replicas in the index now, lowest value is 0. - :type replicas: int + Args: + name(str): The name of the index to scale. + replicas(int): The new number of replicas for the index. 
""" api_instance = _get_api_instance() api_instance.configure_index(name, patch_request=PatchRequest(replicas=replicas, pod_type="")) def create_collection(name: str, source: str): - """Create a collection - :param name: Name of the collection - :param source: Name of the source index + """Create a new collection from an existing index. + + Example: + ```python + index_list = pinecone.list_indexes() + pinecone.create_collection(name="my-collection", source=index_list[0]) + ``` + + Args: + name(str): The name of the collection you would like to create. + source(str): The name of the index you would like to create the collection from. """ api_instance = _get_api_instance() api_instance.create_collection(create_collection_request=CreateCollectionRequest(name=name, source=source)) def list_collections(): - """List all collections""" + """List all collections in a project. + + Example: + ```python + collection_list = pinecone.list_collections() + print(collection_list) + # ["my-collection", "my-other-collection"] + ``` + + Returns: + A list of collection names. + """ api_instance = _get_api_instance() response = api_instance.list_collections() return response def delete_collection(name: str): - """Deletes a collection. - :param: name: The name of the collection + """Delete a collection by collection name. + + Example: + ```python + collection_list = pinecone.list_collections() + collection_name = collection_list[0] + pinecone.delete_collection(collection_name) + ``` + + Args: + name(str): The name of the collection to delete. """ api_instance = _get_api_instance() api_instance.delete_collection(name) def describe_collection(name: str): - """Describes a collection. - :param: The name of the collection - :return: Description of the collection + """Describe a collection. 
+ + Example: + ```python + pinecone.describe_collection("my-collection") + # { + # 'name': 'my-collection', + # 'status': 'Ready', + # 'size': 3089687, + # 'dimension': 3.0, + # 'vector_count': 2.0 + # } + ``` + + Args: + name(str): The name of the collection to describe. + + Returns: + A ``CollectionDescription`` object. """ api_instance = _get_api_instance() response = api_instance.describe_collection(name).to_dict() @@ -271,10 +395,22 @@ def describe_collection(name: str): def configure_index(name: str, replicas: Optional[int] = None, pod_type: Optional[str] = ""): - """Changes current configuration of the index. - :param: name: the name of the Index - :param: replicas: the desired number of replicas, lowest value is 0. - :param: pod_type: the new pod_type for the index. + """Configure an index. + + Use this method to update configuration on an existing index. You can update the number of pods, + replicas, and pod type. + + Example: + ```python + pinecone.configure_index(name="my-index", replicas=2, pod_type="p1.x2") + + ``` + Args: + name(str): The name of the index to configure. + replicas(int, optional): The desired number of replicas, lowest value is 0. + pod_type(str, optional): The type of pod in the index. This string should combine a base pod type (`s1`, `p1`, or `p2`) + with a size (`x1`, `x2`, `x4`, `x8`) into a string such as `p1.x1` or `s1.x4`. The default pod type is `p1.x1`, + For more information on these, see this guide on [pod types and sizes](https://docs.pinecone.io/docs/indexes#pods-pod-types-and-pod-sizes). 
""" api_instance = _get_api_instance() config_args = {} From b1add1c762a41269b3437e6345ec1de7b8db7a9f Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Mon, 16 Oct 2023 21:44:41 -0400 Subject: [PATCH 2/3] reformat docstrings in the index file --- pinecone/index.py | 334 ++++++++++++++++++++++++++-------------------- 1 file changed, 191 insertions(+), 143 deletions(-) diff --git a/pinecone/index.py b/pinecone/index.py index 3ed9de16..108d1357 100644 --- a/pinecone/index.py +++ b/pinecone/index.py @@ -82,7 +82,7 @@ def upsert_numpy_deprecation_notice(context): class Index(ApiClient): - """ A client for interacting with a Pinecone index via REST API. + """A class for interacting with a Pinecone index via REST API. The ``Index`` class is used to perform data operations (upsert, query, etc) against Pinecone indexes. Usually it will be instantiated using the `pinecone` module after the required configuration values have been initialized. @@ -92,7 +92,7 @@ class Index(ApiClient): pinecone.init(api_key="my-api-key", environment="my-environment") index = pinecone.Index("my-index") ``` - For improved performance, use the Pinecone GRPCIndex client. For more details, see (Performance tuning)[https://docs.pinecone.io/docs/performance-tuning]. + For improved performance, use the Pinecone GRPCIndex client. For more details, see [Performance tuning](https://docs.pinecone.io/docs/performance-tuning). Args: index_name (str): The name of the index to interact with. @@ -122,62 +122,90 @@ def upsert( show_progress: bool = True, **kwargs, ) -> UpsertResponse: - """ - The upsert operation writes vectors into a namespace. - If a new value is upserted for an existing vector id, it will overwrite the previous value. + """Upsert records to the index. + + If a new value is upserted for an existing vector id, it will overwrite the previous value. To upsert + in parallel follow [sending upserts in parallel](https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel).
+ + A vector can be represented by a list of either a ``Vector`` object, a tuple, or a dictionary. - To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel + **Tuple** - A vector can be represented by a 1) ``Vector`` object, a 2) tuple or 3) a dictionary + If a tuple is used, it must be one of the following forms: + - `(id, values)` + - `(id, values, metadata)` + - `(id, values, metadata, sparse_values)` - If a tuple is used, it must be of the form `(id, values, metadata)` or `(id, values)`. - where id is a string, vector is a list of floats, metadata is a dict, - and sparse_values is a dict of the form `{'indices': List[int], 'values': List[float]}`. + where `id` is a string, `values` is a list of floats, `metadata` is a dict, and `sparse_values` + is a dict of the form `{'indices': List[int], 'values': List[float]}`. Examples: ```python + # id, values + ('id2', [1.0, 2.0, 3.0]) + # id, values, metadata + ('id1', [1.0, 2.0, 3.0], {'key': 'value'}) + # id, values, metadata, sparse_values ('id1', [1.0, 2.0, 3.0], {'key': 'value'}, {'indices': [1, 2], 'values': [0.2, 0.4]}) + # sending sparse_values without any metadata specified ('id1', [1.0, 2.0, 3.0], None, {'indices': [1, 2], 'values': [0.2, 0.4]}) - ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) ``` + **Vector object** - If a Vector object is used, a Vector object must be of the form - `Vector(id, values, metadata, sparse_values)`, where metadata and sparse_values are optional - arguments. + If a ``Vector`` object is used, it can be instantiated like so: `Vector(id, values, metadata, sparse_values)`. + Metadata and sparse_values are optional arguments. 
Examples: ```python - Vector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}) Vector(id='id2', values=[1.0, 2.0, 3.0]) + Vector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}) Vector(id='id3', values=[1.0, 2.0, 3.0], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4])) ``` + **Dictionary** - **Note:** the dimension of each vector must match the dimension of the index. - - If a dictionary is used, it must be in the form `{'id': str, 'values': List[float], 'sparse_values': {'indices': List[int], 'values': List[float]}, 'metadata': dict}` + If a dictionary is used, it must be in the form `{'id': str, 'values': List[float], 'sparse_values': {'indices': List[int], 'values': List[float]}, 'metadata': dict}`. Examples: - >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])]) - >>> - >>> index.upsert([{'id': 'id1', 'values': [1.0, 2.0, 3.0], 'metadata': {'key': 'value'}}, - >>> {'id': 'id2', 'values': [1.0, 2.0, 3.0], 'sparse_values': {'indices': [1, 8], 'values': [0.2, 0.4]}]) - >>> - >>> index.upsert([Vector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), - >>> Vector(id='id2', values=[1.0, 2.0, 3.0], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]))]) + ```python + # upsert a list of tuples + index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])]) + # upsert a list of Vector objects + index.upsert( + [ + Vector(id="id1", values=[1.0, 2.0, 3.0], metadata={"key": "value"}), + Vector( + id="id2", + values=[1.0, 2.0, 3.0], + sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]), + ), + ] + ) + # upsert a list of dictionaries + index.upsert( + [ + {"id": "id1", "values": [1.0, 2.0, 3.0], "metadata": {"key": "value"}}, + { + "id": "id2", + "values": [1.0, 2.0, 3.0], + "sparse_values": {"indices": [1, 8], "values": [0.2, 0.4]}, + }, + ] + ) + ``` - API reference: https://docs.pinecone.io/reference/upsert + [Pinecone API 
reference](https://docs.pinecone.io/reference/upsert) Args: - vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert. - namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional] - batch_size (int): The number of vectors to upsert in each batch. - If not specified, all vectors will be upserted in a single batch. [optional] - show_progress (bool): Whether to show a progress bar using tqdm. - Applied only if batch_size is provided. Default is True. + vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert. Each item must be a `tuple`, `dictionary`, or ``Vector`` object. + namespace (str, optional): The namespace to write to. If not specified, the default namespace of `''` is used. + batch_size (int, optional): The number of vectors to upsert in each batch. If not specified, all vectors will be upserted in a single batch. + show_progress (bool, optional): Whether to show a progress bar using tqdm. Applied only if batch_size is provided. Default: True + Keyword Args: Supports OpenAPI client keyword arguments. See `UpsertRequest` for more details. - Returns: UpsertResponse, includes the number of vectors upserted. + Returns: + An ``UpsertResponse`` which includes the number of vectors upserted. """ _check_type = kwargs.pop("_check_type", False) @@ -265,6 +293,7 @@ def _dict_to_vector(item): raise def _vector_transform(item: Union[Vector, Tuple]): + print("ITEM: ", item) if isinstance(item, Vector): return item elif isinstance(item, tuple): @@ -299,13 +328,31 @@ def _iter_dataframe(df, batch_size): def upsert_from_dataframe( self, df, namespace: str = None, batch_size: int = 500, show_progress: bool = True ) -> UpsertResponse: - """Upserts a dataframe into the index. + """Upserts a pandas dataframe to the index. + + The dataframe must have the following columns: `id`, `vector`, `sparse_values`, and `metadata`.
+ + Example: + ```python + import pandas as pd + import pinecone + + data = { + 'id': ['id1', 'id2'], + 'vector': [[1.0, 2.0, 3.0], [3.0, 2.0, 1.0]], + 'sparse_values': [{'indices': [1, 2], 'values': [0.2, 0.4]}, None], + 'metadata': [None, {'genre': 'classical'}] + } + + dataframe = pd.DataFrame(data) + index.upsert_from_dataframe(df=dataframe) + ``` Args: - df: A pandas dataframe with the following columns: id, vector, sparse_values, and metadata. - namespace: The namespace to upsert into. - batch_size: The number of rows to upsert in a single batch. - show_progress: Whether to show a progress bar. + df (DataFrame): A pandas dataframe with the following columns: `id`, `vector`, `sparse_values`, and `metadata`. + namespace (str, optional): The namespace to write to. If not specified, the default namespace of `''` is used. + batch_size (int, optional): The number of vectors to upsert in each batch. Default: 500 + show_progress (bool, optional): Whether to show a progress bar using tqdm. Applied only if batch_size is provided. Default: True """ try: import pandas as pd @@ -339,40 +386,40 @@ def delete( filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, **kwargs, ) -> Dict[str, Any]: - """ - The Delete operation deletes vectors from the index, from a single namespace. - No error raised if the vector id does not exist. - Note: for any delete call, if namespace is not specified, the default namespace is used. - - Delete can occur in the following mutual exclusive ways: - 1. Delete by ids from a single namespace - 2. Delete all vectors from a single namespace by setting delete_all to True - 3. Delete all vectors from a single namespace by specifying a metadata filter - (note that for this option delete all must be set to False) + """Delete vectors from the index within a single namespace. + + For any delete call, if namespace is not specified the default namespace of `''` will be used.
+ There are no errors raised if a vector id does not exist. - API reference: https://docs.pinecone.io/reference/delete_post + Deletion can occur in one of the following mutually exclusive ways: + - Delete by ids from a single namespace + - Delete all vectors from a single namespace by setting `delete_all` to True + - Delete all vectors from a single namespace by specifying a metadata filter. For this option + `delete_all` must be set to False Examples: - >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') - >>> index.delete(delete_all=True, namespace='my_namespace') - >>> index.delete(filter={'key': 'value'}, namespace='my_namespace') + ```python + index.delete(ids=['id1', 'id2'], namespace='my_namespace') + index.delete(delete_all=True, namespace='my_namespace') + index.delete(filter={'key': 'value'}, namespace='my_namespace') + ``` + [Pinecone API reference](https://docs.pinecone.io/reference/delete_post) + Args: - ids (List[str]): Vector ids to delete [optional] - delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] - Default is False. - namespace (str): The namespace to delete vectors from [optional] - If not specified, the default namespace is used. - filter (Dict[str, Union[str, float, int, bool, List, dict]]): - If specified, the metadata filter here will be used to select the vectors to delete. - This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. - See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + ids (List[str], optional): The ids of the vectors to delete. + delete_all (bool, optional): This indicates that all vectors in the index namespace should be deleted. + namespace (str): The namespace to delete vectors from. If not specified, the default namespace of `''` is used. + filter (Dict[str, Union[str, float, int, bool, List, dict]]): If specified, the metadata filter will be used to + select the vectors to delete. 
This is mutually exclusive with specifying ids to delete in the `ids` param or + using `delete_all=True`. See [Filtering with metadata](https://www.pinecone.io/docs/metadata-filtering/) for + more on deleting records with filters. Keyword Args: - Supports OpenAPI client keyword arguments. See pinecone.core.client.models.DeleteRequest for more details. - + Supports OpenAPI client keyword arguments. See ``DeleteRequest`` for more details. - Returns: An empty dictionary if the delete operation was successful. + Returns: + An empty dictionary if the delete operation was successful. """ _check_type = kwargs.pop("_check_type", False) args_dict = self._parse_non_empty_args( @@ -390,25 +437,24 @@ def delete( @validate_and_convert_errors def fetch(self, ids: List[str], namespace: Optional[str] = None, **kwargs) -> FetchResponse: - """ - The fetch operation looks up and returns vectors, by ID, from a single namespace. - The returned vectors include the vector data and/or metadata. - - API reference: https://docs.pinecone.io/reference/fetch + """Fetch vectors from the index. Examples: - >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') - >>> index.fetch(ids=['id1', 'id2']) + ```python + index.fetch(ids=['id1', 'id2'], namespace='my_namespace') + index.fetch(ids=['id1', 'id2']) + ``` + [Pinecone API reference](https://docs.pinecone.io/reference/fetch) + Args: ids (List[str]): The vector IDs to fetch. - namespace (str): The namespace to fetch vectors from. - If not specified, the default namespace is used. [optional] + namespace (str, optional): The namespace to fetch vectors from. If not specified, the default namespace of `''` is used. Keyword Args: - Supports OpenAPI client keyword arguments. See pinecone.core.client.models.FetchResponse for more details. - + Supports OpenAPI client keyword arguments. See ``FetchResponse`` for more details. - Returns: FetchResponse object which contains the list of Vector objects, and namespace name. 
+ Returns: + ``FetchResponse`` object which contains the list of Vector objects, and namespace name. """ args_dict = self._parse_non_empty_args([("namespace", namespace)]) return self._vector_api.fetch(ids=ids, **args_dict, **kwargs) @@ -427,50 +473,49 @@ def query( sparse_vector: Optional[Union[SparseValues, Dict[str, Union[List[float], List[int]]]]] = None, **kwargs, ) -> QueryResponse: - """ - The Query operation searches a namespace, using a query vector. - It retrieves the ids of the most similar items in a namespace, along with their similarity scores. + """Query vectors from the index using a query vector. + + Query is used to find the `top_k` vectors in the index whose vector values are most similar to the vector values of + the query according to the distance metric you have configured for your index. See [Query data](https://docs.pinecone.io/docs/query-data) + for more on querying. - API reference: https://docs.pinecone.io/reference/query + **Note:** Each query request can only contain one of the following parameters: `vector`, `id`, or `queries`. 
Examples: - >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') - >>> index.query(id='id1', top_k=10, namespace='my_namespace') - >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) - >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) - >>> index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]}, - >>> top_k=10, namespace='my_namespace') - >>> index.query(vector=[1, 2, 3], sparse_vector=SparseValues([1, 2], [0.2, 0.4]), - >>> top_k=10, namespace='my_namespace') + ```python + index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') + index.query(id='id1', top_k=10, namespace='my_namespace') + index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) + index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) + index.query(vector=[1, 2, 3], sparse_vector={'indices': [1, 2], 'values': [0.2, 0.4]}, + top_k=10, namespace='my_namespace') + index.query(vector=[1, 2, 3], sparse_vector=SparseValues([1, 2], [0.2, 0.4]), + top_k=10, namespace='my_namespace') + ``` + [Pinecone API reference](https://docs.pinecone.io/reference/query) + Args: - vector (List[float]): The query vector. This should be the same length as the dimension of the index - being queried. Each `query()` request can contain only one of the parameters - `queries`, `id` or `vector`.. [optional] - id (str): The unique ID of the vector to be used as a query vector. - Each `query()` request can contain only one of the parameters - `queries`, `vector`, or `id`.. [optional] - queries ([QueryVector]): DEPRECATED. The query vectors. - Each `query()` request can contain only one of the parameters - `queries`, `vector`, or `id`.. [optional] - top_k (int): The number of results to return for each query. Must be an integer greater than 1. - namespace (str): The namespace to fetch vectors from. 
- If not specified, the default namespace is used. [optional] - filter (Dict[str, Union[str, float, int, bool, List, dict]): - The filter to apply. You can use vector metadata to limit your search. - See https://www.pinecone.io/docs/metadata-filtering/.. [optional] - include_values (bool): Indicates whether vector values are included in the response. - If omitted the server will use the default value of False [optional] - include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. - If omitted the server will use the default value of False [optional] - sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector. - Expected to be either a SparseValues object or a dict of the form: - {'indices': List[int], 'values': List[float]}, where the lists each have the same length. + vector (List[float], optional): The query vector. This should be the same length as the dimension of the index being queried. + id (str, optional): The unique ID of the vector to be used as a query vector. + queries ([QueryVector], optional): DEPRECATED. The query vectors. + top_k (int, optional): The number of results to return for each query. Must be an integer greater than 1. + namespace (str, optional): The namespace to query vectors from. If not specified, the default namespace of `''` is used. + filter (Dict[str, Union[str, float, int, bool, List, dict]]): The metadata filter to apply. You can use vector metadata + to limit your search. See [Filtering with metadata](https://www.pinecone.io/docs/metadata-filtering/) for more on filtering. + include_values (bool, optional): Indicates whether vector values are included in the response. If omitted, the server will + use the default value of False. + include_metadata (bool, optional): Indicates whether metadata is included in the response as well as the ids. If omitted the server will + use the default value of False.
+ sparse_vector (Union[SparseValues, Dict[str, Union[List[float], List[int]]]], optional): Sparse values of the query vector. Expected to be + either a ``SparseValues`` object or a dict of the form: `{'indices': List[int], 'values': List[float]}`, + where the lists each have the same length. + Keyword Args: - Supports OpenAPI client keyword arguments. See `QueryRequest` for more details. + Supports OpenAPI client keyword arguments. See ``QueryRequest`` for more details. - Returns: `QueryResponse` object which contains the list of the closest vectors as ScoredVector objects, - and namespace name. + Returns: + ``QueryResponse`` object which contains the list of the closest vectors as ``ScoredVector`` objects along with namespace name. """ def _query_transform(item): @@ -523,36 +568,38 @@ def update( sparse_values: Optional[Union[SparseValues, Dict[str, Union[List[float], List[int]]]]] = None, **kwargs, ) -> Dict[str, Any]: - """ - The Update operation updates vector in a namespace. - If a value is included, it will overwrite the previous value. - If a set_metadata is included, - the values of the fields specified in it will be added or overwrite the previous value. + """Update a vector in the index within a specific namespace. - API reference: https://docs.pinecone.io/reference/update + If `values` are included they will overwrite the previous values. If `set_metadata` is included, + the values of the fields specified will overwrite and merge with existing metadata.
Examples: - >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') - >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace') - >>> index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, - >>> namespace='my_namespace') - >>> index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]), - >>> namespace='my_namespace') + ```python + index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') + index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace') + index.update(id='id1', values=[1, 2, 3], sparse_values={'indices': [1, 2], 'values': [0.2, 0.4]}, + namespace='my_namespace') + index.update(id='id1', values=[1, 2, 3], sparse_values=SparseValues(indices=[1, 2], values=[0.2, 0.4]), + namespace='my_namespace') + ``` + + [Pinecone API reference](https://docs.pinecone.io/reference/update) Args: - id (str): Vector's unique id. - values (List[float]): vector values to set. [optional] - set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): - metadata to set for vector. [optional] - namespace (str): Namespace name where to update the vector.. [optional] - sparse_values: (Dict[str, Union[List[float], List[int]]]): sparse values to update for the vector. - Expected to be either a SparseValues object or a dict of the form: - {'indices': List[int], 'values': List[float]} where the lists each have the same length. + id (str): The unique id of the vector you would like to update. + values (List[float], optional): The vector values you would like to update. + set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]], optional): + The metadata you would like to update. + namespace (str, optional): The namespace from which to update the vector. If not specified, the default namespace of `''` is used. 
+ sparse_values: (Dict[str, Union[List[float], List[int]]], optional): The sparse values you would like to store with this vector. + Expected to be either a SparseValues object or a dict of the form: `{'indices': List[int], 'values': List[float]}` where + the lists each have the same length. Keyword Args: - Supports OpenAPI client keyword arguments. See pinecone.core.client.models.UpdateRequest for more details. + Supports OpenAPI client keyword arguments. See ``UpdateRequest`` for more details. - Returns: An empty dictionary if the update was successful. + Returns: + An empty dictionary if the update was successful. """ _check_type = kwargs.pop("_check_type", False) sparse_values = self._parse_sparse_values_arg(sparse_values) @@ -578,22 +625,23 @@ def update( def describe_index_stats( self, filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, **kwargs ) -> DescribeIndexStatsResponse: - """ - The DescribeIndexStats operation returns statistics about the index's contents. - For example: The vector count per namespace and the number of dimensions. + """ Describe statistics about the index's contents. - API reference: https://docs.pinecone.io/reference/describe_index_stats_post + Returns details such as total number of vectors, vectors per namespace, and the index's dimension size. Examples: >>> index.describe_index_stats() >>> index.describe_index_stats(filter={'key': 'value'}) + [Pinecone API reference](https://docs.pinecone.io/reference/describe_index_stats_post) + Args: - filter (Dict[str, Union[str, float, int, bool, List, dict]]): - If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. - See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + filter (Dict[str, Union[str, float, int, bool, List, dict]], optional): If this parameter is present, + the operation only returns statistics for vectors that satisfy the filter. 
+ See [Filtering with metadata](https://www.pinecone.io/docs/metadata-filtering/) for more on filtering. - Returns: DescribeIndexStatsResponse object which contains stats about the index. + Returns: + ``DescribeIndexStatsResponse`` object which contains stats about the index. """ _check_type = kwargs.pop("_check_type", False) args_dict = self._parse_non_empty_args([("filter", filter)]) From cb0031ca7865cbf32b076020ec2d542f6bebaa2d Mon Sep 17 00:00:00 2001 From: Austin DeNoble Date: Tue, 17 Oct 2023 09:27:21 -0400 Subject: [PATCH 3/3] remove print --- pinecone/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pinecone/config.py b/pinecone/config.py index c0278208..84d0ee04 100644 --- a/pinecone/config.py +++ b/pinecone/config.py @@ -79,7 +79,6 @@ def reset(self, config_file=None, **kwargs): # Set INI file config config = config._replace(**self._preprocess_and_validate_config(file_config)) - print("post init config: ", config) # Set environment config env_config = ConfigBase(