diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ddc5b2f..a7961683 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,15 @@ ## Unreleased Changes None +## [2.1.0](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.13...v2.1.0) +- Fix "Connection Reset by peer" error after long idle periods +- Add typing and explicit names for arguments in all client operations +- Add docstrings to all client operations +- Support batch upsert by passing `batch_size` to `upsert` method +- Improve gRPC query results parsing performance -## [2.0.13](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.13...v2.0.12) + +## [2.0.13](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.12...v2.0.13) - Added support for collections - Users can manage collections using ``create_collection`` , ``describe_collection`` and ``delete_collection`` calls. - Users can specify additional ``source_collection`` parameter during index creation to create index from a collection @@ -11,13 +18,13 @@ None - Added support for vertical scaling. This can be done by changing ```pod_type ``` via the ```configure_index``` call or during index creation. - Updated dependency requirements for grpc client. -## [2.0.12](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.12...v2.0.11) +## [2.0.12](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.11...v2.0.12) - Changed grpcio verison to be > 1.44.1 - Sanitized repo by removing leftover files from old versions. - Added more info to ```describe_index_stats``` call. The call now gives a namespace wise vector count breakdown. -## [2.0.11](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.11...v2.0.10) +## [2.0.11](https://github.com/pinecone-io/pinecone-python-client/compare/v2.0.10...v2.0.11) ### Changed - Added support of querying by a single vector. - This is a step in deprecating batch queries. 
diff --git a/pinecone/__version__ b/pinecone/__version__ index 82bd22f9..7ec1d6db 100644 --- a/pinecone/__version__ +++ b/pinecone/__version__ @@ -1 +1 @@ -2.0.13 +2.1.0 diff --git a/pinecone/config.py b/pinecone/config.py index 876b34d1..84b5793a 100644 --- a/pinecone/config.py +++ b/pinecone/config.py @@ -2,17 +2,22 @@ # Copyright (c) 2020-2021 Pinecone Systems Inc. All right reserved. # import logging -from typing import NamedTuple +import sys +from typing import NamedTuple, List import os import certifi import requests import configparser +import socket + +from urllib3.connection import HTTPConnection from pinecone.core.client.exceptions import ApiKeyError from pinecone.core.api_action import ActionAPI, WhoAmIResponse from pinecone.core.utils import warn_deprecated -from pinecone.core.utils.constants import CLIENT_VERSION, PARENT_LOGGER_NAME, DEFAULT_PARENT_LOGGER_LEVEL +from pinecone.core.utils.constants import CLIENT_VERSION, PARENT_LOGGER_NAME, DEFAULT_PARENT_LOGGER_LEVEL, \ + TCP_KEEPIDLE, TCP_KEEPINTVL, TCP_KEEPCNT from pinecone.core.client.configuration import Configuration as OpenApiConfiguration __all__ = [ @@ -37,7 +42,7 @@ class _CONFIG: Order of configs to load: - - configs specified explictly in reset + - configs specified explicitly in reset - environment variables - configs specified in the INI file - default configs @@ -109,6 +114,8 @@ def reset(self, config_file=None, **kwargs): or default_openapi_config ) + openapi_config.socket_options = self._get_socket_options() + config = config._replace(openapi_config=openapi_config) self._config = config @@ -144,6 +151,54 @@ def _load_config_file(self, config_file: str) -> dict: config_obj = {**parser["default"]} return config_obj + @staticmethod + def _get_socket_options(do_keep_alive: bool = True, + keep_alive_idle_sec: int = TCP_KEEPIDLE, + keep_alive_interval_sec: int = TCP_KEEPINTVL, + keep_alive_tries: int = TCP_KEEPCNT + ) -> List[tuple]: + """ + Returns the socket options to pass to OpenAPI's Rest 
client + Args: + do_keep_alive: Whether to enable TCP keep alive mechanism + keep_alive_idle_sec: Time in seconds of connection idleness before starting to send keep alive probes + keep_alive_interval_sec: Interval time in seconds between keep alive probe messages + keep_alive_tries: Number of failed keep alive tries (unanswered KA messages) before terminating the connection + + Returns: + A list of socket options for the Rest client's connection pool + """ + # Source: https://www.finbourne.com/blog/the-mysterious-hanging-client-tcp-keep-alives + + socket_params = HTTPConnection.default_socket_options + if not do_keep_alive: + return socket_params + + socket_params += [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)] + + # TCP Keep Alive Probes for different platforms + platform = sys.platform + # TCP Keep Alive Probes for Linux + if platform == 'linux' and hasattr(socket, "TCP_KEEPIDLE") and hasattr(socket, "TCP_KEEPINTVL") \ + and hasattr(socket, "TCP_KEEPCNT"): + socket_params += [(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, keep_alive_idle_sec)] + socket_params += [(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, keep_alive_interval_sec)] + socket_params += [(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, keep_alive_tries)] + + # TCP Keep Alive Probes for Windows OS + # NOTE: Changing TCP KA params on windows is done via a different mechanism which OpenAPI's Rest client doesn't expose. + # Since the default values work well, it seems setting `(socket.SO_KEEPALIVE, 1)` is sufficient. + # Leaving this code here for future reference. 
+ # elif platform == 'win32' and hasattr(socket, "SIO_KEEPALIVE_VALS"): + # socket.ioctl((socket.SIO_KEEPALIVE_VALS, (1, keep_alive_idle_sec * 1000, keep_alive_interval_sec * 1000))) + + # TCP Keep Alive Probes for Mac OS + elif platform == 'darwin': + TCP_KEEPALIVE = 0x10 + socket_params += [(socket.IPPROTO_TCP, TCP_KEEPALIVE, keep_alive_interval_sec)] + + return socket_params + @property def ENVIRONMENT(self): return self._config.environment diff --git a/pinecone/core/client/model/query_request.py b/pinecone/core/client/model/query_request.py index b90f3395..dee6245f 100644 --- a/pinecone/core/client/model/query_request.py +++ b/pinecone/core/client/model/query_request.py @@ -308,4 +308,4 @@ def __init__(self, top_k, *args, **kwargs): # noqa: E501 setattr(self, var_name, var_value) if var_name in self.read_only_vars: raise ApiAttributeError(f"`{var_name}` is a read-only attribute. Use `from_openapi_data` to instantiate " - f"class with read only attributes.") + f"class with read only attributes.") diff --git a/pinecone/core/grpc/index_grpc.py b/pinecone/core/grpc/index_grpc.py index b7704f52..7136d131 100644 --- a/pinecone/core/grpc/index_grpc.py +++ b/pinecone/core/grpc/index_grpc.py @@ -4,20 +4,21 @@ import logging from abc import ABC, abstractmethod from functools import wraps -from typing import NamedTuple, Optional, Dict, Iterable +from typing import NamedTuple, Optional, Dict, Iterable, Union, List, Tuple, Any import certifi import grpc from google.protobuf import json_format from grpc._channel import _InactiveRpcError, _MultiThreadedRendezvous -from pinecone import FetchResponse, QueryResponse, ScoredVector, SingleQueryResults, \ - UpsertResponse, DescribeIndexStatsResponse +from tqdm import tqdm + +from pinecone import FetchResponse, QueryResponse, ScoredVector, SingleQueryResults, DescribeIndexStatsResponse from pinecone.config import Config from pinecone.core.client.model.namespace_summary import NamespaceSummary from pinecone.core.client.model.vector 
import Vector as _Vector from pinecone.core.grpc.protos.vector_service_pb2 import Vector as GRPCVector, \ - QueryVector as GRPCQueryVector, UpsertRequest, DeleteRequest, QueryRequest, \ - FetchRequest, UpdateRequest, DescribeIndexStatsRequest + QueryVector as GRPCQueryVector, UpsertRequest, UpsertResponse, DeleteRequest, QueryRequest, \ + FetchRequest, UpdateRequest, DescribeIndexStatsRequest, DeleteResponse, UpdateResponse from pinecone.core.grpc.protos.vector_service_pb2_grpc import VectorServiceStub from pinecone.core.grpc.retry import RetryOnRpcErrorClientInterceptor, RetryConfig from pinecone.core.utils import _generate_request_id, dict_to_proto_struct, fix_tuple_length @@ -171,9 +172,10 @@ def parse_fetch_response(response: dict): return FetchResponse(vectors=vd, namespace=namespace, _check_type=False) -def parse_query_response(response: dict, unary_query: bool): +def parse_query_response(response: dict, unary_query: bool, _check_type: bool = False): res = [] + # TODO: consider deleting this deprecated case for match in response.get('results', []): namespace = match.get('namespace', '') m = [] @@ -187,10 +189,10 @@ def parse_query_response(response: dict, unary_query: bool): m = [] for item in response.get('matches', []): sc = ScoredVector(id=item['id'], score=item.get('score', 0.0), values=item.get('values', []), - metadata=item.get('metadata', {})) + metadata=item.get('metadata', {}), _check_type=_check_type) m.append(sc) - kwargs = {'check_type': False} + kwargs = {'_check_type': _check_type} if unary_query: kwargs['namespace'] = response.get('namespace', '') kwargs['matches'] = m @@ -199,11 +201,6 @@ def parse_query_response(response: dict, unary_query: bool): return QueryResponse(**kwargs) -def parse_upsert_response(response): - response = json_format.MessageToDict(response) - return UpsertResponse(upserted_count=response['upsertedCount'], _check_type=False) - - def parse_stats_response(response: dict): fullness = response.get('indexFullness', 0.0) 
total_vector_count = response.get('totalVectorCount', 0) @@ -251,11 +248,60 @@ def traceback(self, timeout=None): class GRPCIndex(GRPCIndexBase): + """A client for interacting with a Pinecone index via GRPC API.""" + @property def stub_class(self): return VectorServiceStub - def upsert(self, vectors, async_req=False, **kwargs): + def upsert(self, + vectors: Union[List[GRPCVector], List[Tuple]], + async_req: bool = False, + namespace: Optional[str] = None, + batch_size: Optional[int] = None, + show_progress: bool = True, + **kwargs) -> Union[UpsertResponse, PineconeGrpcFuture]: + """ + The upsert operation writes vectors into a namespace. + If a new value is upserted for an existing vector id, it will overwrite the previous value. + + Examples: + >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])], namespace='ns1') + >>> index.upsert([GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), + >>> GRPCVector(id='id2', values=[1.0, 2.0, 3.0])], async_req=True) + + Args: + vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert. + + A vector can be represented by a 1) GRPCVector object or a 2) tuple. + 1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values). + where id is a string, vector is a list of floats, and metadata is a dict. + Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) + + 2) if a GRPCVector object is used, a GRPCVector object must be of the form + GRPCVector(id, values, metadata), where metadata is an optional argument of type + Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]] + Examples: GRPCVector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), + GRPCVector(id='id2', values=[1.0, 2.0, 3.0]) + + Note: the dimension of each vector must match the dimension of the index. + async_req (bool): If True, the upsert operation will be performed asynchronously. + Cannot be used with batch_size. 
+ Defaults to False. See: https://docs.pinecone.io/docs/performance-tuning [optional] + namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional] + batch_size (int): The number of vectors to upsert in each batch. + Cannot be used with async_req=Ture. + If not specified, all vectors will be upserted in a single batch. [optional] + show_progress (bool): Whether to show a progress bar using tqdm. + Applied only if batch_size is provided. Default is True. + + Returns: UpsertResponse, contains the number of vectors upserted + """ + if async_req and batch_size is not None: + raise ValueError('async_req is not supported when batch_size is provided.' + 'To upsert in parallel, please follow: ' + 'https://docs.pinecone.io/docs/performance-tuning') + def _vector_transform(item): if isinstance(item, GRPCVector): return item @@ -264,37 +310,167 @@ def _vector_transform(item): return GRPCVector(id=id, values=values, metadata=dict_to_proto_struct(metadata) or {}) raise ValueError(f"Invalid vector value passed: cannot interpret type {type(item)}") - request = UpsertRequest(vectors=list(map(_vector_transform, vectors)), **kwargs) timeout = kwargs.pop('timeout', None) + + vectors = list(map(_vector_transform, vectors)) if async_req: + args_dict = self._parse_non_empty_args([('namespace', namespace)]) + request = UpsertRequest(vectors=vectors, **args_dict, **kwargs) future = self._wrap_grpc_call(self.stub.Upsert.future, request, timeout=timeout) return PineconeGrpcFuture(future) - else: - return self._wrap_grpc_call(self.stub.Upsert, request, timeout=timeout) - - def delete(self, *args, async_req=False, **kwargs): - _filter = dict_to_proto_struct(kwargs.pop('filter', None)) - filter_param = {} - if _filter: - filter_param['filter'] = _filter - request = DeleteRequest(*args, **kwargs, **filter_param) + + if batch_size is None: + return self._upsert_batch(vectors, namespace, timeout=timeout, **kwargs) + + if not isinstance(batch_size, int) 
or batch_size <= 0: + raise ValueError('batch_size must be a positive integer') + + pbar = tqdm(total=len(vectors), disable=not show_progress, desc='Upserted vectors') + total_upserted = 0 + for i in range(0, len(vectors), batch_size): + batch_result = self._upsert_batch(vectors[i:i + batch_size], namespace, timeout=timeout, **kwargs) + pbar.update(batch_result.upserted_count) + # we can't use here pbar.n for the case show_progress=False + total_upserted += batch_result.upserted_count + + return UpsertResponse(upserted_count=total_upserted) + + def _upsert_batch(self, + vectors: List[GRPCVector], + namespace: Optional[str], + timeout: Optional[float], + **kwargs) -> UpsertResponse: + args_dict = self._parse_non_empty_args([('namespace', namespace)]) + request = UpsertRequest(vectors=vectors, **args_dict) + return self._wrap_grpc_call(self.stub.Upsert, request, timeout=timeout, **kwargs) + + def delete(self, + ids: Optional[List[str]] = None, + delete_all: Optional[bool] = None, + namespace: Optional[str] = None, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + async_req: bool = False, + **kwargs) -> Union[DeleteResponse, PineconeGrpcFuture]: + """ + The Delete operation deletes vectors from the index, from a single namespace. + No error raised if the vector id does not exist. + Note: for any delete call, if namespace is not specified, the default namespace is used. + + Delete can occur in the following mutual exclusive ways: + 1. Delete by ids from a single namespace + 2. Delete all vectors from a single namespace by setting delete_all to True + 3. 
Delete all vectors from a single namespace by specifying a metadata filter + (note that for this option delete all must be set to False) + + Examples: + >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') + >>> index.delete(delete_all=True, namespace='my_namespace') + >>> index.delete(filter={'key': 'value'}, namespace='my_namespace', async_req=True) + + Args: + ids (List[str]): Vector ids to delete [optional] + delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] + Default is False. + namespace (str): The namespace to delete vectors from [optional] + If not specified, the default namespace is used. + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + If specified, the metadata filter here will be used to select the vectors to delete. + This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + async_req (bool): If True, the delete operation will be performed asynchronously. + Defaults to False. [optional] + + Returns: DeleteResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. + """ + + if filter is not None: + filter = dict_to_proto_struct(filter) + + args_dict = self._parse_non_empty_args([('ids', ids), + ('delete_all', delete_all), + ('namespace', namespace), + ('filter', filter)]) timeout = kwargs.pop('timeout', None) + + request = DeleteRequest(**args_dict, **kwargs) if async_req: future = self._wrap_grpc_call(self.stub.Delete.future, request, timeout=timeout) return PineconeGrpcFuture(future) else: return self._wrap_grpc_call(self.stub.Delete, request, timeout=timeout) - def fetch(self, *args, **kwargs): + def fetch(self, + ids: Optional[List[str]], + namespace: Optional[str] = None, + **kwargs) -> FetchResponse: + """ + The fetch operation looks up and returns vectors, by ID, from a single namespace. 
+ The returned vectors include the vector data and/or metadata. + + Examples: + >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') + >>> index.fetch(ids=['id1', 'id2']) + + Args: + ids (List[str]): The vector IDs to fetch. + namespace (str): The namespace to fetch vectors from. + If not specified, the default namespace is used. [optional] + + Returns: FetchResponse object which contains the list of Vector objects, and namespace name. + """ timeout = kwargs.pop('timeout', None) - request = FetchRequest(*args, **kwargs) + + args_dict = self._parse_non_empty_args([('namespace', namespace)]) + + request = FetchRequest(ids=ids, **args_dict, **kwargs) response = self._wrap_grpc_call(self.stub.Fetch, request, timeout=timeout) json_response = json_format.MessageToDict(response) return parse_fetch_response(json_response) - def query(self, vector=[], id='', queries=[], **kwargs): - timeout = kwargs.pop('timeout', None) - + def query(self, + vector: Optional[List[float]] = None, + id: Optional[str] = None, + queries: Optional[Union[List[GRPCQueryVector], List[Tuple]]] = None, + namespace: Optional[str] = None, + top_k: Optional[int] = None, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + include_values: Optional[bool] = None, + include_metadata: Optional[bool] = None, + **kwargs) -> QueryResponse: + """ + The Query operation searches a namespace, using a query vector. + It retrieves the ids of the most similar items in a namespace, along with their similarity scores. + + Examples: + >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') + >>> index.query(id='id1', top_k=10, namespace='my_namespace') + >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) + >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) + + Args: + vector (List[float]): The query vector. 
This should be the same length as the dimension of the index + being queried. Each `query()` request can contain only one of the parameters + `queries`, `id` or `vector`.. [optional] + id (str): The unique ID of the vector to be used as a query vector. + Each `query()` request can contain only one of the parameters + `queries`, `vector`, or `id`.. [optional] + queries ([GRPCQueryVector]): DEPRECATED. The query vectors. + Each `query()` request can contain only one of the parameters + `queries`, `vector`, or `id`.. [optional] + top_k (int): The number of results to return for each query. Must be an integer greater than 1. + namespace (str): The namespace to fetch vectors from. + If not specified, the default namespace is used. [optional] + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + The filter to apply. You can use vector metadata to limit your search. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + include_values (bool): Indicates whether vector values are included in the response. + If omitted the server will use the default value of False [optional] + include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. + If omitted the server will use the default value of False [optional] + + Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, + and namespace name. 
+ """ def _query_transform(item): if isinstance(item, GRPCQueryVector): return item @@ -306,36 +482,99 @@ def _query_transform(item): return GRPCQueryVector(values=item) raise ValueError(f"Invalid query vector value passed: cannot interpret type {type(item)}") - _QUERY_ARGS = ['namespace', 'top_k', 'filter', 'include_values', 'include_metadata'] - if 'filter' in kwargs: - kwargs['filter'] = dict_to_proto_struct(kwargs['filter']) - request = QueryRequest(queries=list(map(_query_transform, queries)), - vector=vector, - id=id, - **{k: v for k, v in kwargs.items() if k in _QUERY_ARGS}) + queries = list(map(_query_transform, queries)) if queries is not None else None + + if filter is not None: + filter = dict_to_proto_struct(filter) + + args_dict = self._parse_non_empty_args([('vector', vector), + ('id', id), + ('queries', queries), + ('namespace', namespace), + ('top_k', top_k), + ('filter', filter), + ('include_values', include_values), + ('include_metadata', include_metadata)]) + + request = QueryRequest(**args_dict) + + timeout = kwargs.pop('timeout', None) response = self._wrap_grpc_call(self.stub.Query, request, timeout=timeout) json_response = json_format.MessageToDict(response) - return parse_query_response(json_response, vector or id) - - def update(self, id, async_req=False, **kwargs): - _UPDATE_ARGS = ['values', 'set_metadata', 'namespace'] - if 'set_metadata' in kwargs: - kwargs['set_metadata'] = dict_to_proto_struct(kwargs['set_metadata']) - request = UpdateRequest(id=id, **{k: v for k, v in kwargs.items() if k in _UPDATE_ARGS}) + return parse_query_response(json_response, vector is not None or id, _check_type=False) + + def update(self, + id: str, + async_req: bool = False, + values: Optional[List[float]] = None, + set_metadata: Optional[Dict[str, + Union[str, float, int, bool, List[int], List[float], List[str]]]] = None, + namespace: Optional[str] = None, + **kwargs) -> Union[UpdateResponse, PineconeGrpcFuture]: + """ + The Update operation updates vector 
in a namespace. + If a value is included, it will overwrite the previous value. + If a set_metadata is included, + the values of the fields specified in it will be added or overwrite the previous value. + + Examples: + >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') + >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace', async_req=True) + + Args: + id (str): Vector's unique id. + async_req (bool): If True, the update operation will be performed asynchronously. + Defaults to False. [optional] + values (List[float]): vector values to set. [optional] + set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): + metadata to set for vector. [optional] + namespace (str): Namespace name where to update the vector.. [optional] + + Returns: UpdateResponse (contains no data) or a PineconeGrpcFuture object if async_req is True. + """ + if set_metadata is not None: + set_metadata = dict_to_proto_struct(set_metadata) timeout = kwargs.pop('timeout', None) + + args_dict = self._parse_non_empty_args([('values', values), + ('set_metadata', set_metadata), + ('namespace', namespace)]) + + request = UpdateRequest(id=id, **args_dict) if async_req: future = self._wrap_grpc_call(self.stub.Update.future, request, timeout=timeout) return PineconeGrpcFuture(future) else: return self._wrap_grpc_call(self.stub.Update, request, timeout=timeout) - def describe_index_stats(self, **kwargs): - _filter = dict_to_proto_struct(kwargs.pop('filter', None)) - filter_param = {} - if _filter: - filter_param['filter'] = _filter + def describe_index_stats(self, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + **kwargs) -> DescribeIndexStatsResponse: + """ + The DescribeIndexStats operation returns statistics about the index's contents. + For example: The vector count per namespace and the number of dimensions. 
+ + Examples: + >>> index.describe_index_stats() + >>> index.describe_index_stats(filter={'key': 'value'}) + + Args: + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + + Returns: DescribeIndexStatsResponse object which contains stats about the index. + """ + if filter is not None: + filter = dict_to_proto_struct(filter) + args_dict = self._parse_non_empty_args([('filter', filter)]) timeout = kwargs.pop('timeout', None) - request = DescribeIndexStatsRequest(**filter_param) + + request = DescribeIndexStatsRequest(**args_dict) response = self._wrap_grpc_call(self.stub.DescribeIndexStats, request, timeout=timeout) json_response = json_format.MessageToDict(response) return parse_stats_response(json_response) + + @staticmethod + def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]: + return {arg_name: val for arg_name, val in args if val is not None} diff --git a/pinecone/core/utils/constants.py b/pinecone/core/utils/constants.py index e7ed81b5..d03f1a71 100644 --- a/pinecone/core/utils/constants.py +++ b/pinecone/core/utils/constants.py @@ -33,3 +33,7 @@ class NodeType(str, enum.Enum): PACKAGE_ENVIRONMENT = get_environment() or "development" CLIENT_VERSION = get_version() CLIENT_ID = f'python-client-{CLIENT_VERSION}' + +TCP_KEEPINTVL = 60 # Sec +TCP_KEEPIDLE = 300 # Sec +TCP_KEEPCNT = 4 diff --git a/pinecone/index.py b/pinecone/index.py index 945130e2..b14b39e7 100644 --- a/pinecone/index.py +++ b/pinecone/index.py @@ -1,11 +1,12 @@ # # Copyright (c) 2020-2021 Pinecone Systems Inc. All right reserved. 
# - +from tqdm import tqdm from collections.abc import Iterable +from typing import Union, List, Tuple, Optional, Dict, Any from pinecone import Config -from pinecone.core.client import ApiClient, Configuration +from pinecone.core.client import ApiClient from .core.client.models import FetchResponse, ProtobufAny, QueryRequest, QueryResponse, QueryVector, RpcStatus, \ ScoredVector, SingleQueryResults, DescribeIndexStatsResponse, UpsertRequest, UpsertResponse, UpdateRequest, \ Vector, DeleteRequest, UpdateRequest, DescribeIndexStatsRequest @@ -38,6 +39,10 @@ def parse_query_response(response: QueryResponse, unary_query: bool): class Index(ApiClient): + """ + A client for interacting with a Pinecone index via REST API. + For improved performance, use the Pinecone GRPC index client. + """ def __init__(self, index_name: str, pool_threads=1): openapi_client_config = copy.deepcopy(Config.OPENAPI_CONFIG) openapi_client_config.api_key = openapi_client_config.api_key or {} @@ -56,10 +61,83 @@ def __init__(self, index_name: str, pool_threads=1): self._vector_api = VectorOperationsApi(self) @validate_and_convert_errors - def upsert(self, vectors, **kwargs): + def upsert(self, + vectors: Union[List[Vector], List[Tuple]], + namespace: Optional[str] = None, + batch_size: Optional[int] = None, + show_progress: bool = True, + **kwargs) -> UpsertResponse: + """ + The upsert operation writes vectors into a namespace. + If a new value is upserted for an existing vector id, it will overwrite the previous value. 
+ + API reference: https://docs.pinecone.io/reference/upsert + + To upsert in parallel follow: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel + + Examples: + >>> index.upsert([('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0])]) + >>> index.upsert([Vector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), + >>> Vector(id='id2', values=[1.0, 2.0, 3.0])]) + + Args: + vectors (Union[List[Vector], List[Tuple]]): A list of vectors to upsert. + + A vector can be represented by a 1) Vector object or a 2) tuple. + 1) if a tuple is used, it must be of the form (id, values, metadata) or (id, values). + where id is a string, vector is a list of floats, and metadata is a dict. + Examples: ('id1', [1.0, 2.0, 3.0], {'key': 'value'}), ('id2', [1.0, 2.0, 3.0]) + + 2) if a Vector object is used, a Vector object must be of the form Vector(id, values, metadata), + where metadata is an optional argument of the type + Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]. + Examples: Vector(id='id1', values=[1.0, 2.0, 3.0], metadata={'key': 'value'}), + Vector(id='id2', values=[1.0, 2.0, 3.0]) + + Note: the dimension of each vector must match the dimension of the index. + + namespace (str): The namespace to write to. If not specified, the default namespace is used. [optional] + batch_size (int): The number of vectors to upsert in each batch. + If not specified, all vectors will be upserted in a single batch. [optional] + show_progress (bool): Whether to show a progress bar using tqdm. + Applied only if batch_size is provided. Default is True. + Keyword Args: + Supports OpenAPI client keyword arguments. See pinecone.core.client.models.UpsertRequest for more details. + + Returns: UpsertResponse, includes the number of vectors upserted. 
+ """ _check_type = kwargs.pop('_check_type', False) - def _vector_transform(item): + if kwargs.get('async_req', False) and batch_size is not None: + raise ValueError('async_req is not supported when batch_size is provided.' + 'To upsert in parallel, please follow: ' + 'https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel') + + if batch_size is None: + return self._upsert_batch(vectors, namespace, _check_type, **kwargs) + + if not isinstance(batch_size, int) or batch_size <= 0: + raise ValueError('batch_size must be a positive integer') + + pbar = tqdm(total=len(vectors), disable=not show_progress, desc='Upserted vectors') + total_upserted = 0 + for i in range(0, len(vectors), batch_size): + batch_result = self._upsert_batch(vectors[i:i + batch_size], namespace, _check_type, **kwargs) + pbar.update(batch_result.upserted_count) + # we can't use here pbar.n for the case show_progress=False + total_upserted += batch_result.upserted_count + + return UpsertResponse(upserted_count=total_upserted) + + def _upsert_batch(self, + vectors: List[Vector], + namespace: Optional[str], + _check_type: bool, + **kwargs) -> UpsertResponse: + + args_dict = self._parse_non_empty_args([('namespace', namespace)]) + + def _vector_transform(item: Union[Vector, Tuple]): if isinstance(item, Vector): return item if isinstance(item, tuple): @@ -70,6 +148,7 @@ def _vector_transform(item): return self._vector_api.upsert( UpsertRequest( vectors=list(map(_vector_transform, vectors)), + **args_dict, _check_type=_check_type, **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS} ), @@ -77,24 +156,163 @@ def _vector_transform(item): ) @validate_and_convert_errors - def delete(self, *args, **kwargs): + def delete(self, + ids: Optional[List[str]] = None, + delete_all: Optional[bool] = None, + namespace: Optional[str] = None, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + **kwargs) -> Dict[str, Any]: + """ + The Delete operation 
deletes vectors from the index, from a single namespace. + No error raised if the vector id does not exist. + Note: for any delete call, if namespace is not specified, the default namespace is used. + + Delete can occur in the following mutual exclusive ways: + 1. Delete by ids from a single namespace + 2. Delete all vectors from a single namespace by setting delete_all to True + 3. Delete all vectors from a single namespace by specifying a metadata filter + (note that for this option delete all must be set to False) + + API reference: https://docs.pinecone.io/reference/delete_post + + Examples: + >>> index.delete(ids=['id1', 'id2'], namespace='my_namespace') + >>> index.delete(delete_all=True, namespace='my_namespace') + >>> index.delete(filter={'key': 'value'}, namespace='my_namespace') + + Args: + ids (List[str]): Vector ids to delete [optional] + delete_all (bool): This indicates that all vectors in the index namespace should be deleted.. [optional] + Default is False. + namespace (str): The namespace to delete vectors from [optional] + If not specified, the default namespace is used. + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + If specified, the metadata filter here will be used to select the vectors to delete. + This is mutually exclusive with specifying ids to delete in the ids param or using delete_all=True. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + + Keyword Args: + Supports OpenAPI client keyword arguments. See pinecone.core.client.models.DeleteRequest for more details. + + + Returns: An empty dictionary if the delete operation was successful. 
+ """ _check_type = kwargs.pop('_check_type', False) + args_dict = self._parse_non_empty_args([('ids', ids), + ('delete_all', delete_all), + ('namespace', namespace), + ('filter', filter)]) + return self._vector_api.delete( DeleteRequest( - *args, - **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS}, + **args_dict, + **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS and v is not None}, _check_type=_check_type ), **{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS} ) @validate_and_convert_errors - def fetch(self, *args, **kwargs): - return self._vector_api.fetch(*args, **kwargs) + def fetch(self, + ids: List[str], + namespace: Optional[str] = None, + **kwargs) -> FetchResponse: + """ + The fetch operation looks up and returns vectors, by ID, from a single namespace. + The returned vectors include the vector data and/or metadata. + + API reference: https://docs.pinecone.io/reference/fetch + + Examples: + >>> index.fetch(ids=['id1', 'id2'], namespace='my_namespace') + >>> index.fetch(ids=['id1', 'id2']) + + Args: + ids (List[str]): The vector IDs to fetch. + namespace (str): The namespace to fetch vectors from. + If not specified, the default namespace is used. [optional] + Keyword Args: + Supports OpenAPI client keyword arguments. See pinecone.core.client.models.FetchResponse for more details. + + + Returns: FetchResponse object which contains the list of Vector objects, and namespace name. 
+ """ + args_dict = self._parse_non_empty_args([('namespace', namespace)]) + return self._vector_api.fetch(ids=ids, **args_dict, **kwargs) @validate_and_convert_errors - def query(self, vector=[], id='', queries=[], **kwargs): + def query(self, + vector: Optional[List[float]] = None, + id: Optional[str] = None, + queries: Optional[Union[List[QueryVector], List[Tuple]]] = None, + top_k: Optional[int] = None, + namespace: Optional[str] = None, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + include_values: Optional[bool] = None, + include_metadata: Optional[bool] = None, + **kwargs) -> QueryResponse: + """ + The Query operation searches a namespace, using a query vector. + It retrieves the ids of the most similar items in a namespace, along with their similarity scores. + + API reference: https://docs.pinecone.io/reference/query + + Examples: + >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace') + >>> index.query(id='id1', top_k=10, namespace='my_namespace') + >>> index.query(vector=[1, 2, 3], top_k=10, namespace='my_namespace', filter={'key': 'value'}) + >>> index.query(id='id1', top_k=10, namespace='my_namespace', include_metadata=True, include_values=True) + + Args: + vector (List[float]): The query vector. This should be the same length as the dimension of the index + being queried. Each `query()` request can contain only one of the parameters + `queries`, `id` or `vector`.. [optional] + id (str): The unique ID of the vector to be used as a query vector. + Each `query()` request can contain only one of the parameters + `queries`, `vector`, or `id`.. [optional] + queries ([QueryVector]): DEPRECATED. The query vectors. + Each `query()` request can contain only one of the parameters + `queries`, `vector`, or `id`.. [optional] + top_k (int): The number of results to return for each query. Must be an integer greater than 1. + namespace (str): The namespace to fetch vectors from. 
+ If not specified, the default namespace is used. [optional] + filter (Dict[str, Union[str, float, int, bool, List, dict]): + The filter to apply. You can use vector metadata to limit your search. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + include_values (bool): Indicates whether vector values are included in the response. + If omitted the server will use the default value of False [optional] + include_metadata (bool): Indicates whether metadata is included in the response as well as the ids. + If omitted the server will use the default value of False [optional] + + Keyword Args: + Supports OpenAPI client keyword arguments. See pinecone.core.client.models.QueryRequest for more details. + + Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects, + and namespace name. + """ + def _query_transform(item): + if isinstance(item, QueryVector): + return item + if isinstance(item, tuple): + values, filter = fix_tuple_length(item, 2) + if filter is None: + return QueryVector(values=values, _check_type=_check_type) + else: + return QueryVector(values=values, filter=filter, _check_type=_check_type) + if isinstance(item, Iterable): + return QueryVector(values=item, _check_type=_check_type) + raise ValueError(f"Invalid query vector value passed: cannot interpret type {type(item)}") + _check_type = kwargs.pop('_check_type', False) + queries = list(map(_query_transform, queries)) if queries is not None else None + args_dict = self._parse_non_empty_args([('vector', vector), + ('id', id), + ('queries', queries), + ('top_k', top_k), + ('namespace', namespace), + ('filter', filter), + ('include_values', include_values), + ('include_metadata', include_metadata)]) def _query_transform(item): if isinstance(item, QueryVector): @@ -108,34 +326,91 @@ def _query_transform(item): response = self._vector_api.query( QueryRequest( - queries=list(map(_query_transform, queries)), - vector=vector, - id=id, + **args_dict, 
_check_type=_check_type, **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS} ), **{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS} ) - return parse_query_response(response, vector or id) + return parse_query_response(response, vector is not None or id) @validate_and_convert_errors - def update(self, id, **kwargs): + def update(self, + id: str, + values: Optional[List[float]] = None, + set_metadata: Optional[Dict[str, + Union[str, float, int, bool, List[int], List[float], List[str]]]] = None, + namespace: Optional[str] = None, + **kwargs) -> Dict[str, Any]: + """ + The Update operation updates vector in a namespace. + If a value is included, it will overwrite the previous value. + If a set_metadata is included, + the values of the fields specified in it will be added or overwrite the previous value. + + API reference: https://docs.pinecone.io/reference/update + + Examples: + >>> index.update(id='id1', values=[1, 2, 3], namespace='my_namespace') + >>> index.update(id='id1', set_metadata={'key': 'value'}, namespace='my_namespace') + + Args: + id (str): Vector's unique id. + values (List[float]): vector values to set. [optional] + set_metadata (Dict[str, Union[str, float, int, bool, List[int], List[float], List[str]]]]): + metadata to set for vector. [optional] + namespace (str): Namespace name where to update the vector.. [optional] + + Keyword Args: + Supports OpenAPI client keyword arguments. See pinecone.core.client.models.UpdateRequest for more details. + + Returns: An empty dictionary if the update was successful. 
+ """ _check_type = kwargs.pop('_check_type', False) + args_dict = self._parse_non_empty_args([('values', values), + ('set_metadata', set_metadata), + ('namespace', namespace)]) return self._vector_api.update(UpdateRequest( id=id, + **args_dict, _check_type=_check_type, **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS} ), **{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS}) @validate_and_convert_errors - def describe_index_stats(self, *args, **kwargs): + def describe_index_stats(self, + filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None, + **kwargs) -> DescribeIndexStatsResponse: + """ + The DescribeIndexStats operation returns statistics about the index's contents. + For example: The vector count per namespace and the number of dimensions. + + API reference: https://docs.pinecone.io/reference/describe_index_stats_post + + Examples: + >>> index.describe_index_stats() + >>> index.describe_index_stats(filter={'key': 'value'}) + + Args: + filter (Dict[str, Union[str, float, int, bool, List, dict]]): + If this parameter is present, the operation only returns statistics for vectors that satisfy the filter. + See https://www.pinecone.io/docs/metadata-filtering/.. [optional] + + Returns: DescribeIndexStatsResponse object which contains stats about the index. 
+ """ _check_type = kwargs.pop('_check_type', False) + args_dict = self._parse_non_empty_args([('filter', filter)]) + return self._vector_api.describe_index_stats( DescribeIndexStatsRequest( - *args, + **args_dict, **{k: v for k, v in kwargs.items() if k not in _OPENAPI_ENDPOINT_PARAMS}, _check_type=_check_type ), **{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS} ) + + @staticmethod + def _parse_non_empty_args(args: List[Tuple[str, Any]]) -> Dict[str, Any]: + return {arg_name: val for arg_name, val in args if val is not None} diff --git a/requirements.txt b/requirements.txt index 05b55a2a..82331727 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ typing-extensions>=3.7.4 dnspython>=2.0.0 # openapi generated client: python_dateutil >= 2.5.3 -urllib3 >= 1.21.1 \ No newline at end of file +urllib3 >= 1.21.1 +tqdm >= 4.64.1 \ No newline at end of file diff --git a/tests/unit/test_grpc_index.py b/tests/unit/test_grpc_index.py new file mode 100644 index 00000000..3d192167 --- /dev/null +++ b/tests/unit/test_grpc_index.py @@ -0,0 +1,378 @@ +import pytest + +import pinecone +from core.utils import dict_to_proto_struct +from pinecone import DescribeIndexStatsRequest +from pinecone.core.grpc.protos.vector_service_pb2 import Vector, DescribeIndexStatsRequest, UpdateRequest, \ + UpsertRequest, FetchRequest, QueryRequest, DeleteRequest, QueryVector, UpsertResponse + + +class TestGrpcIndex: + + def setup_method(self): + self.vector_dim = 8 + self.vals1 = [0.1] * self.vector_dim + self.vals2 = [0.2] * self.vector_dim + self.md1 = {'genre': 'action', 'year': 2021} + self.md2 = {'genre': 'documentary', 'year': 2020} + self.filter1 = {'genre': {'$in': ['action']}} + self.filter2 = {'year': {'$eq': 2020}} + + pinecone.init(api_key='example-key') + self.index = pinecone.GRPCIndex('example-name') + + # region: upsert tests + + def test_upsert_tuplesOfIdVec_UpserWithoutMD(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', 
autospec=True) + self.index.upsert([('vec1', self.vals1), ('vec2', self.vals2)], namespace='ns') + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata={}), + Vector(id='vec2', values=self.vals2, metadata={})], + namespace='ns'), + timeout=None + ) + + def test_upsert_tuplesOfIdVecMD_UpsertVectorsWithMD(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.upsert([('vec1', self.vals1, self.md1), ('vec2', self.vals2, self.md2)], namespace='ns') + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + def test_upsert_vectors_upsertInputVectors(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns') + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + def test_upsert_async_upsertInputVectorsAsync(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns', + async_req=True) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Upsert.future, + UpsertRequest( + 
vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + def test_upsert_vectorListIsMultiplyOfBatchSize_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True, + side_effect=lambda stub, upsert_request, timeout: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns', + batch_size=1, + show_progress=False) + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns'), + timeout=None) + + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + UpsertRequest( + vectors=[Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + assert result.upserted_count == 2 + + def test_upsert_vectorListNotMultiplyOfBatchSize_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True, + side_effect=lambda stub, upsert_request, timeout: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2)), + Vector(id='vec3', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns', + batch_size=2) + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', 
values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + UpsertRequest( + vectors=[Vector(id='vec3', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns'), + timeout=None) + + assert result.upserted_count == 3 + + def test_upsert_vectorListSmallerThanBatchSize_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True, + side_effect=lambda stub, upsert_request, timeout: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns', + batch_size=5) + + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + assert result.upserted_count == 2 + + def test_upsert_tuplesList_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True, + side_effect=lambda stub, upsert_request, timeout: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert([('vec1', self.vals1, self.md1), + ('vec2', self.vals2, self.md2), + ('vec3', self.vals1, self.md1)], + namespace='ns', + batch_size=2) + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + UpsertRequest( + vectors=[ + Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1)), + Vector(id='vec2', values=self.vals2, metadata=dict_to_proto_struct(self.md2))], + namespace='ns'), + timeout=None) + + self.index._wrap_grpc_call.assert_any_call( + self.index.stub.Upsert, + 
UpsertRequest( + vectors=[Vector(id='vec3', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns'), + timeout=None) + + assert result.upserted_count == 3 + + def test_upsert_batchSizeIsNotPositive_errorIsRaised(self): + with pytest.raises(ValueError, match='batch_size must be a positive integer'): + self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns', + batch_size=0) + + with pytest.raises(ValueError, match='batch_size must be a positive integer'): + self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns', + batch_size=-1) + + def test_upsert_useBatchSizeAndAsyncReq_valueErrorRaised(self): + with pytest.raises(ValueError, match='async_req is not supported when batch_size is provided.'): + self.index.upsert([Vector(id='vec1', values=self.vals1, metadata=dict_to_proto_struct(self.md1))], + namespace='ns', + batch_size=2, + async_req=True) + + # endregion + + # region: query tests + + def test_query_byVectorNoFilter_queryVectorNoFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.query(top_k=10, vector=self.vals1) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Query, + QueryRequest(top_k=10, vector=self.vals1), + timeout=None, + ) + + def test_query_byVectorWithFilter_queryVectorWithFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.query(top_k=10, vector=self.vals1, filter=self.filter1, namespace='ns', timeout=10) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Query, + QueryRequest(top_k=10, vector=self.vals1, filter=dict_to_proto_struct(self.filter1), namespace='ns'), + timeout=10, + ) + + def test_query_byTuplesNoFilter_queryVectorsNoFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.query(top_k=10, queries=[ + 
(self.vals1,), + (self.vals2,) + ]) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Query, + QueryRequest(queries=[ + QueryVector(values=self.vals1, filter={}), + QueryVector(values=self.vals2, filter={}) + ], top_k=10), + timeout=None, + ) + + def test_query_byTuplesWithFilter_queryVectorsWithFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.query(top_k=10, queries=[ + (self.vals1, self.filter1), + (self.vals2, self.filter2) + ]) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Query, + QueryRequest(queries=[ + QueryVector(values=self.vals1, filter=dict_to_proto_struct(self.filter1)), + QueryVector(values=self.vals2, filter=dict_to_proto_struct(self.filter2)) + ], top_k=10), + timeout=None, + ) + + def test_query_byVecId_queryByVecId(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.query(top_k=10, id='vec1', include_metadata=True, include_values=False) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Query, + QueryRequest(top_k=10, id='vec1', include_metadata=True, include_values=False), + timeout=None, + ) + + # endregion + + # region: delete tests + + def test_delete_byIds_deleteByIds(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.delete(ids=['vec1', 'vec2']) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Delete, + DeleteRequest(ids=['vec1', 'vec2']), + timeout=None, + ) + + def test_delete_byIdsAsync_deleteByIdsAsync(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.delete(ids=['vec1', 'vec2'], async_req=True) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Delete.future, + DeleteRequest(ids=['vec1', 'vec2']), + timeout=None, + ) + + def test_delete_deleteAllByFilter_deleteAllByFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', 
autospec=True) + self.index.delete(delete_all=True, filter=self.filter1, namespace='ns', timeout=30) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Delete, + DeleteRequest(delete_all=True, filter=dict_to_proto_struct(self.filter1), namespace='ns'), + timeout=30, + ) + + def test_delete_deleteAllNoFilter_deleteNoFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.delete(delete_all=True) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Delete, + DeleteRequest(delete_all=True), + timeout=None, + ) + + # endregion + + # region: fetch tests + + def test_fetch_byIds_fetchByIds(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.fetch(['vec1', 'vec2']) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Fetch, + FetchRequest(ids=['vec1', 'vec2']), + timeout=None, + ) + + def test_fetch_byIdsAndNS_fetchByIdsAndNS(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.fetch(['vec1', 'vec2'], namespace='ns', timeout=30) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Fetch, + FetchRequest(ids=['vec1', 'vec2'], namespace='ns'), + timeout=30, + ) + + # endregion + + # region: update tests + + def test_update_byIdAnValues_updateByIdAndValues(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.update(id='vec1', values=self.vals1, namespace='ns', timeout=30) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Update, + UpdateRequest(id='vec1', values=self.vals1, namespace='ns'), + timeout=30, + ) + + def test_update_byIdAnValuesAsync_updateByIdAndValuesAsync(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.update(id='vec1', values=self.vals1, namespace='ns', timeout=30, async_req=True) + self.index._wrap_grpc_call.assert_called_once_with( + 
self.index.stub.Update.future, + UpdateRequest(id='vec1', values=self.vals1, namespace='ns'), + timeout=30, + ) + + def test_update_byIdAnValuesAndMetadata_updateByIdAndValuesAndMetadata(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.update('vec1', values=self.vals1, set_metadata=self.md1) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.Update, + UpdateRequest(id='vec1', values=self.vals1, set_metadata=dict_to_proto_struct(self.md1)), + timeout=None, + ) + + # endregion + + # region: describe index tests + + def test_describeIndexStats_callWithoutFilter_CalledWithoutFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.describe_index_stats() + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.DescribeIndexStats, + DescribeIndexStatsRequest(), + timeout=None, + ) + + def test_describeIndexStats_callWithFilter_CalledWithFilter(self, mocker): + mocker.patch.object(self.index, '_wrap_grpc_call', autospec=True) + self.index.describe_index_stats(filter=self.filter1) + self.index._wrap_grpc_call.assert_called_once_with( + self.index.stub.DescribeIndexStats, + DescribeIndexStatsRequest(filter=dict_to_proto_struct(self.filter1)), + timeout=None, + ) + + # endregion diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 142f1ed7..6c89467a 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -1,64 +1,365 @@ -vector_dim = 8 -vals1 = [0.1] * vector_dim -vals2 = [0.2] * vector_dim -md1 = {'genre': 'action', 'year': 2021} -md2 = {'genre': 'documentary', 'year': 2020} -filter1 = {'genre': {'$in': ['action']}} -filter2 = {'year': {'$eq': 2020}} - - -def test_upsert_request_tuples_id_data(mocker): - import pinecone - pinecone.init(api_key='example-key') - index = pinecone.Index('example-name') - mocker.patch.object(index._vector_api, 'upsert', autospec=True) - index.upsert([('vec1', vals1), ('vec2', vals2)]) 
- index._vector_api.upsert.assert_called_once_with( - pinecone.UpsertRequest(vectors=[ - pinecone.Vector(id='vec1', values=vals1, metadata={}), - pinecone.Vector(id='vec2', values=vals2, metadata={}) - ]) - ) - - -def test_upsert_request_tuples_id_data_metadata(mocker): - import pinecone - pinecone.init(api_key='example-key') - index = pinecone.Index('example-name') - mocker.patch.object(index._vector_api, 'upsert', autospec=True) - index.upsert([('vec1', vals1, md1), - ('vec2', vals2, md2)]) - index._vector_api.upsert.assert_called_once_with( - pinecone.UpsertRequest(vectors=[ - pinecone.Vector(id='vec1', values=vals1, metadata=md1), - pinecone.Vector(id='vec2', values=vals2, metadata=md2) +import pytest + +from pinecone.core.client.api_client import Endpoint + +import pinecone +from pinecone import DescribeIndexStatsRequest, ScoredVector, QueryResponse, UpsertResponse + + +class TestRestIndex: + + def setup_method(self): + self.vector_dim = 8 + self.vals1 = [0.1] * self.vector_dim + self.vals2 = [0.2] * self.vector_dim + self.md1 = {'genre': 'action', 'year': 2021} + self.md2 = {'genre': 'documentary', 'year': 2020} + self.filter1 = {'genre': {'$in': ['action']}} + self.filter2 = {'year': {'$eq': 2020}} + + pinecone.init(api_key='example-key') + self.index = pinecone.Index('example-name') + + # region: upsert tests + + def test_upsert_tuplesOfIdVec_UpserWithoutMD(self, mocker): + mocker.patch.object(self.index._vector_api, 'upsert', autospec=True) + self.index.upsert([('vec1', self.vals1), ('vec2', self.vals2)], namespace='ns') + self.index._vector_api.upsert.assert_called_once_with( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata={}), + pinecone.Vector(id='vec2', values=self.vals2, metadata={}) + ], namespace='ns') + ) + + def test_upsert_tuplesOfIdVecMD_UpsertVectorsWithMD(self, mocker): + mocker.patch.object(self.index._vector_api, 'upsert', autospec=True) + self.index.upsert([('vec1', self.vals1, self.md1), + 
('vec2', self.vals2, self.md2)]) + self.index._vector_api.upsert.assert_called_once_with( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2) + ]) + ) + + def test_upsert_vectors_upsertInputVectors(self, mocker): + mocker.patch.object(self.index._vector_api, 'upsert', autospec=True) + self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2)], + namespace='ns') + self.index._vector_api.upsert.assert_called_once_with( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2) + ], namespace='ns') + ) + + def test_upsert_parallelUpsert_callUpsertParallel(self, mocker): + mocker.patch.object(Endpoint, '__call__', autospec=True) + chunks = [[pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1)], + [pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2)]] + with pinecone.Index('example-index', pool_threads=30) as index: + # Send requests in parallel + async_results = [ + index.upsert(vectors=ids_vectors_chunk, namespace="ns", async_req=True) + for ids_vectors_chunk in chunks + ] + # Wait for and retrieve responses (this raises in case of error) + [async_result.get() for async_result in async_results] + + Endpoint.__call__.assert_any_call( + index._vector_api.upsert, + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + ], + namespace='ns'), + async_req=True + ) + + Endpoint.__call__.assert_any_call( + index._vector_api.upsert, + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + ], + namespace='ns'), + async_req=True + ) + + def test_upsert_vectorListIsMultiplyOfBatchSize_vectorsUpsertedInBatches(self, mocker): + 
mocker.patch.object(self.index._vector_api, + 'upsert', + autospec=True, + side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2)], + namespace='ns', + batch_size=1, + show_progress=False) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + ], namespace='ns') + ) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + ], namespace='ns') + ) + + assert result.upserted_count == 2 + + def test_upsert_vectorListNotMultiplyOfBatchSize_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index._vector_api, + 'upsert', + autospec=True, + side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + pinecone.Vector(id='vec3', values=self.vals1, metadata=self.md1)], + namespace='ns', + batch_size=2) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + ], namespace='ns') + ) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec3', values=self.vals1, metadata=self.md1), + ], namespace='ns') + ) + + assert result.upserted_count == 3 + + def test_upsert_vectorListSmallerThanBatchSize_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index._vector_api, + 'upsert', + autospec=True, + 
side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + pinecone.Vector(id='vec3', values=self.vals1, metadata=self.md1)], + namespace='ns', + batch_size=5) + + self.index._vector_api.upsert.assert_called_once_with( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + pinecone.Vector(id='vec3', values=self.vals1, metadata=self.md1), + ], namespace='ns') + ) + + assert result.upserted_count == 3 + + def test_upsert_tuplesList_vectorsUpsertedInBatches(self, mocker): + mocker.patch.object(self.index._vector_api, + 'upsert', + autospec=True, + side_effect=lambda upsert_request: UpsertResponse( + upserted_count=len(upsert_request.vectors))) + + result = self.index.upsert(vectors= + [('vec1', self.vals1, self.md1), + ('vec2', self.vals2, self.md2), + ('vec3', self.vals1, self.md1)], + namespace='ns', + batch_size=2) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1), + pinecone.Vector(id='vec2', values=self.vals2, metadata=self.md2), + ], namespace='ns') + ) + + self.index._vector_api.upsert.assert_any_call( + pinecone.UpsertRequest(vectors=[ + pinecone.Vector(id='vec3', values=self.vals1, metadata=self.md1), + ], namespace='ns') + ) + + assert result.upserted_count == 3 + + def test_upsert_batchSizeIsNotPositive_errorIsRaised(self): + with pytest.raises(ValueError, match='batch_size must be a positive integer'): + self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1)], + namespace='ns', + batch_size=0) + + with pytest.raises(ValueError, match='batch_size must be a positive integer'): + 
self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1)], + namespace='ns', + batch_size=-1) + + def test_upsert_useBatchSizeAndAsyncReq_valueErrorRaised(self): + with pytest.raises(ValueError, match='async_req is not supported when batch_size is provided.'): + self.index.upsert(vectors=[ + pinecone.Vector(id='vec1', values=self.vals1, metadata=self.md1)], + namespace='ns', + batch_size=1, + async_req=True) + + # endregion + + # region: query tests + + def test_query_byVectorNoFilter_queryVectorNoFilter(self, mocker): + response = QueryResponse(results=[], + matches=[ScoredVector(id="1", + score=0.9, + values=[0.0], + metadata={"a": 2})], + namespace="test") + + mocker.patch.object(self.index._vector_api, 'query', autospec=True, return_value=response) + + actual = self.index.query(top_k=10, vector=self.vals1) + + self.index._vector_api.query.assert_called_once_with( + pinecone.QueryRequest(top_k=10, vector=self.vals1) + ) + expected = QueryResponse(matches=[ScoredVector(id="1", + score=0.9, + values=[0.0], + metadata={"a": 2})], + namespace="test") + expected._data_store.pop('results', None) + assert actual == expected + + def test_query_byVectorWithFilter_queryVectorWithFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'query', autospec=True) + self.index.query(top_k=10, vector=self.vals1, filter=self.filter1, namespace='ns') + self.index._vector_api.query.assert_called_once_with( + pinecone.QueryRequest(top_k=10, vector=self.vals1, filter=self.filter1, namespace='ns') + ) + + def test_query_byTuplesNoFilter_queryVectorsNoFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'query', autospec=True) + self.index.query(top_k=10, queries=[ + (self.vals1,), + (self.vals2,) ]) - ) - - -def test_query_request_tuples_query_only(mocker): - import pinecone - pinecone.init(api_key='example-key') - index = pinecone.Index('example-name') - mocker.patch.object(index._vector_api, 'query', autospec=True) - 
index.query(top_k=10, vector=vals1) - index._vector_api.query.assert_called_once_with( - pinecone.QueryRequest(top_k=10, vector=vals1, id='', queries=[]) - ) - - -def test_query_request_tuples_query_filter(mocker): - import pinecone - pinecone.init(api_key='example-key') - index = pinecone.Index('example-name') - mocker.patch.object(index._vector_api, 'query', autospec=True) - index.query(top_k=10, queries=[ - (vals1, filter1), - (vals2, filter2) - ]) - index._vector_api.query.assert_called_once_with( - pinecone.QueryRequest(top_k=10, vector=[], id='', queries=[ - pinecone.QueryVector(values=vals1, filter=filter1), - pinecone.QueryVector(values=vals2, filter=filter2) + self.index._vector_api.query.assert_called_once_with( + pinecone.QueryRequest(top_k=10, queries=[ + pinecone.QueryVector(values=self.vals1), + pinecone.QueryVector(values=self.vals2) + ]) + ) + + def test_query_byTuplesWithFilter_queryVectorsWithFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'query', autospec=True) + self.index.query(top_k=10, queries=[ + (self.vals1, self.filter1), + (self.vals2, self.filter2) ]) - ) + self.index._vector_api.query.assert_called_once_with( + pinecone.QueryRequest(top_k=10, queries=[ + pinecone.QueryVector(values=self.vals1, filter=self.filter1), + pinecone.QueryVector(values=self.vals2, filter=self.filter2) + ]) + ) + + def test_query_byVecId_queryByVecId(self, mocker): + mocker.patch.object(self.index._vector_api, 'query', autospec=True) + self.index.query(top_k=10, id='vec1', include_metadata=True, include_values=False) + self.index._vector_api.query.assert_called_once_with( + pinecone.QueryRequest(top_k=10, id='vec1', include_metadata=True, include_values=False) + ) + + # endregion + + # region: delete tests + + def test_delete_byIds_deleteByIds(self, mocker): + mocker.patch.object(self.index._vector_api, 'delete', autospec=True) + self.index.delete(ids=['vec1', 'vec2']) + self.index._vector_api.delete.assert_called_once_with( + 
pinecone.DeleteRequest(ids=['vec1', 'vec2']) + ) + + def test_delete_deleteAllByFilter_deleteAllByFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'delete', autospec=True) + self.index.delete(delete_all=True, filter=self.filter1, namespace='ns') + self.index._vector_api.delete.assert_called_once_with( + pinecone.DeleteRequest(delete_all=True, filter=self.filter1, namespace='ns') + ) + + def test_delete_deleteAllNoFilter_deleteNoFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'delete', autospec=True) + self.index.delete(delete_all=True) + self.index._vector_api.delete.assert_called_once_with( + pinecone.DeleteRequest(delete_all=True) + ) + + # endregion + + # region: fetch tests + + def test_fetch_byIds_fetchByIds(self, mocker): + mocker.patch.object(self.index._vector_api, 'fetch', autospec=True) + self.index.fetch(ids=['vec1', 'vec2']) + self.index._vector_api.fetch.assert_called_once_with( + ids=['vec1', 'vec2'] + ) + + def test_fetch_byIdsAndNS_fetchByIdsAndNS(self, mocker): + mocker.patch.object(self.index._vector_api, 'fetch', autospec=True) + self.index.fetch(ids=['vec1', 'vec2'], namespace='ns') + self.index._vector_api.fetch.assert_called_once_with( + ids=['vec1', 'vec2'], namespace='ns' + ) + + # endregion + + # region: update tests + + def test_update_byIdAndValues_updateByIdAndValues(self, mocker): + mocker.patch.object(self.index._vector_api, 'update', autospec=True) + self.index.update(id='vec1', values=self.vals1, namespace='ns') + self.index._vector_api.update.assert_called_once_with( + pinecone.UpdateRequest(id='vec1', values=self.vals1, namespace='ns') + ) + + def test_update_byIdAndValuesAndMetadata_updateByIdAndValuesAndMetadata(self, mocker): + mocker.patch.object(self.index._vector_api, 'update', autospec=True) + self.index.update('vec1', values=self.vals1, metadata=self.md1) + self.index._vector_api.update.assert_called_once_with( + pinecone.UpdateRequest(id='vec1', values=self.vals1, metadata=self.md1) + ) 
+ + # endregion + + # region: describe index tests + + def test_describeIndexStats_callWithoutFilter_CalledWithoutFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'describe_index_stats', autospec=True) + self.index.describe_index_stats() + self.index._vector_api.describe_index_stats.assert_called_once_with( + DescribeIndexStatsRequest()) + + def test_describeIndexStats_callWithFilter_CalledWithFilter(self, mocker): + mocker.patch.object(self.index._vector_api, 'describe_index_stats', autospec=True) + self.index.describe_index_stats(filter=self.filter1) + self.index._vector_api.describe_index_stats.assert_called_once_with( + DescribeIndexStatsRequest(filter=self.filter1)) + + # endregion diff --git a/tox.ini b/tox.ini index 8e3d6c25..c38a0d1d 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{36,37,38,39}-pcgrpc_{yes,no}-old_deps_{yes,no},flake8,docs +envlist = flake8,docs skip_missing_interpreters = true [testenv] @@ -15,7 +15,7 @@ deps= old_deps_yes: urllib3==1.21.1 -r {toxinidir}/requirements.txt -r {toxinidir}/test-requirements.txt - pcgrpc_yes: -r {toxinidir}/requirements-grpc.txt + -r {toxinidir}/requirements-grpc.txt commands = pytest --cov=pinecone --timeout=120 tests/unit {posargs}