Release v5.3.0
Public Preview: Imports
This release adds methods for interacting with several new import endpoints, currently in Public Preview, from the Python SDK. To learn more about working with imports and the expected data formats, see the Pinecone documentation guides on importing data. Before you can use these methods, you will need to follow those guides to prepare your data and configure any storage integrations.
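As a rough illustration of the data preparation step, here is a minimal sketch that writes a small Parquet file you could upload under a namespace subfolder of your bucket. The schema shown (an id string column plus a values list-of-floats column), the file names, and the use of pandas with pyarrow are illustrative assumptions; the guides above are the authoritative reference for the expected formats.

import pandas as pd  # assumes pandas and pyarrow are installed

records = pd.DataFrame({
    "id": [f"vec-{n}" for n in range(10)],      # record IDs
    "values": [[0.1] * 10 for _ in range(10)],  # 10-dimensional vectors
})
# Upload the resulting file under a namespace subfolder, e.g.
# s3://your-bucket/your-path/example-namespace/data.parquet
records.to_parquet("data.parquet")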
import os
import random
from pinecone import Pinecone, ServerlessSpec, ImportErrorMode
# 0. Instantiate the Pinecone client
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
# 1. You must have an index whose dimension matches that of your vectors
# You may already have such an index, but for this demo we will create one.
index_name = f"import-{random.randint(0, 10000)}"
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=10,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="eu-west-1")
    )
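# (Optional) A newly created serverless index can take a moment to become
# ready; this minimal wait loop uses describe_index, which reports a
# 'ready' flag in its status.
import time
while not pc.describe_index(name=index_name).status['ready']:
    time.sleep(1)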
# 2. Get a reference to the index client
index = pc.Index(name=index_name)
# 3. Start the import operation, passing a uri that describes the path to your
# AWS S3 bucket. Each subfolder within this path will correspond to a namespace
# where imported data will be stored.
root = 's3://dev-bulk-import-datasets-pub/10-records-dim-10/'
op = index.start_import(
    uri=root,
    error_mode=ImportErrorMode.CONTINUE, # or ABORT
    # integration_id='' # Add this if you want to use a storage integration
)
# 4. Check the operation status
index.describe_import(id=op.id)
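# (Optional) Imports run asynchronously, so a real workflow would usually
# poll describe_import until the operation reaches a terminal state before
# depending on the data. A rough sketch, left commented out because step 5
# below cancels the operation instead; the status strings are assumptions,
# so check your describe_import output for the actual values.
# while index.describe_import(id=op.id).status in ("Pending", "InProgress"):
#     time.sleep(30)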
# 5. Cancel an import operation
index.cancel_import(id=op.id)
# 6. List all recent operations using a generator that handles pagination on your behalf
for i in index.list_imports():
    print(f"id: {i.id} status: {i.status}")
# ...or turn the generator into a simple list, fetching all results at once
operations = list(index.list_imports())
print(operations)
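Since this walkthrough created a throwaway index, you may want to remove it when you are done. delete_index is the SDK's standard cleanup call:

# 7. Clean up the demo index created in step 1
pc.delete_index(name=index_name)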