Skip to content

Commit

Permalink
Destination Pinecone: Add source_tag for attribution + unit tests (#3…
Browse files Browse the repository at this point in the history
…8151)

Co-authored-by: Aaron ("AJ") Steers <[email protected]>
  • Loading branch information
bindipankhudi and aaronsteers authored May 15, 2024
1 parent bc83bee commit 1c3a6c4
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import os
import uuid
from typing import Optional

Expand All @@ -25,14 +26,17 @@

MAX_IDS_PER_DELETE = 1000

AIRBYTE_TAG = "airbyte"
AIRBYTE_TEST_TAG = "airbyte_test"


class PineconeIndexer(Indexer):
config: PineconeIndexingModel

def __init__(self, config: PineconeIndexingModel, embedding_dimensions: int):
super().__init__(config)
try:
self.pc = PineconeGRPC(api_key=config.pinecone_key, threaded=True)
self.pc = PineconeGRPC(api_key=config.pinecone_key, source_tag=self.get_source_tag, threaded=True)
except PineconeException as e:
return AirbyteConnectionStatus(status=Status.FAILED, message=str(e))

Expand Down Expand Up @@ -62,6 +66,10 @@ def pre_sync(self, catalog: ConfiguredAirbyteCatalog):
def post_sync(self):
    """Post-sync step: always hands back a new, empty list."""
    nothing_to_emit = []
    return nothing_to_emit

def get_source_tag(self):
    """Choose the source tag for the current process.

    Returns AIRBYTE_TEST_TAG when either the PYTEST_CURRENT_TEST or the
    RUN_IN_AIRBYTE_CI environment variable is present (their values are
    not inspected); otherwise returns AIRBYTE_TAG.
    """
    test_markers = ("PYTEST_CURRENT_TEST", "RUN_IN_AIRBYTE_CI")
    if any(marker in os.environ for marker in test_markers):
        return AIRBYTE_TEST_TAG
    return AIRBYTE_TAG

def delete_vectors(self, filter, namespace=None, prefix=None):
if self._pod_type == "starter":
# Starter pod types have a maximum of 100000 rows
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import json
import logging
import os
import time

from airbyte_cdk.destinations.vector_db_based.embedder import OPEN_AI_VECTOR_SIZE
Expand Down Expand Up @@ -48,6 +49,8 @@ def tearDown(self):
else :
print("Noting to delete. No data in the index/namespace.")

def test_integration_test_flag_is_set(self):
assert "PYTEST_CURRENT_TEST" in os.environ

def test_check_valid_config(self):
outcome = DestinationPinecone().check(logging.getLogger("airbyte"), self.config)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ data:
connectorSubtype: vectorstore
connectorType: destination
definitionId: 3d2b6f84-7f0d-4e3f-a5e5-7c7d4b50eabd
dockerImageTag: 0.1.0
dockerImageTag: 0.1.1
dockerRepository: airbyte/destination-pinecone
documentationUrl: https://docs.airbyte.com/integrations/destinations/pinecone
githubIssueLabel: destination-pinecone
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "airbyte-destination-pinecone"
version = "0.1.0"
version = "0.1.1"
description = "Airbyte destination implementation for Pinecone."
authors = ["Airbyte <[email protected]>"]
license = "MIT"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,26 @@ def test_check_with_errors(self, MockedEmbedder, MockedPineconeIndexer):
mock_embedder.check.assert_called_once()
mock_indexer.check.assert_called_once()

def test_check_with_config_errors(self):
    """check() must report FAILED for a malformed config.

    The embedding section is stored under the key "embedding_2"
    (presumably the connector expects a different key -- confirm against
    the connector spec), so the config is expected to be rejected.
    """
    processing_section = {"text_fields": ["str_col"], "metadata_fields": [], "chunk_size": 1000}
    embedding_section = {"mode": "openai", "openai_key": "mykey"}
    indexing_section = {
        "pinecone_key": "mykey",
        "pinecone_environment": "myenv",
        "index": "myindex",
    }
    bad_config = {
        "processing": processing_section,
        "embedding_2": embedding_section,
        "indexing": indexing_section,
    }

    outcome = DestinationPinecone().check(self.logger, bad_config)

    self.assertEqual(outcome.status, Status.FAILED)

def test_check_with_init_indexer_errors(self):
    """check() must report FAILED when constructing the indexer raises."""
    destination = DestinationPinecone()
    indexer_target = "destination_pinecone.destination.PineconeIndexer"
    # Any attempt to build a PineconeIndexer inside check() raises this exception.
    failing_indexer = patch(indexer_target, side_effect=Exception("Indexer Error"))
    with failing_indexer:
        result = destination.check(self.logger, self.config)
    self.assertEqual(result.status, Status.FAILED)

@patch("destination_pinecone.destination.Writer")
@patch("destination_pinecone.destination.PineconeIndexer")
@patch("destination_pinecone.destination.create_from_config")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import os
from unittest.mock import ANY, MagicMock, Mock, call, patch

import pytest
Expand Down Expand Up @@ -55,6 +56,36 @@ def mock_determine_spec_type():
mock.return_value = "pod"
yield mock


def test_get_source_tag_default():
    """With neither test marker in the environment, the tag is "airbyte".

    The markers are removed inside a ``patch.dict`` context so that
    os.environ is restored afterwards; popping them directly would leave
    RUN_IN_AIRBYTE_CI missing for every test that runs later in the same
    process.
    """
    # patch.dict snapshots os.environ on entry and restores it on exit,
    # including keys that were deleted inside the block.
    with patch.dict("os.environ"):
        # case when no test env variables are set
        os.environ.pop("PYTEST_CURRENT_TEST", None)
        os.environ.pop("RUN_IN_AIRBYTE_CI", None)
        indexer = create_pinecone_indexer()
        assert indexer.get_source_tag() == "airbyte"


def test_get_source_tag_with_pytest():
    """The test tag is reported under pytest, with or without the CI marker added."""
    expected_tag = "airbyte_test"
    indexer = create_pinecone_indexer()

    # pytest is running by default here
    assert indexer.get_source_tag() == expected_tag

    # pytest plus ci is running
    ci_marker = {"RUN_IN_AIRBYTE_CI": "value does not matter"}
    with patch.dict("os.environ", ci_marker):
        assert indexer.get_source_tag() == expected_tag


@patch.dict("os.environ", {"RUN_IN_AIRBYTE_CI": "Value does not matter"})
def test_get_source_tag_with_ci():
    """The test tag is reported whenever the CI marker is set, with or without pytest's marker.

    The decorator sets RUN_IN_AIRBYTE_CI for the whole test. The second
    case removes PYTEST_CURRENT_TEST inside a nested ``patch.dict`` so the
    CI-only path is actually exercised; the nested context restores the
    environment on exit.
    """
    indexer = create_pinecone_indexer()

    # CI and pytest are both running
    assert indexer.get_source_tag() == "airbyte_test"

    # CI is running but pytest is not: drop pytest's marker temporarily
    with patch.dict("os.environ"):
        os.environ.pop("PYTEST_CURRENT_TEST", None)
        assert indexer.get_source_tag() == "airbyte_test"


def test_pinecone_index_upsert_and_delete(mock_describe_index):
indexer = create_pinecone_indexer()
indexer._pod_type = "p1"
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/destinations/pinecone.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ OpenAI and Fake embeddings produce vectors with 1536 dimensions, and the Cohere

| Version | Date | Pull Request | Subject |
| :------ | :--------- | :-------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------- |
| 0.1.1 | 2023-05-14 | [#38151](https://github.com/airbytehq/airbyte/pull/38151) | Add airbyte source tag for attribution |
| 0.1.0 | 2023-05-06 | [#37756](https://github.com/airbytehq/airbyte/pull/37756) | Add support for Pinecone Serverless |
| 0.0.24 | 2023-04-15 | [#37333](https://github.com/airbytehq/airbyte/pull/37333) | Update CDK & pytest version to fix security vulnerabilities. |
| 0.0.23 | 2023-03-22 | [#35911](https://github.com/airbytehq/airbyte/pull/35911) | Bump versions to latest, resolves test failures. |
Expand Down

0 comments on commit 1c3a6c4

Please sign in to comment.