Skip to content

Commit

Permalink
Added mock test for zero vector insertion
Browse files Browse the repository at this point in the history
  • Loading branch information
swetavooda committed Apr 9, 2024
1 parent bad50cd commit da8c4a2
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
- run: psql test -c 'alter database test set enable_seqscan = off'

# setup the database for testing
- run: make installcheck REGRESS="pinecone_crud pinecone_medium_create" REGRESS_OPTS="--dbname=test --inputdir=./test --use-existing"
- run: make installcheck REGRESS="pinecone_crud pinecone_medium_create pinecone_zero_vector_insert" REGRESS_OPTS="--dbname=test --inputdir=./test --use-existing"
- if: ${{ failure() }}
run: cat regression.diffs
# mac:
Expand Down
63 changes: 63 additions & 0 deletions test/expected/pinecone_zero_vector_insert.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
-- SETUP
-- suppress output
\o /dev/null
delete from pinecone_mock;
-- logging level
SET client_min_messages = 'notice';
-- flush each vector individually
SET pinecone.vectors_per_request = 1;
SET pinecone.requests_per_batch = 1;
-- disable flat scan to force use of the index
SET enable_seqscan = off;
-- CREATE TABLE
DROP TABLE IF EXISTS t;
NOTICE: table "t" does not exist, skipping
CREATE TABLE t (id int, val vector(3));
\o
-- CREATE INDEX
-- mock create index
INSERT INTO pinecone_mock (url_prefix, method, response)
VALUES ('https://api.pinecone.io/indexes', 'POST', $${
"name": "invalid",
"metric": "euclidean",
"dimension": 3,
"status": {
"ready": true,
"state": "Ready"
},
"host": "fakehost",
"spec": {
"serverless": {
"cloud": "aws",
"region": "us-west-2"
}
}
}$$);
-- mock describe index stats
INSERT INTO pinecone_mock (url_prefix, method, response)
VALUES ('https://fakehost/describe_index_stats', 'GET', '{"namespaces":{},"dimension":3,"indexFullness":0,"totalVectorCount":0}');
INSERT INTO t (id, val) VALUES (2, '[0,0,0]');
-- create index after insering 0 vector - Throws an error
CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}');
ERROR: Invalid vector: zero vector
HINT: Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine.
-- Truncate the table to remove the values for creating an index successfully
TRUNCATE TABLE t;
-- create index
CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}');
INSERT INTO pinecone_mock (url_prefix, method, response)
VALUES ('https://fakehost/vectors/upsert',
'{ "vectors": [{
"id": "000000000001",
"values": [100, 1, 1],
"metadata": {
}
}]
}',
'{"upsertedCount":1}'
);
INSERT INTO t (id, val) VALUES (1, '[100,1,1]');
INSERT INTO t (id, val) VALUES (2, '[0,0,0]');
ERROR: Invalid vector: zero vector
HINT: Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine.
DROP TABLE t;
68 changes: 68 additions & 0 deletions test/sql/pinecone_zero_vector_insert.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
-- Regression test: a zero vector must be rejected both when a pinecone index
-- is built over existing rows and when a row is inserted into an indexed table.
-- NOTE: the expected-output file reproduces this script, comments included, so
-- regenerate test/expected/pinecone_zero_vector_insert.out after editing here.
-- SETUP
-- suppress query output until the matching \o below
\o /dev/null
-- clear every previously registered mock response
delete from pinecone_mock;
-- logging level
SET client_min_messages = 'notice';
-- flush each vector individually
SET pinecone.vectors_per_request = 1;
SET pinecone.requests_per_batch = 1;
-- disable flat scan to force use of the index
SET enable_seqscan = off;
-- CREATE TABLE
-- recreate the test table: an integer id plus a 3-dimensional vector column
DROP TABLE IF EXISTS t;
CREATE TABLE t (id int, val vector(3));
-- restore normal query output
\o

-- CREATE INDEX
-- mock create index: canned response for POST https://api.pinecone.io/indexes
-- describing a ready, 3-dimensional euclidean index whose host is "fakehost"
INSERT INTO pinecone_mock (url_prefix, method, response)
VALUES ('https://api.pinecone.io/indexes', 'POST', $${
"name": "invalid",
"metric": "euclidean",
"dimension": 3,
"status": {
"ready": true,
"state": "Ready"
},
"host": "fakehost",
"spec": {
"serverless": {
"cloud": "aws",
"region": "us-west-2"
}
}
}$$);

-- mock describe index stats: canned GET response reporting an empty index
-- (totalVectorCount 0, dimension 3)
INSERT INTO pinecone_mock (url_prefix, method, response)
VALUES ('https://fakehost/describe_index_stats', 'GET', '{"namespaces":{},"dimension":3,"indexFullness":0,"totalVectorCount":0}');


-- seed the table with an all-zero vector before any index exists
INSERT INTO t (id, val) VALUES (2, '[0,0,0]');

-- create index after inserting a zero vector - expected to fail with
-- "ERROR: Invalid vector: zero vector"
CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}');

-- Truncate the table to remove the zero vector so the index can be created successfully
TRUNCATE TABLE t;

-- create index on the now-empty table - expected to succeed
CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}');

-- mock upsert: canned {"upsertedCount":1} response for
-- https://fakehost/vectors/upsert, paired with a request body for the vector
-- inserted below (id "000000000001", values [100, 1, 1])
-- NOTE(review): the column list is (url_prefix, method, response), yet the
-- second value is a JSON request body, whereas the other mocks in this script
-- put an HTTP verb ('POST' / 'GET') there - confirm which column was intended.
INSERT INTO pinecone_mock (url_prefix, method, response)
VALUES ('https://fakehost/vectors/upsert',
'{ "vectors": [{
"id": "000000000001",
"values": [100, 1, 1],
"metadata": {
}
}]
}',
'{"upsertedCount":1}'
);

-- a non-zero vector is accepted by the indexed table
INSERT INTO t (id, val) VALUES (1, '[100,1,1]');
-- a zero vector is rejected - expected "ERROR: Invalid vector: zero vector"
INSERT INTO t (id, val) VALUES (2, '[0,0,0]');

-- clean up the test table
DROP TABLE t;

0 comments on commit da8c4a2

Please sign in to comment.