diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 832877d7..019879f9 100755 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -52,7 +52,7 @@ jobs: - run: psql test -c 'alter database test set enable_seqscan = off' # setup the database for testing - - run: make installcheck REGRESS="pinecone_crud pinecone_medium_create" REGRESS_OPTS="--dbname=test --inputdir=./test --use-existing" + - run: make installcheck REGRESS="pinecone_crud pinecone_medium_create pinecone_zero_vector_insert" REGRESS_OPTS="--dbname=test --inputdir=./test --use-existing" - if: ${{ failure() }} run: cat regression.diffs # mac: diff --git a/src/pinecone/pinecone_insert.c b/src/pinecone/pinecone_insert.c index c6b33fcf..3f08817c 100644 --- a/src/pinecone/pinecone_insert.c +++ b/src/pinecone/pinecone_insert.c @@ -156,11 +156,16 @@ bool AppendBufferTupleInCtx(Relation index, Datum *values, bool *isnull, ItemPoi MemoryContext oldCtx; MemoryContext insertCtx; bool checkpoint_created; + Vector* vector; // use a memory context because index_form_tuple can allocate insertCtx = AllocSetContextCreate(CurrentMemoryContext, "Pinecone insert tuple temporary context", ALLOCSET_DEFAULT_SIZES); oldCtx = MemoryContextSwitchTo(insertCtx); + + vector = DatumGetVector(values[0]); + validate_vector_nonzero(vector); + checkpoint_created = AppendBufferTuple(index, values, isnull, heap_tid, heapRel); MemoryContextSwitchTo(oldCtx); MemoryContextDelete(insertCtx); // delete the temporary context diff --git a/test/expected/pinecone_zero_vector_insert.out b/test/expected/pinecone_zero_vector_insert.out new file mode 100644 index 00000000..c8d73b80 --- /dev/null +++ b/test/expected/pinecone_zero_vector_insert.out @@ -0,0 +1,63 @@ +-- SETUP +-- suppress output +\o /dev/null +delete from pinecone_mock; +-- logging level +SET client_min_messages = 'notice'; +-- flush each vector individually +SET pinecone.vectors_per_request = 1; +SET pinecone.requests_per_batch = 1; +-- disable 
flat scan to force use of the index +SET enable_seqscan = off; +-- CREATE TABLE +DROP TABLE IF EXISTS t; +NOTICE: table "t" does not exist, skipping +CREATE TABLE t (id int, val vector(3)); +\o +-- CREATE INDEX +-- mock create index +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://api.pinecone.io/indexes', 'POST', $${ + "name": "invalid", + "metric": "euclidean", + "dimension": 3, + "status": { + "ready": true, + "state": "Ready" + }, + "host": "fakehost", + "spec": { + "serverless": { + "cloud": "aws", + "region": "us-west-2" + } + } +}$$); +-- mock describe index stats +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://fakehost/describe_index_stats', 'GET', '{"namespaces":{},"dimension":3,"indexFullness":0,"totalVectorCount":0}'); +INSERT INTO t (id, val) VALUES (2, '[0,0,0]'); +-- create index after inserting 0 vector - Throws an error +CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}'); +ERROR: Invalid vector: zero vector +HINT: Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine. +-- Truncate the table to remove the values for creating an index successfully +TRUNCATE TABLE t; +-- create index +CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}'); +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://fakehost/vectors/upsert', +'{ "vectors": [{ + "id": "000000000001", + "values": [100, 1, 1], + "metadata": { + } + }] + }', + '{"upsertedCount":1}' +); +INSERT INTO t (id, val) VALUES (1, '[100,1,1]'); +INSERT INTO t (id, val) VALUES (2, '[0,0,0]'); +ERROR: Invalid vector: zero vector +HINT: Pinecone insists that dense vectors cannot be zero in all dimensions. I don't know why they do this to you even when your metric isn't cosine. 
+DROP TABLE t; diff --git a/test/sql/pinecone_zero_vector_insert.sql b/test/sql/pinecone_zero_vector_insert.sql new file mode 100644 index 00000000..4bbd61c1 --- /dev/null +++ b/test/sql/pinecone_zero_vector_insert.sql @@ -0,0 +1,68 @@ +-- SETUP +-- suppress output +\o /dev/null +delete from pinecone_mock; +-- logging level +SET client_min_messages = 'notice'; +-- flush each vector individually +SET pinecone.vectors_per_request = 1; +SET pinecone.requests_per_batch = 1; +-- disable flat scan to force use of the index +SET enable_seqscan = off; +-- CREATE TABLE +DROP TABLE IF EXISTS t; +CREATE TABLE t (id int, val vector(3)); +\o + +-- CREATE INDEX +-- mock create index +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://api.pinecone.io/indexes', 'POST', $${ + "name": "invalid", + "metric": "euclidean", + "dimension": 3, + "status": { + "ready": true, + "state": "Ready" + }, + "host": "fakehost", + "spec": { + "serverless": { + "cloud": "aws", + "region": "us-west-2" + } + } +}$$); + +-- mock describe index stats +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://fakehost/describe_index_stats', 'GET', '{"namespaces":{},"dimension":3,"indexFullness":0,"totalVectorCount":0}'); + + +INSERT INTO t (id, val) VALUES (2, '[0,0,0]'); + +-- create index after inserting 0 vector - Throws an error +CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}'); + +-- Truncate the table to remove the values for creating an index successfully +TRUNCATE TABLE t; + +-- create index +CREATE INDEX i2 ON t USING pinecone (val) WITH (spec = '{"serverless":{"cloud":"aws","region":"us-west-2"}}'); + +INSERT INTO pinecone_mock (url_prefix, method, response) +VALUES ('https://fakehost/vectors/upsert', +'{ "vectors": [{ + "id": "000000000001", + "values": [100, 1, 1], + "metadata": { + } + }] + }', + '{"upsertedCount":1}' +); + +INSERT INTO t (id, val) VALUES (1, '[100,1,1]'); +INSERT INTO t (id, 
val) VALUES (2, '[0,0,0]'); + +DROP TABLE t; \ No newline at end of file