Skip to content

Commit

Permalink
Update sp_vector_search.sql
Browse files Browse the repository at this point in the history
  • Loading branch information
rjvgupta authored Oct 25, 2024
1 parent 8ae1e0e commit 058549d
Showing 1 changed file with 14 additions and 9 deletions.
23 changes: 14 additions & 9 deletions stored-procedures/sp_vector_search/sp_vector_search.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ Parameters:
Requirements:
expects a table with the following tables to exist and be populated:
CREATE TABLE $(tablename)_embeddings
( recordid VARCHAR(15),
modeloutput SUPER ) DISTKEY (recordid);
( "recordId" VARCHAR(15),
"modelOutput" SUPER ) DISTKEY (recordid);
CREATE TABLE $(tablename)_kmeans
( cluster int,
centroid SUPER,
Expand All @@ -25,37 +25,42 @@ Requirements:
interations int) DISTSTYLE ALL;
CREATE TABLE $(tablename)_kmeans_clusters
( cluster int,
recordid VARCHAR(15),
"recordId" VARCHAR(15),
similarity float,
rnk int) DISTKEY (recordid);
History:
2024-07-19 - rjvgupta - Created
2024-10-25 - rjvgupta - Updated for case-sensitivity.
**********************************************************************************************/

SET enable_case_sensitive_identifier TO true;

CREATE OR REPLACE PROCEDURE sp_vector_search (tablename IN varchar, search IN varchar, cnt IN int, tmp_name IN varchar) AS $$
BEGIN
EXECUTE 'drop table if exists #'||tmp_name;
EXECUTE 'create table #'||tmp_name ||' (recordid varchar(100), similarity float)';
EXECUTE 'create table #'||tmp_name ||' ("recordId" varchar(100), similarity float)';
EXECUTE 'insert into #'||tmp_name ||'
select re.recordid, sum(rv::float*qv::float)/SQRT(sum(rv::float*rv::float)*sum(qv::float*qv::float)) esimilarity
select re."recordId", sum(rv::float*qv::float)/SQRT(sum(rv::float*rv::float)*sum(qv::float*qv::float)) esimilarity
from (
select k.cluster, q.q, sum(kv::float*qv::float)/SQRT(sum(kv::float*kv::float)*sum(qv::float*qv::float)) csimilarity
from '||tablename||'_kmeans k, k.centroid kv at kvi,
(select JSON_PARSE(f_titan_embedding('''+search+''')) as q) q, q.q qv at qvi
where kvi = qvi
group by 1,2
qualify rank() over (order by csimilarity desc) = 1
) q, '||tablename||'_kmeans_clusters c, '||tablename||'_embeddings re, q.q qv at qvi, re.modeloutput.embedding rv at rvi
where rvi = qvi and c.cluster = q.cluster and c.recordid = re.recordid
) q, '||tablename||'_kmeans_clusters c, '||tablename||'_embeddings re, q.q qv at qvi, re."modelOutput".embedding rv at rvi
where rvi = qvi and c.cluster = q.cluster and c."recordId" = re."recordId"
group by 1
qualify rank() over (order by esimilarity desc) <= '||cnt;
END $$ LANGUAGE plpgsql;

/* Usage Example:
call sp_kmeans('reviews', 'bat product quality', 100, 'searchresults')
SET enable_case_sensitive_identifier TO true;
call sp_vector_search('reviews', 'bad broken unreliable slow', 100, 'searchresults');
select review_id, product_title, review_title, review_desc, similarity
from #searchresults
join reviews on review_id = recordid
join reviews on review_id = "recordId"
order by similarity desc;
*/

0 comments on commit 058549d

Please sign in to comment.