diff --git a/chatdesk-ui/supabase/migrations/20240108234545_add_file_items.sql b/chatdesk-ui/supabase/migrations/20240108234545_add_file_items.sql
index 819583e..fd4042e 100644
--- a/chatdesk-ui/supabase/migrations/20240108234545_add_file_items.sql
+++ b/chatdesk-ui/supabase/migrations/20240108234545_add_file_items.sql
@@ -19,6 +19,7 @@ create table file_items (
     content TEXT NOT NULL,
     local_embedding vector(384), -- 384 works for local w/ Xenova/all-MiniLM-L6-v2
     openai_embedding vector(1536), -- 1536 for OpenAI
+    bge_m3_embedding vector(1024), -- 1024 for BGE-M3
     tokens INT NOT NULL
 );
 
@@ -32,6 +33,9 @@ CREATE INDEX file_items_embedding_idx ON file_items
 CREATE INDEX file_items_local_embedding_idx ON file_items
   USING hnsw (local_embedding vector_cosine_ops);
 
+CREATE INDEX file_items_bge_m3_embedding_idx ON file_items
+  USING hnsw (bge_m3_embedding vector_cosine_ops);
+
 -- RLS
 
 ALTER TABLE file_items ENABLE ROW LEVEL SECURITY;
@@ -113,4 +117,43 @@ begin
   order by file_items.openai_embedding <=> query_embedding
   limit match_count;
 end;
+$$;
+
+-- Cosine-similarity search over the BGE-M3 embeddings (1024 dimensions).
+--   query_embedding: embedding of the search query
+--   match_count:     maximum number of rows returned; null means no limit
+--   file_ids:        files to search; with null nothing matches
+-- Returns the matching chunks closest first, with similarity = 1 - cosine distance.
+-- "#variable_conflict use_column" makes the bare names id, file_id, content and
+-- tokens refer to the table columns rather than the same-named output columns.
+create function match_file_items_bge_m3 (
+  query_embedding vector(1024),
+  match_count int DEFAULT null,
+  file_ids UUID[] DEFAULT null
+) returns table (
+  id UUID,
+  file_id UUID,
+  content TEXT,
+  tokens INT,
+  similarity float
+)
+language plpgsql
+as $$
+#variable_conflict use_column
+begin
+  return query
+  select
+    id,
+    file_id,
+    content,
+    tokens,
+    1 - (file_items.bge_m3_embedding <=> query_embedding) as similarity
+  from file_items
+  where (file_id = ANY(file_ids))
+    -- bge_m3_embedding is nullable: rows without one would get a null similarity,
+    -- and null vectors are not in the HNSW index, so exclude them to keep the
+    -- result independent of the query plan.
+    and file_items.bge_m3_embedding is not null
+  order by file_items.bge_m3_embedding <=> query_embedding
+  limit match_count;
+end;
 $$;
\ No newline at end of file