This commit is contained in:
parent
41dc3ca5fa
commit
be0662d918
|
|
@ -19,6 +19,7 @@ create table file_items (
|
||||||
content TEXT NOT NULL,
|
content TEXT NOT NULL,
|
||||||
local_embedding vector(384), -- 384 works for local w/ Xenova/all-MiniLM-L6-v2
|
local_embedding vector(384), -- 384 works for local w/ Xenova/all-MiniLM-L6-v2
|
||||||
openai_embedding vector(1536), -- 1536 for OpenAI
|
openai_embedding vector(1536), -- 1536 for OpenAI
|
||||||
|
bge_m3_embedding vector(1024), -- 1024 for BGE-M3
|
||||||
tokens INT NOT NULL
|
tokens INT NOT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -32,6 +33,9 @@ CREATE INDEX file_items_embedding_idx ON file_items
|
||||||
CREATE INDEX file_items_local_embedding_idx ON file_items
|
CREATE INDEX file_items_local_embedding_idx ON file_items
|
||||||
USING hnsw (local_embedding vector_cosine_ops);
|
USING hnsw (local_embedding vector_cosine_ops);
|
||||||
|
|
||||||
|
-- HNSW index over the BGE-M3 embedding column, built with the cosine operator
-- class (the operator class that serves `<=>` ordering on this column).
CREATE INDEX file_items_bge_m3_embedding_idx
  ON file_items
  USING hnsw (bge_m3_embedding vector_cosine_ops);
|
||||||
|
|
||||||
-- RLS
|
-- RLS
|
||||||
|
|
||||||
ALTER TABLE file_items ENABLE ROW LEVEL SECURITY;
|
ALTER TABLE file_items ENABLE ROW LEVEL SECURITY;
|
||||||
|
|
@ -113,4 +117,33 @@ begin
|
||||||
order by file_items.openai_embedding <=> query_embedding
|
order by file_items.openai_embedding <=> query_embedding
|
||||||
limit match_count;
|
limit match_count;
|
||||||
end;
|
end;
|
||||||
|
$$;
|
||||||
|
|
||||||
|
-- Similarity search over file_items using the BGE-M3 embedding column.
--
-- Parameters:
--   query_embedding  1024-dimension vector compared against bge_m3_embedding.
--   match_count      maximum number of rows returned; the NULL default means
--                    LIMIT NULL, i.e. no limit is applied.
--   file_ids         files to search; a row qualifies only when its file_id
--                    is a member of this array, so the NULL default matches
--                    no rows.
--
-- Returns one row per matching item (id, file_id, content, tokens) plus
-- similarity, computed as 1 minus the `<=>` distance between the stored and
-- query embeddings. Rows are ordered by ascending distance (closest first).
create function match_file_items_bge_m3 (
  query_embedding vector(1024),
  match_count int default null,
  file_ids uuid[] default null
) returns table (
  id uuid,
  file_id uuid,
  content text,
  tokens int,
  similarity float
)
language plpgsql
as $$
#variable_conflict use_column
-- The output columns (id, file_id, ...) share names with table columns; the
-- directive above resolves any ambiguous bare name to the table column.
begin
  return query
  select
    fi.id,
    fi.file_id,
    fi.content,
    fi.tokens,
    1 - (fi.bge_m3_embedding <=> query_embedding) as similarity
  from file_items as fi
  where (fi.file_id = any(file_ids))
  order by fi.bge_m3_embedding <=> query_embedding
  limit match_count;
end;
$$;
|
||||||
Loading…
Reference in New Issue