This commit is contained in:
hailin 2025-05-28 11:12:27 +08:00
parent 41dc3ca5fa
commit be0662d918
1 changed files with 33 additions and 0 deletions

View File

@ -19,6 +19,7 @@ create table file_items (
content TEXT NOT NULL,
local_embedding vector(384), -- 384 works for local w/ Xenova/all-MiniLM-L6-v2
openai_embedding vector(1536), -- 1536 for OpenAI
bge_m3_embedding vector(1024), -- 1024 for BGE-M3
tokens INT NOT NULL
);
@ -32,6 +33,9 @@ CREATE INDEX file_items_embedding_idx ON file_items
-- HNSW index over the 384-dimension local embedding column, built with the
-- vector_cosine_ops operator class.
CREATE INDEX file_items_local_embedding_idx ON file_items
USING hnsw (local_embedding vector_cosine_ops);
-- HNSW index over the 1024-dimension BGE-M3 embedding column, built with the
-- same operator class; match_file_items_bge_m3 orders by `<=>` on this column.
CREATE INDEX file_items_bge_m3_embedding_idx ON file_items
USING hnsw (bge_m3_embedding vector_cosine_ops);
-- RLS
-- Turn on row level security for file_items. The policies themselves are not
-- defined in this statement.
ALTER TABLE file_items ENABLE ROW LEVEL SECURITY;
@ -113,4 +117,33 @@ begin
order by file_items.openai_embedding <=> query_embedding
limit match_count;
end;
$$;
-- match_file_items_bge_m3: nearest-neighbour lookup over the BGE-M3 embeddings
-- stored in file_items.
--
-- Parameters:
--   query_embedding  1024-dimension vector to compare against
--                    file_items.bge_m3_embedding.
--   match_count      maximum number of rows to return; the default NULL is
--                    passed straight to LIMIT, which then applies no cap.
--   file_ids         only rows whose file_id is in this array are considered.
--                    With the default NULL the membership test evaluates to
--                    NULL, so no rows are returned.
--
-- Returns one row per matching file item (id, file_id, content, tokens) plus
-- similarity, computed as 1 minus the `<=>` distance to query_embedding.
-- Rows are ordered by ascending distance. Rows whose bge_m3_embedding is NULL
-- are not filtered out by this function.
create function match_file_items_bge_m3 (
  query_embedding vector(1024),
  match_count int default null,
  file_ids uuid[] default null
)
returns table (
  id uuid,
  file_id uuid,
  content text,
  tokens int,
  similarity float
)
language plpgsql
as $$
#variable_conflict use_column
begin
  -- The output columns declared above share names with file_items columns, so
  -- every table column is referenced through the fi alias.
  return query
  select
    fi.id,
    fi.file_id,
    fi.content,
    fi.tokens,
    1 - (fi.bge_m3_embedding <=> query_embedding) as similarity
  from file_items as fi
  where fi.file_id = any(file_ids)
  -- Order by the raw distance expression, the same one the similarity is
  -- derived from.
  order by fi.bge_m3_embedding <=> query_embedding
  limit match_count;
end;
$$;