chatbot-ui/supabase/migrations/20240108234545_add_file_ite...

116 lines
2.6 KiB
PL/PgSQL

--------------- FILE ITEMS ---------------
create table file_items (
-- ID
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-- RELATIONSHIPS
file_id UUID NOT NULL REFERENCES files(id) ON DELETE CASCADE,
user_id UUID NOT NULL REFERENCES auth.users(id) ON DELETE CASCADE,
-- METADATA
created_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMPTZ,
-- SHARING
sharing TEXT NOT NULL DEFAULT 'private',
-- REQUIRED
content TEXT NOT NULL,
local_embedding vector(384), -- 384 works for local w/ Xenova/all-MiniLM-L6-v2
openai_embedding vector(1536), -- 1536 for OpenAI
tokens INT NOT NULL
);
-- INDEXES --
CREATE INDEX file_items_file_id_idx ON file_items(file_id);
CREATE INDEX file_items_embedding_idx ON file_items
USING hnsw (openai_embedding vector_cosine_ops);
CREATE INDEX file_items_local_embedding_idx ON file_items
USING hnsw (local_embedding vector_cosine_ops);
-- RLS
ALTER TABLE file_items ENABLE ROW LEVEL SECURITY;
CREATE POLICY "Allow full access to own file items"
ON file_items
USING (user_id = auth.uid())
WITH CHECK (user_id = auth.uid());
CREATE POLICY "Allow view access to non-private file items"
ON file_items
FOR SELECT
USING (file_id IN (
SELECT id FROM files WHERE sharing <> 'private'
));
-- TRIGGERS --
CREATE TRIGGER update_profiles_updated_at
BEFORE UPDATE ON file_items
FOR EACH ROW
EXECUTE PROCEDURE update_updated_at_column();
-- FUNCTIONS --
create function match_file_items_local (
query_embedding vector(384),
match_count int DEFAULT null,
file_ids UUID[] DEFAULT null
) returns table (
id UUID,
file_id UUID,
content TEXT,
tokens INT,
similarity float
)
language plpgsql
as $$
#variable_conflict use_column
begin
return query
select
id,
file_id,
content,
tokens,
1 - (file_items.local_embedding <=> query_embedding) as similarity
from file_items
where (file_id = ANY(file_ids))
order by file_items.local_embedding <=> query_embedding
limit match_count;
end;
$$;
create function match_file_items_openai (
query_embedding vector(1536),
match_count int DEFAULT null,
file_ids UUID[] DEFAULT null
) returns table (
id UUID,
file_id UUID,
content TEXT,
tokens INT,
similarity float
)
language plpgsql
as $$
#variable_conflict use_column
begin
return query
select
id,
file_id,
content,
tokens,
1 - (file_items.openai_embedding <=> query_embedding) as similarity
from file_items
where (file_id = ANY(file_ids))
order by file_items.openai_embedding <=> query_embedding
limit match_count;
end;
$$;