import { generateLocalEmbedding } from "@/lib/generate-local-embedding"
import { processDocX } from "@/lib/retrieval/processing"
import { checkApiKey, getServerProfile } from "@/lib/server/server-chat-helpers"
import { Database } from "@/supabase/types"
import { FileItemChunk } from "@/types"
import { createClient } from "@supabase/supabase-js"
import { NextResponse } from "next/server"
import OpenAI from "openai"
import { getRuntimeEnvForRouterAPI } from "@/lib/runtime-env"
import { generateBgeM3Embedding } from "@/lib/generate-bgem3-embedding"

export const runtime = "nodejs"

/** Vector length a BGE-M3 embedding must have to be accepted and stored. */
const BGE_M3_EMBEDDING_LENGTH = 1024

/**
 * Generates a BGE-M3 embedding for one chunk and validates its shape.
 *
 * Returns `null` (after logging the reason) when generation throws or when the
 * result is not an array of exactly `BGE_M3_EMBEDDING_LENGTH` numbers, so a
 * single bad chunk does not abort the whole request.
 *
 * @param content - Text of the chunk to embed.
 * @param index - Position of the chunk, used only in log messages.
 */
async function embedChunkWithBgeM3(content: string, index: number) {
  try {
    // Call out to the project's BGE-M3 API / local function.
    const result = await generateBgeM3Embedding(content)

    if (!Array.isArray(result)) {
      console.error(`......❌ Chunk ${index}: result is not an array`, result)
      return null
    }
    if (result.length !== BGE_M3_EMBEDDING_LENGTH) {
      console.error(
        `......❌ Chunk ${index}: incorrect length: ${result.length}`
      )
      return null
    }
    if (!result.every(x => typeof x === "number")) {
      console.error(`......❌ Chunk ${index}: contains non-numbers`, result)
      return null
    }

    return result
  } catch (error) {
    // Log the chunk index: interpolating the chunk object itself only
    // prints "[object Object]".
    console.error(
      `Error generating BGE-M3 embedding for chunk ${index}`,
      error
    )
    return null
  }
}

/**
 * Route handler: chunks the submitted document text, embeds every chunk with
 * the requested provider, and stores the chunks in `file_items`.
 *
 * Expects a JSON body with `text`, `fileId`, `embeddingsProvider`
 * (`"openai" | "local" | "bge-m3"`) and `fileExtension`. Only `"docx"` is
 * handled; any other extension yields a 400 response.
 *
 * @param req - Incoming request whose body is the JSON payload described above.
 * @returns `200 "Embed Successful"` once the chunks are stored and the file's
 *   token total is updated; otherwise a JSON `{ message }` response whose
 *   status is the thrown error's `status`, or 500 when it has none.
 */
export async function POST(req: Request) {
  try {
    // Parsed inside the try block so that a malformed body is reported through
    // the same JSON error response as every other failure.
    const json = await req.json()
    const { text, fileId, embeddingsProvider, fileExtension } = json as {
      text: string
      fileId: string
      embeddingsProvider: "openai" | "local" | "bge-m3"
      fileExtension: string
    }

    const supabaseAdmin = createClient(
      getRuntimeEnvForRouterAPI("SUPABASE_URL") ?? "http://localhost:8000",
      process.env.SUPABASE_SERVICE_ROLE_KEY!
    )

    const profile = await getServerProfile()

    if (embeddingsProvider === "openai") {
      if (profile.use_azure_openai) {
        checkApiKey(profile.azure_openai_api_key, "Azure OpenAI")
      } else {
        checkApiKey(profile.openai_api_key, "OpenAI")
      }
    }

    let chunks: FileItemChunk[] = []

    switch (fileExtension) {
      case "docx":
        chunks = await processDocX(text)
        break
      default:
        return new NextResponse("Unsupported file type", { status: 400 })
    }

    // One entry per chunk, in chunk order; an entry is null when that chunk
    // could not be embedded. Left loosely typed because the element type
    // depends on the provider helpers declared outside this file.
    let embeddings: any[] = []

    if (embeddingsProvider === "openai") {
      // The client is only built when the OpenAI provider is actually used.
      const openai = profile.use_azure_openai
        ? new OpenAI({
            apiKey: profile.azure_openai_api_key || "",
            baseURL: `${profile.azure_openai_endpoint}/openai/deployments/${profile.azure_openai_embeddings_id}`,
            defaultQuery: { "api-version": "2023-12-01-preview" },
            defaultHeaders: { "api-key": profile.azure_openai_api_key }
          })
        : new OpenAI({
            apiKey: profile.openai_api_key || "",
            organization: profile.openai_organization_id
          })

      const response = await openai.embeddings.create({
        model: "text-embedding-3-small",
        input: chunks.map(chunk => chunk.content)
      })

      embeddings = response.data.map((item: any) => item.embedding)
    } else if (embeddingsProvider === "local") {
      embeddings = await Promise.all(
        chunks.map(async (chunk, index) => {
          try {
            return await generateLocalEmbedding(chunk.content)
          } catch (error) {
            console.error(
              `Error generating embedding for chunk ${index}`,
              error
            )
            return null
          }
        })
      )
    } else if (embeddingsProvider === "bge-m3") {
      embeddings = await Promise.all(
        chunks.map((chunk, index) => embedChunkWithBgeM3(chunk.content, index))
      )
      // NOTE: the value logged here is the number of embeddings produced,
      // not the length of an individual vector.
      console.log(`......[embedding] 维度: ${embeddings.length}`)
    }

    const fileItems = chunks.map((chunk, index) => {
      const embedding = embeddings[index] || null

      return {
        file_id: fileId,
        user_id: profile.user_id,
        content: chunk.content,
        tokens: chunk.tokens,
        openai_embedding: embeddingsProvider === "openai" ? embedding : null,
        local_embedding: embeddingsProvider === "local" ? embedding : null,
        bge_m3_embedding:
          embeddingsProvider === "bge-m3" &&
          embedding &&
          embedding.length === BGE_M3_EMBEDDING_LENGTH
            ? embedding
            : null
      }
    })

    // supabase-js reports failures through the returned `error` field instead
    // of throwing, so it has to be checked explicitly; otherwise a failed
    // write would still be answered with "Embed Successful".
    const { error: upsertError } = await supabaseAdmin
      .from("file_items")
      .upsert(fileItems)
    if (upsertError) {
      throw new Error(`Failed to store file items: ${upsertError.message}`)
    }

    const totalTokens = fileItems.reduce((acc, item) => acc + item.tokens, 0)

    const { error: updateError } = await supabaseAdmin
      .from("files")
      .update({ tokens: totalTokens })
      .eq("id", fileId)
    if (updateError) {
      throw new Error(`Failed to update file tokens: ${updateError.message}`)
    }

    return new NextResponse("Embed Successful", { status: 200 })
  } catch (error: any) {
    console.error(error)
    // Optional chaining on `error` itself guards against a thrown
    // null/undefined value.
    const errorMessage =
      error?.error?.message || "An unexpected error occurred"
    const errorCode = error?.status || 500
    return new Response(JSON.stringify({ message: errorMessage }), {
      status: errorCode
    })
  }
}