348 lines
11 KiB
TypeScript
348 lines
11 KiB
TypeScript
import { Injectable } from '@nestjs/common';
|
|
import { InjectRepository } from '@nestjs/typeorm';
|
|
import { Repository, ILike } from 'typeorm';
|
|
import { TenantContextService } from '@iconsulting/shared';
|
|
import { IKnowledgeRepository } from '../../../domain/repositories/knowledge.repository.interface';
|
|
import { KnowledgeArticleEntity, KnowledgeSource } from '../../../domain/entities/knowledge-article.entity';
|
|
import { KnowledgeChunkEntity, ChunkType } from '../../../domain/entities/knowledge-chunk.entity';
|
|
import { KnowledgeArticleORM } from '../../../infrastructure/database/postgres/entities/knowledge-article.orm';
|
|
import { KnowledgeChunkORM } from '../../../infrastructure/database/postgres/entities/knowledge-chunk.orm';
|
|
|
|
/**
 * PostgreSQL-backed implementation of {@link IKnowledgeRepository}.
 *
 * Every read and write is scoped to the tenant id obtained from
 * {@link TenantContextService}; a method throws if no tenant is set.
 * Vector similarity searches are issued as raw SQL using the `<=>`
 * distance operator, with all caller-supplied values passed as bound
 * parameters (never interpolated into the SQL text).
 */
@Injectable()
export class KnowledgePostgresRepository implements IKnowledgeRepository {
  constructor(
    @InjectRepository(KnowledgeArticleORM)
    private readonly articleRepo: Repository<KnowledgeArticleORM>,
    @InjectRepository(KnowledgeChunkORM)
    private readonly chunkRepo: Repository<KnowledgeChunkORM>,
    private readonly tenantContext: TenantContextService,
  ) {}

  /**
   * Returns the current tenant id.
   *
   * @throws Error when the tenant context yields no id.
   */
  private getTenantId(): string {
    const id = this.tenantContext.getCurrentTenantId();
    if (!id) throw new Error('Tenant context not set');
    return id;
  }

  // ========== Article operations ==========

  /**
   * Persists an article under the current tenant.
   *
   * @param article Domain entity to store.
   */
  async saveArticle(article: KnowledgeArticleEntity): Promise<void> {
    const orm = this.toArticleORM(article);
    orm.tenantId = this.getTenantId();
    await this.articleRepo.save(orm);
  }

  /**
   * Looks up a single article by id within the current tenant.
   *
   * @param id Article id.
   * @returns The article, or `null` when no row matches.
   */
  async findArticleById(id: string): Promise<KnowledgeArticleEntity | null> {
    const orm = await this.articleRepo.findOne({
      where: { id, tenantId: this.getTenantId() },
    });
    return orm ? this.toArticleEntity(orm) : null;
  }

  /**
   * Lists the current tenant's articles in one category, ordered by
   * quality score (descending) and then creation time (descending).
   *
   * @param category Category to match exactly.
   * @param options `publishedOnly` restricts to published articles;
   *   `limit` / `offset` page the result and are ignored when falsy.
   */
  async findArticlesByCategory(
    category: string,
    options?: { publishedOnly?: boolean; limit?: number; offset?: number },
  ): Promise<KnowledgeArticleEntity[]> {
    const query = this.articleRepo
      .createQueryBuilder('article')
      .where('article.tenant_id = :tenantId', { tenantId: this.getTenantId() })
      .andWhere('article.category = :category', { category });

    if (options?.publishedOnly) {
      query.andWhere('article.isPublished = true');
    }

    query
      .orderBy('article.qualityScore', 'DESC')
      .addOrderBy('article.createdAt', 'DESC');

    if (options?.limit) {
      query.take(options.limit);
    }
    if (options?.offset) {
      query.skip(options.offset);
    }

    const orms = await query.getMany();
    return orms.map(orm => this.toArticleEntity(orm));
  }

  /**
   * Case-insensitive substring search over article title and content,
   * ordered by quality score (descending).
   *
   * The search term is bound as a parameter, but `%` and `_` inside
   * `queryStr` are not escaped and therefore act as ILIKE wildcards.
   *
   * @param queryStr Text to look for.
   * @param options Optional category / published filter; `limit`
   *   defaults to 10 when omitted or falsy.
   */
  async searchArticles(
    queryStr: string,
    options?: { category?: string; publishedOnly?: boolean; limit?: number },
  ): Promise<KnowledgeArticleEntity[]> {
    const query = this.articleRepo
      .createQueryBuilder('article')
      .where('article.tenant_id = :tenantId', { tenantId: this.getTenantId() })
      .andWhere('(article.title ILIKE :search OR article.content ILIKE :search)', {
        search: `%${queryStr}%`,
      });

    if (options?.category) {
      query.andWhere('article.category = :category', { category: options.category });
    }

    if (options?.publishedOnly) {
      query.andWhere('article.isPublished = true');
    }

    query
      .orderBy('article.qualityScore', 'DESC')
      .take(options?.limit || 10);

    const orms = await query.getMany();
    return orms.map(orm => this.toArticleEntity(orm));
  }

  /**
   * Finds the current tenant's articles whose stored embedding is most
   * similar to `embedding`, where similarity is `1 - (embedding <=> query)`.
   *
   * All caller-supplied values (embedding, category, threshold, limit)
   * are sent as bound parameters so none of them can alter the SQL text.
   *
   * @param embedding Query vector.
   * @param options `category` / `publishedOnly` narrow the candidates;
   *   `limit` defaults to 5 when omitted or falsy; `minSimilarity`
   *   defaults to 0.7 when omitted (an explicit `0` is honoured).
   * @returns Matching articles with their similarity, best match first.
   */
  async searchArticlesByVector(
    embedding: number[],
    options?: {
      category?: string;
      publishedOnly?: boolean;
      limit?: number;
      minSimilarity?: number;
    },
  ): Promise<Array<{ article: KnowledgeArticleEntity; similarity: number }>> {
    const tenantId = this.getTenantId();
    const embeddingStr = `[${embedding.join(',')}]`;
    const limit = options?.limit || 5;
    // `??` rather than `||` so that a threshold of 0 is not replaced by the default.
    const minSimilarity = options?.minSimilarity ?? 0.7;

    // $1 = tenant id, $2 = query vector literal; further placeholders are
    // numbered from params.length as they are appended.
    const params: unknown[] = [tenantId, embeddingStr];

    let sql = `
      SELECT *,
        1 - (embedding <=> $2::vector) as similarity
      FROM knowledge_articles
      WHERE tenant_id = $1
        AND embedding IS NOT NULL
    `;

    if (options?.category) {
      params.push(options.category);
      sql += ` AND category = $${params.length}`;
    }

    if (options?.publishedOnly) {
      sql += ` AND is_published = true`;
    }

    params.push(minSimilarity);
    sql += `
        AND 1 - (embedding <=> $2::vector) >= $${params.length}
      ORDER BY similarity DESC
    `;

    params.push(limit);
    sql += ` LIMIT $${params.length}`;

    const results = await this.articleRepo.query(sql, params);

    return results.map((row: any) => ({
      article: this.toArticleEntityFromRaw(row),
      similarity: parseFloat(row.similarity),
    }));
  }

  /**
   * Saves the given article state under the current tenant.
   * Performs the same `save` call as {@link saveArticle}.
   *
   * @param article Domain entity carrying the new state.
   */
  async updateArticle(article: KnowledgeArticleEntity): Promise<void> {
    const orm = this.toArticleORM(article);
    orm.tenantId = this.getTenantId();
    await this.articleRepo.save(orm);
  }

  /**
   * Deletes the article with the given id if it belongs to the current tenant.
   *
   * @param id Article id.
   */
  async deleteArticle(id: string): Promise<void> {
    await this.articleRepo.delete({ id, tenantId: this.getTenantId() });
  }

  /**
   * Counts the current tenant's articles, optionally filtered.
   *
   * @param options Optional category and published-only filters.
   */
  async countArticles(options?: { category?: string; publishedOnly?: boolean }): Promise<number> {
    const query = this.articleRepo
      .createQueryBuilder('article')
      .where('article.tenant_id = :tenantId', { tenantId: this.getTenantId() });

    if (options?.category) {
      query.andWhere('article.category = :category', { category: options.category });
    }

    if (options?.publishedOnly) {
      query.andWhere('article.isPublished = true');
    }

    return query.getCount();
  }

  // ========== Chunk operations ==========

  /**
   * Persists a single chunk under the current tenant.
   *
   * @param chunk Domain entity to store.
   */
  async saveChunk(chunk: KnowledgeChunkEntity): Promise<void> {
    const orm = this.toChunkORM(chunk);
    orm.tenantId = this.getTenantId();
    await this.chunkRepo.save(orm);
  }

  /**
   * Persists several chunks under the current tenant in one `save` call.
   *
   * @param chunks Domain entities to store.
   */
  async saveChunks(chunks: KnowledgeChunkEntity[]): Promise<void> {
    const tenantId = this.getTenantId();
    const orms = chunks.map(chunk => {
      const orm = this.toChunkORM(chunk);
      orm.tenantId = tenantId;
      return orm;
    });
    await this.chunkRepo.save(orms);
  }

  /**
   * Returns the chunks of one article within the current tenant,
   * ordered by ascending chunk index.
   *
   * @param articleId Id of the owning article.
   */
  async findChunksByArticleId(articleId: string): Promise<KnowledgeChunkEntity[]> {
    const orms = await this.chunkRepo.find({
      where: { articleId, tenantId: this.getTenantId() },
      order: { chunkIndex: 'ASC' },
    });
    return orms.map(orm => this.toChunkEntity(orm));
  }

  /**
   * Finds the current tenant's chunks whose stored embedding is most
   * similar to `embedding`, where similarity is `1 - (embedding <=> query)`.
   * Only chunks whose article is published are considered.
   *
   * All caller-supplied values (embedding, category, threshold, limit)
   * are sent as bound parameters so none of them can alter the SQL text.
   *
   * @param embedding Query vector.
   * @param options `category` filters on the owning article's category;
   *   `limit` defaults to 5 when omitted or falsy; `minSimilarity`
   *   defaults to 0.7 when omitted (an explicit `0` is honoured).
   * @returns Matching chunks with their similarity, best match first.
   */
  async searchChunksByVector(
    embedding: number[],
    options?: {
      category?: string;
      limit?: number;
      minSimilarity?: number;
    },
  ): Promise<Array<{ chunk: KnowledgeChunkEntity; similarity: number }>> {
    const tenantId = this.getTenantId();
    const embeddingStr = `[${embedding.join(',')}]`;
    const limit = options?.limit || 5;
    // `??` rather than `||` so that a threshold of 0 is not replaced by the default.
    const minSimilarity = options?.minSimilarity ?? 0.7;

    // $1 = tenant id, $2 = query vector literal; further placeholders are
    // numbered from params.length as they are appended.
    const params: unknown[] = [tenantId, embeddingStr];

    let sql = `
      SELECT c.*,
        1 - (c.embedding <=> $2::vector) as similarity
      FROM knowledge_chunks c
      JOIN knowledge_articles a ON c.article_id = a.id
      WHERE c.tenant_id = $1
        AND c.embedding IS NOT NULL
        AND a.is_published = true
    `;

    if (options?.category) {
      params.push(options.category);
      sql += ` AND a.category = $${params.length}`;
    }

    params.push(minSimilarity);
    sql += `
        AND 1 - (c.embedding <=> $2::vector) >= $${params.length}
      ORDER BY similarity DESC
    `;

    params.push(limit);
    sql += ` LIMIT $${params.length}`;

    const results = await this.chunkRepo.query(sql, params);

    return results.map((row: any) => ({
      chunk: this.toChunkEntityFromRaw(row),
      similarity: parseFloat(row.similarity),
    }));
  }

  /**
   * Deletes every chunk of the given article within the current tenant.
   *
   * @param articleId Id of the owning article.
   */
  async deleteChunksByArticleId(articleId: string): Promise<void> {
    await this.chunkRepo.delete({ articleId, tenantId: this.getTenantId() });
  }

  // ========== Conversion methods ==========

  /** Copies a domain article into a new ORM object stamped with the current tenant id. */
  private toArticleORM(entity: KnowledgeArticleEntity): KnowledgeArticleORM {
    const orm = new KnowledgeArticleORM();
    orm.id = entity.id;
    orm.tenantId = this.getTenantId();
    orm.title = entity.title;
    orm.content = entity.content;
    orm.summary = entity.summary;
    orm.category = entity.category;
    orm.tags = entity.tags;
    orm.source = entity.source;
    orm.sourceUrl = entity.sourceUrl;
    orm.embedding = entity.embedding;
    orm.isPublished = entity.isPublished;
    orm.citationCount = entity.citationCount;
    orm.helpfulCount = entity.helpfulCount;
    orm.unhelpfulCount = entity.unhelpfulCount;
    orm.qualityScore = entity.qualityScore;
    orm.createdBy = entity.createdBy;
    orm.updatedBy = entity.updatedBy;
    orm.createdAt = entity.createdAt;
    orm.updatedAt = entity.updatedAt;
    return orm;
  }

  /** Rebuilds a domain article from an ORM object loaded through the repository. */
  private toArticleEntity(orm: KnowledgeArticleORM): KnowledgeArticleEntity {
    return KnowledgeArticleEntity.fromPersistence({
      id: orm.id,
      title: orm.title,
      content: orm.content,
      summary: orm.summary,
      category: orm.category,
      tags: orm.tags,
      source: orm.source as KnowledgeSource,
      sourceUrl: orm.sourceUrl,
      embedding: orm.embedding,
      isPublished: orm.isPublished,
      citationCount: orm.citationCount,
      helpfulCount: orm.helpfulCount,
      unhelpfulCount: orm.unhelpfulCount,
      qualityScore: orm.qualityScore,
      createdBy: orm.createdBy,
      updatedBy: orm.updatedBy,
      createdAt: orm.createdAt,
      updatedAt: orm.updatedAt,
    });
  }

  /**
   * Rebuilds a domain article from a raw SQL result row, mapping
   * snake_case column names to the entity's camelCase fields.
   */
  private toArticleEntityFromRaw(row: any): KnowledgeArticleEntity {
    return KnowledgeArticleEntity.fromPersistence({
      id: row.id,
      title: row.title,
      content: row.content,
      summary: row.summary,
      category: row.category,
      tags: row.tags,
      source: row.source as KnowledgeSource,
      sourceUrl: row.source_url,
      // NOTE(review): a raw query bypasses any ORM column transformer, so this
      // value is whatever the driver returns for the column - confirm it
      // matches the shape the entity expects.
      embedding: row.embedding,
      isPublished: row.is_published,
      citationCount: row.citation_count,
      helpfulCount: row.helpful_count,
      unhelpfulCount: row.unhelpful_count,
      qualityScore: row.quality_score,
      createdBy: row.created_by,
      updatedBy: row.updated_by,
      createdAt: new Date(row.created_at),
      updatedAt: new Date(row.updated_at),
    });
  }

  /** Copies a domain chunk into a new ORM object stamped with the current tenant id. */
  private toChunkORM(entity: KnowledgeChunkEntity): KnowledgeChunkORM {
    const orm = new KnowledgeChunkORM();
    orm.id = entity.id;
    orm.tenantId = this.getTenantId();
    orm.articleId = entity.articleId;
    orm.content = entity.content;
    orm.chunkIndex = entity.chunkIndex;
    orm.chunkType = entity.chunkType;
    orm.embedding = entity.embedding;
    orm.metadata = entity.metadata as Record<string, unknown>;
    orm.tokenCount = entity.tokenCount;
    orm.createdAt = entity.createdAt;
    return orm;
  }

  /** Rebuilds a domain chunk from an ORM object loaded through the repository. */
  private toChunkEntity(orm: KnowledgeChunkORM): KnowledgeChunkEntity {
    return KnowledgeChunkEntity.fromPersistence({
      id: orm.id,
      articleId: orm.articleId,
      content: orm.content,
      chunkIndex: orm.chunkIndex,
      chunkType: orm.chunkType as ChunkType,
      embedding: orm.embedding,
      metadata: orm.metadata,
      tokenCount: orm.tokenCount,
      createdAt: orm.createdAt,
    });
  }

  /**
   * Rebuilds a domain chunk from a raw SQL result row, mapping
   * snake_case column names to the entity's camelCase fields.
   */
  private toChunkEntityFromRaw(row: any): KnowledgeChunkEntity {
    return KnowledgeChunkEntity.fromPersistence({
      id: row.id,
      articleId: row.article_id,
      content: row.content,
      chunkIndex: row.chunk_index,
      chunkType: row.chunk_type as ChunkType,
      // NOTE(review): a raw query bypasses any ORM column transformer, so this
      // value is whatever the driver returns for the column - confirm it
      // matches the shape the entity expects.
      embedding: row.embedding,
      metadata: row.metadata,
      tokenCount: row.token_count,
      createdAt: new Date(row.created_at),
    });
  }
}
|