// iconsulting/packages/services/knowledge-service/src/adapters/outbound/persistence/knowledge-postgres.reposito...
//
// 348 lines
// 11 KiB
// TypeScript

import { Injectable } from '@nestjs/common';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository, ILike } from 'typeorm';
import { TenantContextService } from '@iconsulting/shared';
import { IKnowledgeRepository } from '../../../domain/repositories/knowledge.repository.interface';
import { KnowledgeArticleEntity, KnowledgeSource } from '../../../domain/entities/knowledge-article.entity';
import { KnowledgeChunkEntity, ChunkType } from '../../../domain/entities/knowledge-chunk.entity';
import { KnowledgeArticleORM } from '../../../infrastructure/database/postgres/entities/knowledge-article.orm';
import { KnowledgeChunkORM } from '../../../infrastructure/database/postgres/entities/knowledge-chunk.orm';
/**
 * PostgreSQL-backed implementation of {@link IKnowledgeRepository}.
 *
 * Every read and write is scoped to the tenant reported by
 * {@link TenantContextService}; if no tenant is set the operation throws
 * before touching the database.
 *
 * The two vector searches use raw SQL with the pgvector `<=>` operator.
 * All caller-supplied values in those statements are bound as positional
 * parameters — nothing is concatenated into the SQL text.
 */
@Injectable()
export class KnowledgePostgresRepository implements IKnowledgeRepository {
  /** Row cap applied by the vector searches when no usable `limit` is supplied. */
  private static readonly DEFAULT_VECTOR_LIMIT = 5;

  /** Similarity threshold applied by the vector searches when none is supplied. */
  private static readonly DEFAULT_MIN_SIMILARITY = 0.7;

  constructor(
    @InjectRepository(KnowledgeArticleORM)
    private readonly articleRepo: Repository<KnowledgeArticleORM>,
    @InjectRepository(KnowledgeChunkORM)
    private readonly chunkRepo: Repository<KnowledgeChunkORM>,
    private readonly tenantContext: TenantContextService,
  ) {}

  /**
   * Returns the current tenant id.
   *
   * @throws Error when the tenant context yields no id.
   */
  private getTenantId(): string {
    const id = this.tenantContext.getCurrentTenantId();
    if (!id) throw new Error('Tenant context not set');
    return id;
  }

  // ========== Article operations ==========

  /**
   * Persists an article through the repository's `save`.
   * The tenant id is stamped by {@link toArticleORM}.
   */
  async saveArticle(article: KnowledgeArticleEntity): Promise<void> {
    await this.articleRepo.save(this.toArticleORM(article));
  }

  /**
   * Looks up one article by id within the current tenant.
   *
   * @returns the article, or `null` when no row matches.
   */
  async findArticleById(id: string): Promise<KnowledgeArticleEntity | null> {
    const orm = await this.articleRepo.findOne({
      where: { id, tenantId: this.getTenantId() },
    });
    return orm ? this.toArticleEntity(orm) : null;
  }

  /**
   * Lists the current tenant's articles in a category, ordered by quality
   * score and then creation time, both descending.
   *
   * @param category exact category to match.
   * @param options `publishedOnly` restricts to published rows; `limit` and
   *   `offset` are applied only when truthy (so `0` means "not set").
   */
  async findArticlesByCategory(
    category: string,
    options?: { publishedOnly?: boolean; limit?: number; offset?: number },
  ): Promise<KnowledgeArticleEntity[]> {
    const query = this.articleRepo
      .createQueryBuilder('article')
      .where('article.tenant_id = :tenantId', { tenantId: this.getTenantId() })
      .andWhere('article.category = :category', { category });

    if (options?.publishedOnly) {
      query.andWhere('article.isPublished = true');
    }

    query
      .orderBy('article.qualityScore', 'DESC')
      .addOrderBy('article.createdAt', 'DESC');

    if (options?.limit) {
      query.take(options.limit);
    }
    if (options?.offset) {
      query.skip(options.offset);
    }

    const orms = await query.getMany();
    return orms.map(orm => this.toArticleEntity(orm));
  }

  /**
   * Case-insensitive substring search over article title and content within
   * the current tenant, ordered by quality score descending.
   *
   * @param queryStr text wrapped in `%…%` and bound as an ILIKE pattern.
   *   NOTE(review): `%` and `_` inside `queryStr` are not escaped, so they
   *   act as wildcards — confirm whether that is intended.
   * @param options optional category / published filters; `limit` defaults
   *   to 10 when missing or falsy.
   */
  async searchArticles(
    queryStr: string,
    options?: { category?: string; publishedOnly?: boolean; limit?: number },
  ): Promise<KnowledgeArticleEntity[]> {
    const query = this.articleRepo
      .createQueryBuilder('article')
      .where('article.tenant_id = :tenantId', { tenantId: this.getTenantId() })
      .andWhere('(article.title ILIKE :search OR article.content ILIKE :search)', {
        search: `%${queryStr}%`,
      });

    if (options?.category) {
      query.andWhere('article.category = :category', { category: options.category });
    }
    if (options?.publishedOnly) {
      query.andWhere('article.isPublished = true');
    }

    query.orderBy('article.qualityScore', 'DESC').take(options?.limit || 10);

    const orms = await query.getMany();
    return orms.map(orm => this.toArticleEntity(orm));
  }

  /**
   * Finds the current tenant's articles whose stored embedding is closest to
   * `embedding`, scoring each row as `1 - (embedding <=> query)`.
   *
   * @param embedding query vector.
   * @param options
   *   - `category`: exact-match filter, applied when truthy.
   *   - `publishedOnly`: restrict to published rows.
   *   - `limit`: maximum rows; defaults to 5 when missing or not a positive
   *     finite number, and is floored to an integer.
   *   - `minSimilarity`: minimum score; defaults to 0.7 only when missing or
   *     non-finite, so an explicit `0` is honoured.
   * @returns matches ordered by similarity, highest first.
   */
  async searchArticlesByVector(
    embedding: number[],
    options?: {
      category?: string;
      publishedOnly?: boolean;
      limit?: number;
      minSimilarity?: number;
    },
  ): Promise<Array<{ article: KnowledgeArticleEntity; similarity: number }>> {
    // $1 = tenant id, $2 = query vector; further placeholders are numbered
    // from the current length of `params` as values are appended.
    const params: Array<string | number> = [
      this.getTenantId(),
      this.toVectorLiteral(embedding),
    ];

    let sql = `
      SELECT *,
             1 - (embedding <=> $2::vector) AS similarity
      FROM knowledge_articles
      WHERE tenant_id = $1
        AND embedding IS NOT NULL
    `;

    if (options?.category) {
      params.push(options.category);
      sql += ` AND category = $${params.length}`;
    }
    if (options?.publishedOnly) {
      sql += ` AND is_published = true`;
    }

    params.push(this.resolveMinSimilarity(options?.minSimilarity));
    sql += ` AND 1 - (embedding <=> $2::vector) >= $${params.length}::float8`;

    params.push(this.resolveVectorLimit(options?.limit));
    sql += ` ORDER BY similarity DESC LIMIT $${params.length}::int`;

    const results = await this.articleRepo.query(sql, params);
    return results.map((row: any) => ({
      article: this.toArticleEntityFromRaw(row),
      similarity: parseFloat(row.similarity),
    }));
  }

  /**
   * Persists changes to an article through the repository's `save`,
   * exactly like {@link saveArticle}.
   */
  async updateArticle(article: KnowledgeArticleEntity): Promise<void> {
    await this.articleRepo.save(this.toArticleORM(article));
  }

  /** Deletes the article with the given id, restricted to the current tenant. */
  async deleteArticle(id: string): Promise<void> {
    await this.articleRepo.delete({ id, tenantId: this.getTenantId() });
  }

  /**
   * Counts the current tenant's articles, optionally filtered by category
   * and/or published state.
   */
  async countArticles(options?: { category?: string; publishedOnly?: boolean }): Promise<number> {
    const query = this.articleRepo
      .createQueryBuilder('article')
      .where('article.tenant_id = :tenantId', { tenantId: this.getTenantId() });

    if (options?.category) {
      query.andWhere('article.category = :category', { category: options.category });
    }
    if (options?.publishedOnly) {
      query.andWhere('article.isPublished = true');
    }

    return query.getCount();
  }

  // ========== Chunk operations ==========

  /**
   * Persists a single chunk through the repository's `save`.
   * The tenant id is stamped by {@link toChunkORM}.
   */
  async saveChunk(chunk: KnowledgeChunkEntity): Promise<void> {
    await this.chunkRepo.save(this.toChunkORM(chunk));
  }

  /** Persists a batch of chunks in one `save` call. */
  async saveChunks(chunks: KnowledgeChunkEntity[]): Promise<void> {
    // Resolve the tenant up front so an empty batch still fails when no
    // tenant context is set, instead of silently succeeding.
    this.getTenantId();
    const orms = chunks.map(chunk => this.toChunkORM(chunk));
    await this.chunkRepo.save(orms);
  }

  /** Returns an article's chunks for the current tenant, ordered by `chunkIndex` ascending. */
  async findChunksByArticleId(articleId: string): Promise<KnowledgeChunkEntity[]> {
    const orms = await this.chunkRepo.find({
      where: { articleId, tenantId: this.getTenantId() },
      order: { chunkIndex: 'ASC' },
    });
    return orms.map(orm => this.toChunkEntity(orm));
  }

  /**
   * Finds the current tenant's chunks whose stored embedding is closest to
   * `embedding`, scoring each row as `1 - (embedding <=> query)`. Only chunks
   * whose parent article is published are considered.
   *
   * @param embedding query vector.
   * @param options
   *   - `category`: exact-match filter on the parent article, applied when truthy.
   *   - `limit`: maximum rows; defaults to 5 when missing or not a positive
   *     finite number, and is floored to an integer.
   *   - `minSimilarity`: minimum score; defaults to 0.7 only when missing or
   *     non-finite, so an explicit `0` is honoured.
   * @returns matches ordered by similarity, highest first.
   */
  async searchChunksByVector(
    embedding: number[],
    options?: {
      category?: string;
      limit?: number;
      minSimilarity?: number;
    },
  ): Promise<Array<{ chunk: KnowledgeChunkEntity; similarity: number }>> {
    // $1 = tenant id, $2 = query vector; further placeholders are numbered
    // from the current length of `params` as values are appended.
    const params: Array<string | number> = [
      this.getTenantId(),
      this.toVectorLiteral(embedding),
    ];

    let sql = `
      SELECT c.*,
             1 - (c.embedding <=> $2::vector) AS similarity
      FROM knowledge_chunks c
      JOIN knowledge_articles a ON c.article_id = a.id
      WHERE c.tenant_id = $1
        AND c.embedding IS NOT NULL
        AND a.is_published = true
    `;

    if (options?.category) {
      params.push(options.category);
      sql += ` AND a.category = $${params.length}`;
    }

    params.push(this.resolveMinSimilarity(options?.minSimilarity));
    sql += ` AND 1 - (c.embedding <=> $2::vector) >= $${params.length}::float8`;

    params.push(this.resolveVectorLimit(options?.limit));
    sql += ` ORDER BY similarity DESC LIMIT $${params.length}::int`;

    const results = await this.chunkRepo.query(sql, params);
    return results.map((row: any) => ({
      chunk: this.toChunkEntityFromRaw(row),
      similarity: parseFloat(row.similarity),
    }));
  }

  /** Deletes every chunk of the given article, restricted to the current tenant. */
  async deleteChunksByArticleId(articleId: string): Promise<void> {
    await this.chunkRepo.delete({ articleId, tenantId: this.getTenantId() });
  }

  // ========== Vector-search helpers ==========

  /** Formats a numeric array as the bracketed text literal that is cast to `vector` in SQL. */
  private toVectorLiteral(embedding: number[]): string {
    return `[${embedding.join(',')}]`;
  }

  /** Returns `value` when it is a finite number (including 0), else the default threshold. */
  private resolveMinSimilarity(value: number | undefined): number {
    return typeof value === 'number' && Number.isFinite(value)
      ? value
      : KnowledgePostgresRepository.DEFAULT_MIN_SIMILARITY;
  }

  /** Returns `value` floored to an integer when it is finite and >= 1, else the default limit. */
  private resolveVectorLimit(value: number | undefined): number {
    if (typeof value !== 'number' || !Number.isFinite(value) || value < 1) {
      return KnowledgePostgresRepository.DEFAULT_VECTOR_LIMIT;
    }
    return Math.floor(value);
  }

  // ========== Mapping helpers ==========

  /** Copies a domain article onto a new ORM row, stamped with the current tenant id. */
  private toArticleORM(entity: KnowledgeArticleEntity): KnowledgeArticleORM {
    const orm = new KnowledgeArticleORM();
    orm.id = entity.id;
    orm.tenantId = this.getTenantId();
    orm.title = entity.title;
    orm.content = entity.content;
    orm.summary = entity.summary;
    orm.category = entity.category;
    orm.tags = entity.tags;
    orm.source = entity.source;
    orm.sourceUrl = entity.sourceUrl;
    orm.embedding = entity.embedding;
    orm.isPublished = entity.isPublished;
    orm.citationCount = entity.citationCount;
    orm.helpfulCount = entity.helpfulCount;
    orm.unhelpfulCount = entity.unhelpfulCount;
    orm.qualityScore = entity.qualityScore;
    orm.createdBy = entity.createdBy;
    orm.updatedBy = entity.updatedBy;
    orm.createdAt = entity.createdAt;
    orm.updatedAt = entity.updatedAt;
    return orm;
  }

  /** Rebuilds a domain article from an ORM row; the tenant id is not carried over. */
  private toArticleEntity(orm: KnowledgeArticleORM): KnowledgeArticleEntity {
    return KnowledgeArticleEntity.fromPersistence({
      id: orm.id,
      title: orm.title,
      content: orm.content,
      summary: orm.summary,
      category: orm.category,
      tags: orm.tags,
      source: orm.source as KnowledgeSource,
      sourceUrl: orm.sourceUrl,
      embedding: orm.embedding,
      isPublished: orm.isPublished,
      citationCount: orm.citationCount,
      helpfulCount: orm.helpfulCount,
      unhelpfulCount: orm.unhelpfulCount,
      qualityScore: orm.qualityScore,
      createdBy: orm.createdBy,
      updatedBy: orm.updatedBy,
      createdAt: orm.createdAt,
      updatedAt: orm.updatedAt,
    });
  }

  /**
   * Rebuilds a domain article from a raw query row keyed by snake_case
   * column names.
   *
   * NOTE(review): apart from the two timestamps, values are passed through
   * exactly as the driver returned them — confirm `embedding` and the numeric
   * columns arrive in the shape the domain entity expects.
   */
  private toArticleEntityFromRaw(row: any): KnowledgeArticleEntity {
    return KnowledgeArticleEntity.fromPersistence({
      id: row.id,
      title: row.title,
      content: row.content,
      summary: row.summary,
      category: row.category,
      tags: row.tags,
      source: row.source as KnowledgeSource,
      sourceUrl: row.source_url,
      embedding: row.embedding,
      isPublished: row.is_published,
      citationCount: row.citation_count,
      helpfulCount: row.helpful_count,
      unhelpfulCount: row.unhelpful_count,
      qualityScore: row.quality_score,
      createdBy: row.created_by,
      updatedBy: row.updated_by,
      createdAt: new Date(row.created_at),
      updatedAt: new Date(row.updated_at),
    });
  }

  /** Copies a domain chunk onto a new ORM row, stamped with the current tenant id. */
  private toChunkORM(entity: KnowledgeChunkEntity): KnowledgeChunkORM {
    const orm = new KnowledgeChunkORM();
    orm.id = entity.id;
    orm.tenantId = this.getTenantId();
    orm.articleId = entity.articleId;
    orm.content = entity.content;
    orm.chunkIndex = entity.chunkIndex;
    orm.chunkType = entity.chunkType;
    orm.embedding = entity.embedding;
    orm.metadata = entity.metadata as Record<string, unknown>;
    orm.tokenCount = entity.tokenCount;
    orm.createdAt = entity.createdAt;
    return orm;
  }

  /** Rebuilds a domain chunk from an ORM row; the tenant id is not carried over. */
  private toChunkEntity(orm: KnowledgeChunkORM): KnowledgeChunkEntity {
    return KnowledgeChunkEntity.fromPersistence({
      id: orm.id,
      articleId: orm.articleId,
      content: orm.content,
      chunkIndex: orm.chunkIndex,
      chunkType: orm.chunkType as ChunkType,
      embedding: orm.embedding,
      metadata: orm.metadata,
      tokenCount: orm.tokenCount,
      createdAt: orm.createdAt,
    });
  }

  /**
   * Rebuilds a domain chunk from a raw query row keyed by snake_case column
   * names.
   *
   * NOTE(review): apart from `created_at`, values are passed through exactly
   * as the driver returned them — confirm `embedding` and `metadata` arrive
   * in the shape the domain entity expects.
   */
  private toChunkEntityFromRaw(row: any): KnowledgeChunkEntity {
    return KnowledgeChunkEntity.fromPersistence({
      id: row.id,
      articleId: row.article_id,
      content: row.content,
      chunkIndex: row.chunk_index,
      chunkType: row.chunk_type as ChunkType,
      embedding: row.embedding,
      metadata: row.metadata,
      tokenCount: row.token_count,
      createdAt: new Date(row.created_at),
    });
  }
}