.

2024-10-28 22:18:43 +08:00 · 2024-10-28 22:18:43 +08:00 · ea4dee565a
parent b40708d0ed
commit ea4dee565a
2 changed files with 71 additions and 2 deletions
--- a/app.py
+++ b/app.py
@ -2,11 +2,11 @@ from flask import Flask, render_template, request, redirect, url_for
 import uuid
 import logging
 import json
-from elasticsearch import Elasticsearch
+from elasticsearch import Elasticsearch, helpers
 from elasticsearch.exceptions import ApiError, TransportError
 from dataclasses import dataclass, asdict, field
 from typing import List, Dict, Any, Optional
-
+import json
 # Configure the logging level, format, and output destination
 logging.basicConfig(
@ -81,6 +81,62 @@ def load_indices_from_es(key: str, index: str, buf: Dict[str, Any]) -> Any:
        return None
 def extract_index_structure(es_client, index_name):
    """
    Infer the field layout of an Elasticsearch index from one sample document.

    A single hit is fetched from ``index_name`` and its ``_source`` is walked
    recursively. Keys of nested objects are joined with dots
    (``parent.child``). Lists are sampled through their first element only:
    a list whose first element is an object contributes that object's fields
    under the list's own key, any other non-empty list is reported as
    ``list[<type of first element>]``, and an empty list as ``list``.

    :param es_client: Elasticsearch client instance; only ``search`` is called
    :param index_name: name of the index to sample
    :return: dict mapping dotted field paths to Python type names
        (``type(value).__name__``), in depth-first order with each parent
        listed before its children; empty when the search returns no hits or
        when any exception is raised
    """
    def walk(node, prefix, fields):
        """Record the type found at ``prefix`` and descend into containers."""
        if isinstance(node, list):
            # Lists are represented by their first element; an empty list has
            # nothing to sample and leaves ``fields`` untouched here.
            if not node:
                return
            if not isinstance(node[0], dict):
                fields[prefix] = f"list[{type(node[0]).__name__}]"
                return
            node = node[0]
        if not isinstance(node, dict):
            fields[prefix] = type(node).__name__
            return
        for key, value in node.items():
            path = f"{prefix}.{key}" if prefix else key
            if isinstance(value, list) and not value:
                # Empty list: only the container type is known.
                fields[path] = type(value).__name__
                continue
            if isinstance(value, (dict, list)):
                # Register the container before its children so the parent
                # path precedes them; an already recorded path keeps its
                # first value.
                fields.setdefault(path, type(value).__name__)
            walk(value, path, fields)

    try:
        # One document is enough: the structure is inferred from a sample.
        response = es_client.search(index=index_name, size=1)
        if 'hits' not in response or 'hits' not in response['hits']:
            return {}
        hits = response['hits']['hits']
        if not hits:
            return {}
        field_structure = {}
        walk(hits[0].get('_source', {}), "", field_structure)
        return field_structure
    except Exception as e:
        # Best-effort helper: failures are reported through the logging
        # module (with traceback) instead of print(), and callers still
        # receive an empty structure.
        logging.exception(
            "Error extracting structure for index %s: %s", index_name, e
        )
        return {}
@app.route('/')
 def index():
    # 默认展示第一个客户
@ -88,6 +144,9 @@ def index():
 def calculate_statistics():
    total_emails = 0
    cold_lead_count = 0
@ -215,3 +274,11 @@ def search_by_email():
    # 如果没有找到匹配的邮件地址
    return "Email address not found.", 404
 # Extract the field structure of the 'customer' index using the `es` client.
 field_structure = extract_index_structure(es, 'customer')
 # Print the field structure as indented JSON; ensure_ascii=False keeps
 # non-ASCII characters unescaped in the output.
 print(json.dumps(field_structure, indent=2, ensure_ascii=False))
--- a/caller.py
+++ b/caller.py
@ -14,4 +14,6 @@ if not ret:
 # Pull the 'customer database' entry stored under `key` out of `buf`.
 customer_data=buf[key]['customer database']
 # Hand the customer data to the app module before the server starts.
 app.set_customer_data(customer_data)
 # Start the server bound to 0.0.0.0 on port 5001.
 # NOTE(review): presumably `app.app` is the Flask instance from app.py — confirm.
 app.app.run(host="0.0.0.0", port=5001)