evalscope_v0.17.0/evalscope.0.17.0/evalscope/third_party/longbench_write/utils.py

37 lines
815 B
Python

# Copyright (c) Alibaba, Inc. and its affiliates.
import re
from typing import Union
def count_words(text):
    """Count the words in ``text`` and report whether Chinese dominates.

    Every character in the range U+4E00-U+9FFF counts as one word, and every
    run of ASCII letters delimited by word boundaries counts as one word.

    Args:
        text: The string to measure.

    Returns:
        A ``(total_count, is_chinese)`` tuple. ``total_count`` is the number
        of matched Chinese characters plus the number of matched English
        words. ``is_chinese`` is True only when the Chinese character count
        is strictly greater than the English word count.
    """
    num_chinese = len(re.findall(r'[\u4e00-\u9fff]', text))
    num_english = len(re.findall(r'\b[a-zA-Z]+\b', text))
    # A tie (including the empty string) is reported as not Chinese.
    return num_chinese + num_english, num_chinese > num_english
def chinese_to_arabic(chinese_number: str) -> Union[int, str]:
    """Convert a single Chinese numeral character to its integer value.

    Args:
        chinese_number: The string to convert, expected to be one Chinese
            numeral character.

    Returns:
        The integer value when ``chinese_number`` is a known numeral;
        otherwise ``chinese_number`` itself, unchanged.
    """
    # NOTE(review): every key in this table had been reduced to the empty
    # string, which collapsed the literal to {'': 2} and disabled the
    # conversion entirely. The keys for 0-9 are restored from their values.
    # The two extra spellings of 2 are assumed to be the colloquial forms
    # below -- confirm against the upstream source.
    chinese_numerals = {
        '零': 0,
        '一': 1,
        '二': 2,
        '三': 3,
        '四': 4,
        '五': 5,
        '六': 6,
        '七': 7,
        '八': 8,
        '九': 9,
        '两': 2,
        '俩': 2,
    }
    # Unknown input is passed through rather than raising.
    return chinese_numerals.get(chinese_number, chinese_number)