# evalscope/tests/aigc/test_t2i.py

# Load environment variables from a local .env file (API keys, endpoints, etc.)
# before any evalscope modules that may read them are imported.
from dotenv import dotenv_values
env = dotenv_values('.env')
import os
import unittest
from evalscope.config import TaskConfig
from evalscope.constants import EvalType, JudgeStrategy, ModelTask, OutputType
from evalscope.run import run_task
from evalscope.utils import test_level_list
from evalscope.utils.logger import get_logger
# Force verbose logging for test runs; must be set before get_logger() is called.
os.environ['LOG_LEVEL'] = 'DEBUG'
logger = get_logger()
class TestRun(unittest.TestCase):
    """Smoke tests for text-to-image (T2I) evaluation via ``run_task``.

    Both tests are gated on test level 0 via ``test_level_list()`` and are
    skipped otherwise. They build a ``TaskConfig`` and hand it to
    ``run_task``; success is the run completing without raising.
    """

    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_general(self):
        """Evaluate a custom T2I dataset (local JSONL) with the full metric suite."""
        task_cfg = TaskConfig(
            datasets=['general_t2i'],
            dataset_args={
                'general_t2i': {
                    'metric_list': [
                        'PickScore',
                        'CLIPScore',
                        'HPSv2Score',
                        'HPSv2.1Score',
                        'BLIPv2Score',
                        'ImageRewardScore',
                        'VQAScore',
                        'FGA_BLIP2Score',
                        'MPS',
                    ],
                    # Prompt/image pairs are read from this local JSONL file.
                    'dataset_id': 'custom_eval/multimodal/t2i/example.jsonl',
                }
            },
        )
        run_task(task_cfg=task_cfg)

    @unittest.skipUnless(0 in test_level_list(), 'skip test in current test level')
    def test_run_benchmark(self):
        """Generate images with SDXL and score them on the tifa160 benchmark."""
        task_cfg = TaskConfig(
            model='stabilityai/stable-diffusion-xl-base-1.0',  # model on modelscope
            model_task=ModelTask.IMAGE_GENERATION,  # must be IMAGE_GENERATION
            model_args={
                'use_safetensors': True,
                'variant': 'fp16',
                'torch_dtype': 'torch.float16',
            },
            datasets=[
                'tifa160',
                # 'genai_bench',
                # 'evalmuse',
                # 'hpdv2',
            ],
            dataset_args={
                'tifa160': {
                    # Only PickScore is enabled to keep the smoke test light;
                    # uncomment others for a fuller evaluation.
                    'metric_list': [
                        'PickScore',
                        # 'CLIPScore',
                        # 'HPSv2Score',
                        # 'BLIPv2Score',
                        # 'ImageRewardScore',
                        # 'VQAScore',
                        # 'FGA_BLIP2Score',
                    ]
                }
            },
            limit=5,  # cap at 5 samples so the test stays fast
            generation_config={
                'num_inference_steps': 50,
                'guidance_scale': 7.5
            },
            # use_cache='outputs/20250427_134122',
        )
        run_task(task_cfg=task_cfg)