import random
import string

# Fixed seed so that the generated prompts/lengths are reproducible across runs.
random.seed(42)


def gen_prompt(tokenizer, token_num):
    """Return a random alphanumeric string of at least ``token_num`` tokens.

    Args:
        tokenizer: Callable taking a string and returning an object that
            exposes an ``input_ids`` sequence (its length is used as the
            token count).
        token_num: Minimum number of tokens the returned prompt should
            tokenize to. Also used as the initial number of characters.

    Returns:
        A string made of ASCII letters and digits.

    Note:
        The loop only pads the string; if the initial ``token_num``
        characters already tokenize to more than ``token_num`` tokens, the
        string is returned as is (it is never trimmed).
    """
    cha_set = string.ascii_letters + string.digits
    # Start with one character per requested token, then grow one character
    # at a time until the tokenizer reports enough tokens.
    ret = "".join(random.choices(cha_set, k=token_num))
    while len(tokenizer(ret).input_ids) < token_num:
        ret += random.choice(cha_set)
    return ret


def _gen_turn(args, tokenizer):
    """Build a single question/answer turn with random lengths."""
    # Draw order (prompt length, answer length, then prompt characters) is
    # kept fixed so that seeded runs produce the same data.
    prompt_len = random.randint(args.min_len_q, args.max_len_q)
    new_tokens = random.randint(args.min_len_a, args.max_len_a)
    return {
        "prompt": gen_prompt(tokenizer, prompt_len),
        "new_tokens": new_tokens,
    }


def gen_arguments(args, tokenizer):
    """Generate ``args.num_qa`` conversations of ``args.turns`` turns each.

    Args:
        args: Object providing ``num_qa``, ``turns``, ``min_len_q``,
            ``max_len_q``, ``min_len_a`` and ``max_len_a``. The length
            bounds are inclusive (they are passed to ``random.randint``).
        tokenizer: Tokenizer forwarded to :func:`gen_prompt`.

    Returns:
        A list of ``{"qas": [...]}`` dicts; every entry in ``"qas"`` is a
        dict with a ``"prompt"`` string and a ``"new_tokens"`` int.
    """
    return [
        {"qas": [_gen_turn(args, tokenizer) for _ in range(args.turns)]}
        for _ in range(args.num_qa)
    ]