diff --git a/eval_prompt.py b/eval_prompt.py
index 3092c163dba937815478eb8118cac6ac4372e11c..06a6375b4e683bfbb0d7bb0f14a7ac8ad71a83b7 100644
--- a/eval_prompt.py
+++ b/eval_prompt.py
@@ -15,9 +15,9 @@ def evaluate_prompt(prompt: str, task_name: str, args: Namespace):
     logger.info(f'Evaluating prompt "{prompt}"')
 
     evaluation_model = LLMModel.get_model(
-        name="hfchat",
+        # name="hfchat",
         # name="alpacahfchat",
-        # name="llamachat",
+        name="llamachat",
         # model="PKU-Alignment/alpaca-7b-reproduced",
         # model="chavinlo/alpaca-native",
         load_in_8bit=True,
@@ -42,24 +42,28 @@ def evaluate_prompt(prompt: str, task_name: str, args: Namespace):
 if __name__ == "__main__":
 
     def main():
-        init_rng(1)
-        setup_console_logger()
-
         argparser = argparse.ArgumentParser()
         argparser.add_argument("-p", "--prompt", type=str, required=True)
         argparser.add_argument(
             "-t", "--task", type=str, choices=sorted(tasks.keys()), required=True
         )
+        argparser.add_argument(
+            "-v", "--verbose", action="count", default=0, help="Increase verbosity"
+        )
         args = argparser.parse_args()
 
+        init_rng(1)
+        setup_console_logger(verbosity_level=args.verbose)
+
         options = Namespace(
             llama_path=None,
             chat_format=None,
             chat_handler=None,
-            verbose=False,
             llama_verbose=False,
             llama_model="QuantFactory/Meta-Llama-3.1-8B-Instruct-GGUF",
+            # llama_model="TheBloke/Llama-2-70B-Chat-GGUF",
             llama_model_file="Meta-Llama-3.1-8B-Instruct.Q8_0.gguf",
+            # llama_model_file="llama-2-70b-chat.Q4_K_M.gguf",
             disable_cache=False,
             use_grammar=False,
             evaluation_strategy="simple",
diff --git a/main.py b/main.py
index 2256754fe0d6b7cd11f27674302743d46556b63a..40c8d25dd7e2126e8f5acc83aa5548391e9b532f 100644
--- a/main.py
+++ b/main.py
@@ -9,7 +9,7 @@ from weave.trace.settings import UserSettings
 
 from evoprompt.cli import argument_parser
 from evoprompt.evolution import get_optimizer_class
-from evoprompt.models import HfLlamaChat, Llama, LlamaChat, LLMModel
+from evoprompt.models import HfChat, Llama, LlamaChat, LLMModel
 from evoprompt.task import get_task
 from evoprompt.utils import init_rng, setup_console_logger
 
@@ -86,26 +86,26 @@ if __name__ == "__main__":
         logger.info("DEBUG mode: Do a quick run")
 
     # set up evolution model
-    evolution_model = LLMModel.get_model(options.evolution_engine, options=options)
+    evolution_model = LLMModel.get_model(options.evolution_engine, **vars(options))
     logger.info(
         f"Using {evolution_model.__class__.__name__.lower()} as the evolution engine"
     )
 
     judge_model: LLMModel | None = None
     if options.judge_engine is not None:
-        judge_model = LLMModel.get_model(options.judge_engine, options=options)
+        judge_model = LLMModel.get_model(options.judge_engine, **vars(options))
         logger.info(f"Using {options.judge_engine} as the judge engine")
 
     # set up evaluation model
     # NOTE currenty we always stick to Llama (Llama or LlamaChat depending on evolution engine) as evaluation engine
     # TODO allow to set separate engine and model for evaluation?
-    if isinstance(evolution_model, (Llama, LlamaChat, HfLlamaChat)):
+    if isinstance(evolution_model, (Llama, LlamaChat, HfChat)):
         evaluation_model_name = evolution_model.__class__.__name__.lower()
     elif judge_model is not None and isinstance(judge_model, (Llama, LlamaChat)):
         evaluation_model_name = judge_model.__class__.__name__.lower()
     else:
         evaluation_model_name = "llamachat"
-    evaluation_model = LLMModel.get_model(name=evaluation_model_name, options=options)
+    evaluation_model = LLMModel.get_model(name=evaluation_model_name, **vars(options))
     logger.info(f"Using {evaluation_model_name} as the evaluation engine")
 
     task = get_task(