From 7886a510c1fefd78472011c2867d3dc7096cee20 Mon Sep 17 00:00:00 2001 From: Maximilian Schmidt <maximilian.schmidt@ims.uni-stuttgart.de> Date: Wed, 10 Apr 2024 14:00:27 +0200 Subject: [PATCH] Re-add model usage for evolution and evaluation --- evolution.py | 26 +++++++++++++++++++------- optimization.py | 9 +++++---- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/evolution.py b/evolution.py index 03d0e59..9a935fa 100644 --- a/evolution.py +++ b/evolution.py @@ -2,7 +2,7 @@ from abc import abstractmethod from models import LLMModel from numpy.random import choice -from opt_types import Prompt +from opt_types import ModelUsage, Prompt from optimization import PromptOptimization from task import Task from tqdm import trange @@ -93,9 +93,16 @@ class EvolutionAlgorithm(PromptOptimization): self.population_size = 3 num_iterations = 2 + # model usage for evaluation of prompts + total_evaluation_usage = ModelUsage() + # model usage for evolution of prompts + total_evolution_usage = ModelUsage() + run_directory = initialize_run_directory(self.evolution_model) - initial_prompts, _ = self.init_run(self.population_size) + initial_prompts, evolution_usage, evaluation_usage = self.init_run(self.population_size) + total_evaluation_usage += evaluation_usage + total_evolution_usage += evolution_usage # Algorithm 1 Discrete prompt optimization: EVOPROMPT @@ -116,14 +123,16 @@ class EvolutionAlgorithm(PromptOptimization): # Line 4: Evolution: generate a new prompt based on the selected parent prompts by leveraging LLM to perform evolutionary operators # p′i â†Evo(pr1,...,prk) - p_i, _ = self.evolve( + p_i, evolution_usage = self.evolve( pr1, pr2, prompts_current_evolution=prompts_current_evolution, current_iteration=i, ) + total_evolution_usage += evolution_usage evolved_prompt = self.add_prompt(p_i, (pr1, pr2), {"gen": t}) + total_evaluation_usage += evolved_prompt.usage new_evolutions.append(evolved_prompt) # Line 6: Update based on the evaluation scores @@ -144,9 
+153,9 @@ class EvolutionAlgorithm(PromptOptimization): self.task, self.evolution_model, # model usage for evaluating prompts - self.evaluation_model.usage, + total_evaluation_usage, # model usage for evolution of prompts - self.evolution_model.usage, + total_evolution_usage, add_snapshot_dict, ) # Line 8: Return the best prompt, p∗, among the final population PT : @@ -155,8 +164,11 @@ class EvolutionAlgorithm(PromptOptimization): logger.info(f"Best prompt: {p}") # We pick the prompt with the highest score on the development set and report its score on the testset. - test_performance = self.task.evaluate_test(p.content) - logger.info(f"Best prompt on test set: {test_performance}") + test_performance, _ = self.task.evaluate_test(p.content) + logger.info("Best prompt on test set: %s", test_performance) + logger.info("Usage (evolution model / evaluation model / total): %s / %s / %s", total_evolution_usage, total_evaluation_usage, total_evolution_usage + total_evaluation_usage) + + return total_evolution_usage, total_evaluation_usage class GeneticAlgorithm(EvolutionAlgorithm): diff --git a/optimization.py b/optimization.py index 7d70713..d379907 100644 --- a/optimization.py +++ b/optimization.py @@ -89,9 +89,9 @@ class PromptOptimization: def get_prompts(self, prompt_ids: list[str]): return [self.get_prompt(p_id) for p_id in prompt_ids] - def init_run(self, num_initial_prompts: int) -> tuple[list[Prompt], ModelUsage]: + def init_run(self, num_initial_prompts: int) -> tuple[list[Prompt], ModelUsage, ModelUsage]: # - Initial prompts P0 = {p1, p2, . . . 
, pN } - paraphrases, usage = paraphrase_prompts( + paraphrases, paraphrase_usage = paraphrase_prompts( self.evolution_model, self.task.base_prompt, n=num_initial_prompts - 1 ) @@ -102,7 +102,8 @@ class PromptOptimization: ) # accumulate usage + evaluation_usage = ModelUsage() for prompt in initial_prompts: - usage += prompt.usage + evaluation_usage += prompt.usage - return initial_prompts, usage + return initial_prompts, paraphrase_usage, evaluation_usage -- GitLab