Commit 7886a510 authored by Max Kimmich

Re-add model usage for evolution and evaluation

parent 3f29409c
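
The changes below accumulate per-model usage by adding ModelUsage objects together (for example, total_evolution_usage += evolution_usage). For orientation only, here is a minimal sketch of what such a type could look like, assuming it is a simple token counter with an __add__ implementation; the actual class is defined in opt_types and may track different fields:

from dataclasses import dataclass


@dataclass
class ModelUsage:
    # Hypothetical fields; the real opt_types.ModelUsage may differ.
    prompt_tokens: int = 0
    completion_tokens: int = 0

    def __add__(self, other: "ModelUsage") -> "ModelUsage":
        # Enables both a + b and a += b, as used in the diff below.
        return ModelUsage(
            self.prompt_tokens + other.prompt_tokens,
            self.completion_tokens + other.completion_tokens,
        )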
@@ -2,7 +2,7 @@ from abc import abstractmethod
 from models import LLMModel
 from numpy.random import choice
-from opt_types import Prompt
+from opt_types import ModelUsage, Prompt
 from optimization import PromptOptimization
 from task import Task
 from tqdm import trange
@@ -93,9 +93,16 @@ class EvolutionAlgorithm(PromptOptimization):
         self.population_size = 3
         num_iterations = 2
+        # model usage for evaluation of prompts
+        total_evaluation_usage = ModelUsage()
+        # model usage for evolution of prompts
+        total_evolution_usage = ModelUsage()
         run_directory = initialize_run_directory(self.evolution_model)
-        initial_prompts, _ = self.init_run(self.population_size)
+        initial_prompts, evolution_usage, evaluation_usage = self.init_run(self.population_size)
+        total_evaluation_usage += evaluation_usage
+        total_evolution_usage += evolution_usage
         # Algorithm 1 Discrete prompt optimization: EVOPROMPT
@@ -116,14 +123,16 @@ class EvolutionAlgorithm(PromptOptimization):
                 # Line 4: Evolution: generate a new prompt based on the selected parent prompts by leveraging LLM to perform evolutionary operators
                 # p′i ←Evo(pr1,...,prk)
-                p_i, _ = self.evolve(
+                p_i, evolution_usage = self.evolve(
                     pr1,
                     pr2,
                     prompts_current_evolution=prompts_current_evolution,
                     current_iteration=i,
                 )
+                total_evolution_usage += evolution_usage
                 evolved_prompt = self.add_prompt(p_i, (pr1, pr2), {"gen": t})
+                evaluation_usage += evolved_prompt.usage
                 new_evolutions.append(evolved_prompt)
             # Line 6: Update based on the evaluation scores
@@ -144,9 +153,9 @@ class EvolutionAlgorithm(PromptOptimization):
                 self.task,
                 self.evolution_model,
                 # model usage for evaluating prompts
-                self.evaluation_model.usage,
+                total_evaluation_usage,
                 # model usage for evolution of prompts
-                self.evolution_model.usage,
+                total_evolution_usage,
                 add_snapshot_dict,
             )
         # Line 8: Return the best prompt, p∗, among the final population PT :
@@ -155,8 +164,11 @@ class EvolutionAlgorithm(PromptOptimization):
         logger.info(f"Best prompt: {p}")
         # We pick the prompt with the highest score on the development set and report its score on the testset.
-        test_performance = self.task.evaluate_test(p.content)
-        logger.info(f"Best prompt on test set: {test_performance}")
+        test_performance, _ = self.task.evaluate_test(p.content)
+        logger.info("Best prompt on test set: %s", test_performance)
+        logger.info("Usage (evolution model / evaluation model / total): %s / %s / %s", total_evolution_usage, total_evaluation_usage, total_evolution_usage + total_evaluation_usage)
+        return total_evolution_usage, total_evaluation_usage


 class GeneticAlgorithm(EvolutionAlgorithm):
@@ -89,9 +89,9 @@ class PromptOptimization:
     def get_prompts(self, prompt_ids: list[str]):
         return [self.get_prompt(p_id) for p_id in prompt_ids]

-    def init_run(self, num_initial_prompts: int) -> tuple[list[Prompt], ModelUsage]:
+    def init_run(self, num_initial_prompts: int) -> tuple[list[Prompt], ModelUsage, ModelUsage]:
         # - Initial prompts P0 = {p1, p2, . . . , pN }
-        paraphrases, usage = paraphrase_prompts(
+        paraphrases, paraphrase_usage = paraphrase_prompts(
             self.evolution_model, self.task.base_prompt, n=num_initial_prompts - 1
         )
@@ -102,7 +102,8 @@
         )
         # accumulate usage
+        evaluation_usage = ModelUsage()
         for prompt in initial_prompts:
-            usage += prompt.usage
+            evaluation_usage += prompt.usage
-        return initial_prompts, usage
+        return initial_prompts, paraphrase_usage, evaluation_usage
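
Taken together, init_run now reports paraphrasing (evolution model) usage separately from evaluation usage, and the optimization loop returns both running totals instead of reading them off the model objects. A hypothetical caller could consume the new return value roughly as sketched here; the constructor arguments and the name of the entry-point method are assumptions for illustration, not part of this commit:

# Sketch of a caller; GeneticAlgorithm's constructor signature and the
# name of its optimization entry point are assumed for illustration.
optimizer = GeneticAlgorithm(task=task, evolution_model=evolution_model, evaluation_model=evaluation_model)
evolution_usage, evaluation_usage = optimizer.run()
print("Total usage:", evolution_usage + evaluation_usage)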