Commit 7fcaaca7 authored by Grießhaber Daniel

save snapshot during evolution process

parent 3cef8395
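The change below replaces the single save_snapshot(...) call at the end of run() with an instance method that is invoked repeatedly while the optimization is running: right after the run directory is created, after every newly added prompt, and after every generation. A minimal sketch of that pattern follows; the class and field names here are illustrative stand-ins, not the project's actual API.

    # Minimal sketch (assumed names, not the project code): the optimizer owns its
    # run state and rewrites snapshot.json in the run directory after every step.
    import json
    from pathlib import Path


    class SnapshottingOptimizer:
        def __init__(self, run_directory: Path, run_options: dict | None = None) -> None:
            self.run_directory = run_directory
            self.run_options = run_options or {}
            self.P: list[list[str]] = []           # populations, one list per generation
            self.all_prompts: dict[str, str] = {}  # every prompt seen so far

        def save_snapshot(self) -> None:
            # overwrite snapshot.json with the full current optimization state
            state = {
                "P": self.P,
                "all_prompts": self.all_prompts,
                "run_options": self.run_options,
            }
            with open(self.run_directory / "snapshot.json", "w") as f:
                json.dump(state, f, indent=4)

        def run(self, num_iterations: int) -> None:
            self.save_snapshot()  # initial state
            for t in range(num_iterations):
                population = [f"prompt-{t}-{i}" for i in range(3)]  # stand-in for evolution
                for prompt in population:
                    self.all_prompts[prompt] = prompt
                    self.save_snapshot()  # after each new prompt
                self.P.append(population)
                self.save_snapshot()  # after each generation


    if __name__ == "__main__":
        optimizer = SnapshottingOptimizer(Path("."), run_options={"debug": True})
        optimizer.run(num_iterations=2)

Because every call rewrites snapshot.json with the complete state, an interrupted run still leaves behind the most recently completed step instead of nothing at all.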
@@ -53,6 +53,7 @@ class MultiProcessOptimizer:
             task=task,
             evolution_model=evolution_model,
             evaluation_model=evaluation_model,
+            run_options=options.__dict__,
         )

     def __exit__(self, exc_type, exc_value, exc_tb):
@@ -89,7 +90,8 @@ class MultiProcessOptimizer:
     def run_optimization(self, num_iterations: int) -> str:
         self._running = True
-        self.optimizer.run(num_iterations, debug=self.debug, add_snapshot_dict={})
+        self.optimizer
+        self.optimizer.run(num_iterations, debug=self.debug)
         self._running = False

     def get_progress(self):
 from abc import abstractmethod
+from typing import Any

 from numpy.random import choice
 from tqdm import trange
@@ -6,9 +7,9 @@ from tqdm import trange
 from cli import argument_parser
 from models import LLMModel
 from opt_types import ModelUsage, Prompt
-from optimization import PromptOptimization, save_snapshot
+from optimization import PromptOptimization
 from task import Task
-from utils import initialize_run_directory, log_calls, logger
+from utils import log_calls, logger

 SYSTEM_MESSAGE = (
     "Please follow the instruction step-by-step to generate a better prompt."
@@ -46,11 +47,13 @@ class EvolutionAlgorithm(PromptOptimization):
         task: Task,
         evolution_model: LLMModel,
         evaluation_model: LLMModel,
+        run_options: dict[str, Any] = {},
     ) -> None:
         super().__init__(
             task=task,
             evolution_model=evolution_model,
             evaluation_model=evaluation_model,
+            run_options=run_options,
         )

         self.population_size = population_size
@@ -88,46 +91,27 @@ class EvolutionAlgorithm(PromptOptimization):
     def update(self, *args, **kwargs):
         pass

-    def run(
-        self, num_iterations: int, add_snapshot_dict: dict, debug: bool = False
-    ) -> None:
+    def run(self, num_iterations: int, debug: bool = False) -> None:
         # debug mode for quick run
         if debug:
             self.population_size = 3
             num_iterations = 2

-        # model usage for evaluation of prompts
-        total_evaluation_usage = ModelUsage()
-        # model usage for evolution of prompts
-        total_evolution_usage = ModelUsage()
-
-        run_directory = initialize_run_directory(self.evolution_model)
-
-        initial_prompts, evolution_usage, evaluation_usage = self.init_run(
-            self.population_size
-        )
-        total_evaluation_usage += evaluation_usage
-        total_evolution_usage += evolution_usage
+        self.init_run(self.population_size, num_iterations)

         # Algorithm 1 Discrete prompt optimization: EVOPROMPT
-        # P keeps track of prompts in each generation
-        P = [initial_prompts]

         # Line 2:
-        self.iterations_pbar = trange(
-            1, num_iterations + 1, desc="iterations", leave=True
-        )
         for t in self.iterations_pbar:
             # Line 3: Selection: select a certain number of prompts from current population as parent prompts
             # pr1,...,prk ∼ Pt−1
-            prompts_current_evolution = P[t - 1]
+            prompts_current_evolution = self.P[t - 1]

             new_evolutions = []

             for i in trange(self.population_size, desc="updates", leave=False):
                 # for both GA and DE we start with two parent prompts
-                pr1, pr2 = self.select(P[t - 1])
+                pr1, pr2 = self.select(self.P[t - 1])

                 # Line 4: Evolution: generate a new prompt based on the selected parent prompts by leveraging LLM to perform evolutionary operators
                 # p′i ←Evo(pr1,...,prk)
@@ -137,38 +121,25 @@ class EvolutionAlgorithm(PromptOptimization):
                     prompts_current_evolution=prompts_current_evolution,
                     current_iteration=i,
                 )
-                total_evolution_usage += evolution_usage
+                self.total_evolution_usage += evolution_usage

                 evolved_prompt = self.add_prompt(p_i, (pr1, pr2), {"gen": t})
-                evaluation_usage += evolved_prompt.usage
+                self.total_evaluation_usage += evolved_prompt.usage

                 new_evolutions.append(evolved_prompt)
+                self.save_snapshot()

             # Line 6: Update based on the evaluation scores
             # Pt ← {Pt−1, p′i} and St ← {St−1, s′i}
             new_population = self.update(new_evolutions, prompts_current_evolution)

             # store new generation
-            P.append(new_population)
+            self.P.append(new_population)
+            self.save_snapshot()

-        # TODO move to super class
-        save_snapshot(
-            run_directory,
-            self.all_prompts,
-            self.family_tree,
-            [[prompt.id for prompt in population] for population in P],
-            num_iterations,
-            self.population_size,
-            self.task,
-            self.evolution_model,
-            # model usage for evaluating prompts
-            total_evaluation_usage,
-            # model usage for evolution of prompts
-            total_evolution_usage,
-            add_snapshot_dict,
-        )
+        self.save_snapshot()

         # Line 8: Return the best prompt, p∗, among the final population PT :
         # p∗ ← argmaxp∈PT f(p, D)
-        p = max(P[-1], key=lambda prompt: self.all_prompts[prompt.id].score)
+        p = max(self.P[-1], key=lambda prompt: self.all_prompts[prompt.id].score)
         logger.info(f"Best prompt: {p}")

         # We pick the prompt with the highest score on the development set and report its score on the testset.
@@ -176,12 +147,12 @@ class EvolutionAlgorithm(PromptOptimization):
         logger.info("Best prompt on test set: %s", test_performance)

         logger.info(
             "Usage (evolution model / evaluation model / total): %s / %s / %s",
-            total_evolution_usage,
-            total_evaluation_usage,
-            total_evolution_usage + total_evaluation_usage,
+            self.total_evolution_usage,
+            self.total_evaluation_usage,
+            self.total_evolution_usage + self.total_evaluation_usage,
         )

-        return total_evolution_usage, total_evaluation_usage
+        return self.total_evolution_usage, self.total_evaluation_usage


 class GeneticAlgorithm(EvolutionAlgorithm):
-Subproject commit d430de1597342eedf0cede1873507a3ffaa28dbb
+Subproject commit 6e7b5edc0e5b34fe100f8b2f46c0117d861c90ee
@@ -74,5 +74,6 @@ if __name__ == "__main__":
         task=task,
         evolution_model=evolution_model,
         evaluation_model=evaluation_model,
+        run_options=options.__dict__,
     )
-    optimizer.run(10, debug=debug, add_snapshot_dict=options.__dict__)
+    optimizer.run(10, debug=debug)
 import json
+from abc import abstractmethod
 from itertools import zip_longest
 from pathlib import Path
 from typing import Any

+from tqdm import trange
 from models import Llama2, LLMModel, OpenAI
 from opt_types import ModelUsage, OptTypeEncoder, Prompt
 from task import Task
-from utils import log_calls
+from utils import initialize_run_directory, log_calls

 PARAPHRASE_PROMPT = """You are given an instruction that describes a task. Write a response that paraphrases the instruction. Only output the paraphrased instruction bracketed in <prompt> and </prompt>."""
@@ -40,22 +43,28 @@ def paraphrase_prompts(
 class PromptOptimization:
+    total_evaluation_usage: ModelUsage
+    total_evolution_usage: ModelUsage
+    run_directory: Path
+    # P contains the list of prompts at each generation
+    P: list[list[Prompt]]
+    # family_tree contains the relation of prompts to its parents
+    family_tree: dict[str, tuple[str, ...] | None]
+    # all_prompts contains a list of Prompt objects that took part in the optimization
+    all_prompts: dict[str, Prompt]
+
     def __init__(
-        self, *, task: Task, evolution_model: LLMModel, evaluation_model: LLMModel
+        self,
+        *,
+        task: Task,
+        evolution_model: LLMModel,
+        evaluation_model: LLMModel,
+        run_options: dict[str, Any] = {}
     ) -> None:
         self.task = task
         self.evolution_model = evolution_model
         self.evaluation_model = evaluation_model
-        self._init()
-
-    def _init(self):
-        # family_tree contains the relation of prompts to its parents
-        self.family_tree: dict[str, tuple[str, ...] | None] = {}
-        # all_prompts contains a list of Prompt objects that took part in the optimization
-        self.all_prompts: dict[str, Prompt] = {}
-
-    def reset(self):
-        self._init
+        self.run_options = run_options

     def evaluate_prompt(self, prompt: str, parents: tuple[Prompt] | None = None):
         parent_histories = (
@@ -85,6 +94,7 @@ class PromptOptimization:
         self.family_tree[prompt_object.id] = (
             tuple(p.id for p in parents) if parents is not None else None
         )
+        self.save_snapshot()

         return prompt_object
@@ -105,67 +115,69 @@ class PromptOptimization:
     def get_prompts(self, prompt_ids: list[str]):
         return [self.get_prompt(p_id) for p_id in prompt_ids]

-    def init_run(
-        self, num_initial_prompts: int
-    ) -> tuple[list[Prompt], ModelUsage, ModelUsage]:
-        # - Initial prompts P0 = {p1, p2, . . . , pN }
+    @abstractmethod
+    def save_snapshot(self): ...
+
+    def init_run(self, num_initial_prompts: int, num_iterations: int):
+        # family_tree contains the relation of prompts to its parents
+        self.family_tree: dict[str, tuple[str, ...] | None] = {}
+        # all_prompts contains a list of Prompt objects that took part in the optimization
+        self.all_prompts: dict[str, Prompt] = {}
+        self.P = []
+        self.total_evaluation_usage = ModelUsage()
+        self.total_evolution_usage = ModelUsage()
+        self.iterations_pbar = trange(
+            1, num_iterations + 1, desc="iterations", leave=True
+        )
+
+        self.run_directory = initialize_run_directory(self.evolution_model)
+        self.save_snapshot()
+
         paraphrases, paraphrase_usage = paraphrase_prompts(
             self.evolution_model, self.task.base_prompt, n=num_initial_prompts - 1
         )
+        self.total_evolution_usage += paraphrase_usage

         # the initial prompts
         initial_prompts = [self.task.base_prompt] + paraphrases
         initial_prompts = self.add_prompts(
             initial_prompts, metas=[{"gen": 0} for _ in initial_prompts]
         )
+        # - Initial prompts P0 = {p1, p2, . . . , pN }
+        self.P.append(initial_prompts)

         # accumulate usage
-        evaluation_usage = ModelUsage()
         for prompt in initial_prompts:
-            evaluation_usage += prompt.usage
+            self.total_evaluation_usage += prompt.usage

-        return initial_prompts, paraphrase_usage, evaluation_usage
+        self.save_snapshot()

-
-# TODO turn snapshots methods into instance methods of optimizer
-def save_snapshot(
-    run_directory: Path,
-    all_prompts: list[Prompt],
-    family_tree: dict[str, tuple[str, str] | None],
-    P: list[list[str]],
-    T: int,
-    N: int,
-    task,
-    model: Llama2 | OpenAI,
-    evaluation_usage: ModelUsage,
-    evolution_usage: ModelUsage,
-    run_options: dict[str, Any],
-):
-
-    with open(run_directory / "snapshot.json", "w") as f:
-        json.dump(
-            {
-                "all_prompts": all_prompts,
-                "family_tree": family_tree,
-                "P": P,
-                "T": T,
-                "N": N,
-                "task": {
-                    "name": task.__class__.__name__,
-                    "validation_dataset": task.validation_dataset.info.dataset_name,
-                    "test_dataset": task.test_dataset.info.dataset_name,
-                    "metric": task.metric_name,
-                    "use_grammar": task.use_grammar,
-                },
-                "model": {"name": model.__class__.__name__},
-                "evaluation_usage": evaluation_usage,
-                "evolution_usage": evolution_usage,
-                "run_options": run_options,
-            },
-            f,
-            indent=4,
-            cls=OptTypeEncoder,
-        )
+    def save_snapshot(self):
+        with open(self.run_directory / "snapshot.json", "w") as f:
+            json.dump(
+                {
+                    "all_prompts": self.all_prompts,
+                    "family_tree": self.family_tree,
+                    "P": [
+                        [prompt.id for prompt in population] for population in self.P
+                    ],
+                    "T": self.family_tree,
+                    "N": self.iterations_pbar.n,
+                    "task": {
+                        "name": self.task.__class__.__name__,
+                        "validation_dataset": self.task.validation_dataset.info.dataset_name,
+                        "test_dataset": self.task.test_dataset.info.dataset_name,
+                        "metric": self.task.metric_name,
+                        "use_grammar": self.task.use_grammar,
+                    },
+                    "model": {"name": self.evolution_model.__class__.__name__},
+                    "evaluation_usage": self.total_evaluation_usage,
+                    "evolution_usage": self.total_evolution_usage,
+                    "run_options": self.run_options,
+                },
+                f,
+                indent=4,
+                cls=OptTypeEncoder,
+            )


 def load_snapshot(path: Path):