From 7886a510c1fefd78472011c2867d3dc7096cee20 Mon Sep 17 00:00:00 2001
From: Maximilian Schmidt <maximilian.schmidt@ims.uni-stuttgart.de>
Date: Wed, 10 Apr 2024 14:00:27 +0200
Subject: [PATCH] Re-add model usage for evolution and evaluation

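Thread ModelUsage totals through the optimization loop instead of
reading cumulative counters off the models: init_run() now returns
separate usages for paraphrasing the base prompt (evolution model) and
for evaluating the initial prompts, evolve() returns the usage it
incurred, and run() accumulates both totals, logs them, and returns
them to the caller.

This relies on ModelUsage (from opt_types) behaving as an additive
accumulator: a zero-valued default constructor plus support for "+" and
"+=". A minimal sketch of that assumed interface, for reference only
(the field names are hypothetical):

    from dataclasses import dataclass

    @dataclass
    class ModelUsage:
        prompt_tokens: int = 0
        completion_tokens: int = 0

        def __add__(self, other: "ModelUsage") -> "ModelUsage":
            # "+=" falls back to __add__ and rebinds to a new object
            return ModelUsage(
                self.prompt_tokens + other.prompt_tokens,
                self.completion_tokens + other.completion_tokens,
            )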
---
 evolution.py    | 26 +++++++++++++++++++-------
 optimization.py |  9 +++++----
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/evolution.py b/evolution.py
index 03d0e59..9a935fa 100644
--- a/evolution.py
+++ b/evolution.py
@@ -2,7 +2,7 @@ from abc import abstractmethod
 
 from models import LLMModel
 from numpy.random import choice
-from opt_types import Prompt
+from opt_types import ModelUsage, Prompt
 from optimization import PromptOptimization
 from task import Task
 from tqdm import trange
@@ -93,9 +93,16 @@ class EvolutionAlgorithm(PromptOptimization):
             self.population_size = 3
             num_iterations = 2
 
+        # model usage for evaluation of prompts
+        total_evaluation_usage = ModelUsage()
+        # model usage for evolution of prompts
+        total_evolution_usage = ModelUsage()
+
         run_directory = initialize_run_directory(self.evolution_model)
 
-        initial_prompts, _ = self.init_run(self.population_size)
+        initial_prompts, evolution_usage, evaluation_usage = self.init_run(self.population_size)
+        total_evaluation_usage += evaluation_usage
+        total_evolution_usage += evolution_usage
 
         # Algorithm 1 Discrete prompt optimization: EVOPROMPT
 
@@ -116,14 +123,16 @@ class EvolutionAlgorithm(PromptOptimization):
 
                 # Line 4: Evolution: generate a new prompt based on the selected parent prompts by leveraging LLM to perform evolutionary operators
                 # p′i ←Evo(pr1,...,prk)
-                p_i, _ = self.evolve(
+                p_i, evolution_usage = self.evolve(
                     pr1,
                     pr2,
                     prompts_current_evolution=prompts_current_evolution,
                     current_iteration=i,
                 )
+                total_evolution_usage += evolution_usage
 
                 evolved_prompt = self.add_prompt(p_i, (pr1, pr2), {"gen": t})
+                total_evaluation_usage += evolved_prompt.usage
 
                 new_evolutions.append(evolved_prompt)
             # Line 6: Update based on the evaluation scores
@@ -144,9 +153,9 @@ class EvolutionAlgorithm(PromptOptimization):
             self.task,
             self.evolution_model,
             # model usage for evaluating prompts
-            self.evaluation_model.usage,
+            total_evaluation_usage,
             # model usage for evolution of prompts
-            self.evolution_model.usage,
+            total_evolution_usage,
             add_snapshot_dict,
         )
         # Line 8: Return the best prompt, p∗, among the final population PT :
@@ -155,8 +164,11 @@ class EvolutionAlgorithm(PromptOptimization):
         logger.info(f"Best prompt: {p}")
 
         # We pick the prompt with the highest score on the development set and report its score on the testset.
-        test_performance = self.task.evaluate_test(p.content)
-        logger.info(f"Best prompt on test set: {test_performance}")
+        test_performance, _ = self.task.evaluate_test(p.content)
+        logger.info("Best prompt on test set: %s", test_performance)
+        logger.info("Usage (evolution model / evaluation model / total): %s / %s / %s", total_evolution_usage, total_evaluation_usage, total_evolution_usage + total_evaluation_usage)
+
+        return total_evolution_usage, total_evaluation_usage
 
 
 class GeneticAlgorithm(EvolutionAlgorithm):
diff --git a/optimization.py b/optimization.py
index 7d70713..d379907 100644
--- a/optimization.py
+++ b/optimization.py
@@ -89,9 +89,9 @@ class PromptOptimization:
     def get_prompts(self, prompt_ids: list[str]):
         return [self.get_prompt(p_id) for p_id in prompt_ids]
 
-    def init_run(self, num_initial_prompts: int) -> tuple[list[Prompt], ModelUsage]:
+    def init_run(self, num_initial_prompts: int) -> tuple[list[Prompt], ModelUsage, ModelUsage]:
         # - Initial prompts P0 = {p1, p2, . . . , pN }
-        paraphrases, usage = paraphrase_prompts(
+        paraphrases, paraphrase_usage = paraphrase_prompts(
             self.evolution_model, self.task.base_prompt, n=num_initial_prompts - 1
         )
 
@@ -102,7 +102,8 @@ class PromptOptimization:
         )
 
         # accumulate usage
+        evaluation_usage = ModelUsage()
         for prompt in initial_prompts:
-            usage += prompt.usage
+            evaluation_usage += prompt.usage
 
-        return initial_prompts, usage
+        return initial_prompts, paraphrase_usage, evaluation_usage
-- 
GitLab