From 9249cf324f8580d78bcdb3a26ee0577600fdef28 Mon Sep 17 00:00:00 2001
From: Maximilian Kimmich <maximilian.kimmich@ims.uni-stuttgart.de>
Date: Tue, 1 Oct 2024 19:58:06 +0200
Subject: [PATCH] Enable sampling for evolution steps and match reference temperature

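Evolution completions now opt into sampling (enforce_randomness=True)
instead of the deterministic default, and the sampling temperature is
lowered from 2.0 to 0.5 to match the EvoPrompt paper's reference
implementation; task evaluation stays deterministic via
enforce_randomness=False. A rough sketch of how the flag maps to model
call kwargs in the Llama backends (sampling_kwargs is a hypothetical
helper for illustration, not part of the codebase):

    import random

    def sampling_kwargs(enforce_randomness: bool, max_tokens: int | None = None) -> dict:
        # mirror the branches in models.py: sample at temperature 0.5 with
        # a fresh seed when randomness is requested, otherwise stay greedy
        kwargs = {"max_tokens": max_tokens}
        if enforce_randomness:
            kwargs["temperature"] = 0.5
            kwargs["seed"] = random.randint(0, 2**32 - 1)
        else:
            kwargs["temperature"] = 0.0
        return kwargs

The HF and OpenAI chat backends follow the same pattern, toggling
do_sample or omitting the seed where their APIs differ.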
---
 evoprompt/evolution/evolution.py |  3 +++
 evoprompt/models.py              | 12 ++++++++----
 evoprompt/task/task.py           |  2 +-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/evoprompt/evolution/evolution.py b/evoprompt/evolution/evolution.py
index e6b7d36..033f359 100644
--- a/evoprompt/evolution/evolution.py
+++ b/evoprompt/evolution/evolution.py
@@ -249,6 +249,7 @@ class GeneticAlgorithm(EvolutionAlgorithm):
             self.evolution_model.create_completion(
                 system_message=SYSTEM_MESSAGE,
                 prompt=filled_prompt,
+                enforce_randomness=True,
             )
         )
 
@@ -351,6 +352,7 @@ class DifferentialEvolution(EvolutionAlgorithm):
             self.evolution_model.create_completion(
                 system_message=SYSTEM_MESSAGE,
                 prompt=filled_prompt,
+                enforce_randomness=True,
             )
         )
 
@@ -468,6 +470,7 @@ class DifferentialEvolutionWithCot(DifferentialEvolution):
                     prompt=filled_prompt,
                     history=history,
                     stop="</prompt>" if idx == len(DE_COT_PROMPTS) - 1 else None,
+                    enforce_randomness=True,
                 )
             )
             logger.debug(
diff --git a/evoprompt/models.py b/evoprompt/models.py
index 34eb729..9380a07 100644
--- a/evoprompt/models.py
+++ b/evoprompt/models.py
@@ -275,7 +275,8 @@ class Llama(LLMModel):
             "max_tokens": max_tokens,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in the EvoPrompt paper's reference implementation
+            model_call_kwargs["temperature"] = 0.5
             model_call_kwargs["seed"] = random.randint(0, 2**32 - 1)
         else:
             model_call_kwargs["temperature"] = 0.0
@@ -398,7 +399,8 @@ class LlamaChat(ChatModel, Llama):
             "max_tokens": max_tokens,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in the EvoPrompt paper's reference implementation
+            model_call_kwargs["temperature"] = 0.5
             model_call_kwargs["seed"] = random.randint(0, 2**32 - 1)
         else:
             model_call_kwargs["temperature"] = 0.0
@@ -478,7 +480,8 @@ class HfChat(ChatModel, LLMModel):
             "max_length": max_tokens if max_tokens is not None else 2048,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in the EvoPrompt paper's reference implementation
+            model_call_kwargs["temperature"] = 0.5
             model_call_kwargs["do_sample"] = True
         else:
             model_call_kwargs["do_sample"] = False
@@ -575,7 +578,8 @@ class OpenAIChat(ChatModel, LLMModel):
             "max_completion_tokens": max_tokens if max_tokens is not None else 1024,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in the EvoPrompt paper's reference implementation
+            model_call_kwargs["temperature"] = 0.5
         else:
             model_call_kwargs["temperature"] = 0.0
 
diff --git a/evoprompt/task/task.py b/evoprompt/task/task.py
index cfac945..b073a87 100644
--- a/evoprompt/task/task.py
+++ b/evoprompt/task/task.py
@@ -415,7 +415,7 @@ class Task(metaclass=ABCMeta):
             # we use cached completions to speed up the process although we loose the non-deterministic behavior of LMs, but we're ok with a single result
             use_cache=True,
             # use less randomness, i.e., more certain outputs
-            temperature=0.0,
+            enforce_randomness=False,
         )
 
         if not self.use_grammar:
-- 
GitLab