From 9249cf324f8580d78bcdb3a26ee0577600fdef28 Mon Sep 17 00:00:00 2001
From: Maximilian Kimmich <maximilian.kimmich@ims.uni-stuttgart.de>
Date: Tue, 1 Oct 2024 19:58:06 +0200
Subject: [PATCH] Update temperature for more creative generations

---
 evoprompt/evolution/evolution.py |  3 +++
 evoprompt/models.py              | 12 ++++++++----
 evoprompt/task/task.py           |  2 +-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/evoprompt/evolution/evolution.py b/evoprompt/evolution/evolution.py
index e6b7d36..033f359 100644
--- a/evoprompt/evolution/evolution.py
+++ b/evoprompt/evolution/evolution.py
@@ -249,6 +249,7 @@ class GeneticAlgorithm(EvolutionAlgorithm):
             self.evolution_model.create_completion(
                 system_message=SYSTEM_MESSAGE,
                 prompt=filled_prompt,
+                enforce_randomness=True,
             )
         )
 
@@ -351,6 +352,7 @@ class DifferentialEvolution(EvolutionAlgorithm):
             self.evolution_model.create_completion(
                 system_message=SYSTEM_MESSAGE,
                 prompt=filled_prompt,
+                enforce_randomness=True,
             )
         )
 
@@ -468,6 +470,7 @@ class DifferentialEvolutionWithCot(DifferentialEvolution):
                 prompt=filled_prompt,
                 history=history,
                 stop="</prompt>" if idx == len(DE_COT_PROMPTS) - 1 else None,
+                enforce_randomness=True,
             )
         )
         logger.debug(
diff --git a/evoprompt/models.py b/evoprompt/models.py
index 34eb729..9380a07 100644
--- a/evoprompt/models.py
+++ b/evoprompt/models.py
@@ -275,7 +275,8 @@ class Llama(LLMModel):
             "max_tokens": max_tokens,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in evoprompt paper reference implementation
+            model_call_kwargs["temperature"] = 0.5
             model_call_kwargs["seed"] = random.randint(0, 2**32 - 1)
         else:
             model_call_kwargs["temperature"] = 0.0
@@ -398,7 +399,8 @@ class LlamaChat(ChatModel, Llama):
             "max_tokens": max_tokens,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in evoprompt paper reference implementation
+            model_call_kwargs["temperature"] = 0.5
             model_call_kwargs["seed"] = random.randint(0, 2**32 - 1)
         else:
             model_call_kwargs["temperature"] = 0.0
@@ -478,7 +480,8 @@ class HfChat(ChatModel, LLMModel):
             "max_length": max_tokens if max_tokens is not None else 2048,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in evoprompt paper reference implementation
+            model_call_kwargs["temperature"] = 0.5
             model_call_kwargs["do_sample"] = True
         else:
             model_call_kwargs["do_sample"] = False
@@ -575,7 +578,8 @@ class OpenAIChat(ChatModel, LLMModel):
             "max_completion_tokens": max_tokens if max_tokens is not None else 1024,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in evoprompt paper reference implementation
+            model_call_kwargs["temperature"] = 0.5
         else:
             model_call_kwargs["temperature"] = 0.0
 
diff --git a/evoprompt/task/task.py b/evoprompt/task/task.py
index cfac945..b073a87 100644
--- a/evoprompt/task/task.py
+++ b/evoprompt/task/task.py
@@ -415,7 +415,7 @@ class Task(metaclass=ABCMeta):
             # we use cached completions to speed up the process although we loose the non-deterministic behavior of LMs, but we're ok with a single result
             use_cache=True,
             # use less randomness, i.e., more certain outputs
-            temperature=0.0,
+            enforce_randomness=False,
         )
 
         if not self.use_grammar:
--
GitLab