diff --git a/evoprompt/evolution/evolution.py b/evoprompt/evolution/evolution.py
index e6b7d365237ad9221fd85c3890ceb4a192640e1c..033f359d5374fdd89dce6ed72b51e010dc8c9928 100644
--- a/evoprompt/evolution/evolution.py
+++ b/evoprompt/evolution/evolution.py
@@ -249,6 +249,7 @@ class GeneticAlgorithm(EvolutionAlgorithm):
                 self.evolution_model.create_completion(
                     system_message=SYSTEM_MESSAGE,
                     prompt=filled_prompt,
+                    enforce_randomness=True,
                 )
             )
 
@@ -351,6 +352,7 @@ class DifferentialEvolution(EvolutionAlgorithm):
                 self.evolution_model.create_completion(
                     system_message=SYSTEM_MESSAGE,
                     prompt=filled_prompt,
+                    enforce_randomness=True,
                 )
             )
 
@@ -468,6 +470,7 @@ class DifferentialEvolutionWithCot(DifferentialEvolution):
                     prompt=filled_prompt,
                     history=history,
                     stop="</prompt>" if idx == len(DE_COT_PROMPTS) - 1 else None,
+                    enforce_randomness=True,
                 )
             )
             logger.debug(
diff --git a/evoprompt/models.py b/evoprompt/models.py
index 34eb729fee3fa07464e007a872320c4718157520..9380a073b0164ad5dc0b5b983253ea7c6d2145b3 100644
--- a/evoprompt/models.py
+++ b/evoprompt/models.py
@@ -275,7 +275,8 @@ class Llama(LLMModel):
             "max_tokens": max_tokens,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in evoprompt paper reference implementation
+            model_call_kwargs["temperature"] = 0.5
             model_call_kwargs["seed"] = random.randint(0, 2**32 - 1)
         else:
             model_call_kwargs["temperature"] = 0.0
@@ -398,7 +399,8 @@ class LlamaChat(ChatModel, Llama):
             "max_tokens": max_tokens,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in evoprompt paper reference implementation
+            model_call_kwargs["temperature"] = 0.5
             model_call_kwargs["seed"] = random.randint(0, 2**32 - 1)
         else:
             model_call_kwargs["temperature"] = 0.0
@@ -478,7 +480,8 @@ class HfChat(ChatModel, LLMModel):
             "max_length": max_tokens if max_tokens is not None else 2048,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in evoprompt paper reference implementation
+            model_call_kwargs["temperature"] = 0.5
             model_call_kwargs["do_sample"] = True
         else:
             model_call_kwargs["do_sample"] = False
@@ -575,7 +578,8 @@ class OpenAIChat(ChatModel, LLMModel):
             "max_completion_tokens": max_tokens if max_tokens is not None else 1024,
         }
         if enforce_randomness:
-            model_call_kwargs["temperature"] = 2.0
+            # same temperature as in evoprompt paper reference implementation
+            model_call_kwargs["temperature"] = 0.5
         else:
             model_call_kwargs["temperature"] = 0.0
 
diff --git a/evoprompt/task/task.py b/evoprompt/task/task.py
index cfac945fbfacc200741eb704272825d16c1c0f59..b073a875984ed8cfa5f6942aa13064ed1f27e2c0 100644
--- a/evoprompt/task/task.py
+++ b/evoprompt/task/task.py
@@ -415,7 +415,7 @@ class Task(metaclass=ABCMeta):
             # we use cached completions to speed up the process although we loose the non-deterministic behavior of LMs, but we're ok with a single result
             use_cache=True,
             # use less randomness, i.e., more certain outputs
-            temperature=0.0,
+            enforce_randomness=False,
         )
 
         if not self.use_grammar:
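
For context, a minimal sketch of the pattern this diff settles on: callers pass an `enforce_randomness` flag instead of a raw temperature, and the model wrapper maps it to sampling kwargs (temperature 0.5 plus a fresh seed for evolution steps, temperature 0.0 for deterministic task evaluation). The helper name `build_sampling_kwargs` is hypothetical and not part of the repository; it only mirrors the branch logic visible in the `Llama` hunk above.

```python
import random


def build_sampling_kwargs(enforce_randomness: bool, max_tokens: int | None = None) -> dict:
    """Hypothetical helper mirroring the kwargs logic from the Llama models above."""
    kwargs: dict = {"max_tokens": max_tokens}
    if enforce_randomness:
        # same temperature as in the evoprompt paper reference implementation
        kwargs["temperature"] = 0.5
        # fresh seed per call so repeated evolution steps can yield different samples
        kwargs["seed"] = random.randint(0, 2**32 - 1)
    else:
        # deterministic decoding, e.g. for cached task evaluation
        kwargs["temperature"] = 0.0
    return kwargs


# Evolution operators request randomness; task evaluation does not.
assert build_sampling_kwargs(True)["temperature"] == 0.5
assert build_sampling_kwargs(False)["temperature"] == 0.0
```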