diff --git a/evoprompt/evolution/evolution.py b/evoprompt/evolution/evolution.py
index 57556f0c0b5716fc12390d01076a84377df03261..ce57de57845d5cd10021b174a90a5408896e6ac8 100644
--- a/evoprompt/evolution/evolution.py
+++ b/evoprompt/evolution/evolution.py
@@ -23,7 +23,7 @@ from evoprompt.evolution.template_ga import (
     GA_DEMONSTRATION_DATA_SIM,
     GA_PROMPT,
 )
-from evoprompt.models import LLMModel
+from evoprompt.models import ChatMessages, LLMModel
 from evoprompt.opt_types import ModelUsage, Prompt
 from evoprompt.optimization import Judgement, PromptOptimization
 from evoprompt.task import Task
@@ -50,14 +50,12 @@ class EvolutionAlgorithm(PromptOptimization, metaclass=ABCMeta):
         *,
         task: Task,
         evolution_model: LLMModel,
-        evaluation_model: LLMModel,
         judge_model: LLMModel | None,
         run_options: dict[str, Any] = {},
     ) -> None:
         super().__init__(
             task=task,
             evolution_model=evolution_model,
-            evaluation_model=evaluation_model,
             judge_model=judge_model,
             run_options=run_options,
         )
@@ -163,7 +161,7 @@ class EvolutionAlgorithm(PromptOptimization, metaclass=ABCMeta):
             if p_i is not None:
                 prompt_source = (
                     "corrected"  # could also mean that user skipped the prompt
-                    if False in [j.happy for j in judgements]
+                    if not all(j.happy for j in judgements)
                     else "generated"
                 )
                 evolved_prompt = self.add_prompt(
@@ -195,7 +193,13 @@ class EvolutionAlgorithm(PromptOptimization, metaclass=ABCMeta):
         # Line 8: Return the best prompt, p∗, among the final population PT :
         # p∗ ← argmaxp∈PT f(p, D)
         p = max(self.P[-1], key=lambda prompt: self.all_prompts[prompt.id].score)
-        logger.info("Best prompt with score %.2f: %s", p.score, p)
+        logger.info(
+            "Best prompt with score %.2f: %s (Source: %s - Gen: %d)",
+            p.score,
+            p,
+            p.meta["source"],
+            p.meta["gen"],
+        )

         # We pick the prompt with the highest score on the development set and report its score on the testset.
         test_performance, _, _ = self.task.evaluate_test(p.content)
diff --git a/evoprompt/models.py b/evoprompt/models.py
index 4012ce478dee5ff99dec5eb20df61e346d0db243..57e219a40960fd6c2df14abcdcb765c02240b1e1 100644
--- a/evoprompt/models.py
+++ b/evoprompt/models.py
@@ -1,6 +1,7 @@
 from collections.abc import Iterable
 import hashlib
 import inspect
+from itertools import zip_longest
 import json
 import logging
 import random
@@ -122,25 +123,32 @@ class LLMModel(ABC):
     def build_demonstration_data(
         self,
         demonstrations: Iterable[tuple[str, str]],
-        instruction: str | None,
+        instruction: list[str] | str | None,
         **kwargs,
     ) -> ChatMessages:
         if not isinstance(self, ChatModel):
             raise ValueError(
                 f"Model {self} does not support building demonstration data"
             )
+
+        if not isinstance(instruction, list):
+            instruction = [instruction]
         messages = []
-        for input_, output in demonstrations:
+        for (input_, output), _instruction in zip_longest(
+            demonstrations, instruction, fillvalue=instruction[-1]
+        ):
             messages.extend(
-                self.build_input_data(input_, instruction=instruction, **kwargs)[1]
+                self.build_input_data(input_, instruction=_instruction, **kwargs)[1]
             )
             messages.append(self._get_assistant_message(output))
         return messages

     def build_input_data(
-        self, prompt: str, instruction: str | None = None, **kwargs
+        self, input_: str, instruction: str | None = None, **kwargs
     ) -> ChatMessages:
-        return instruction, [self._get_user_message(prompt)]
+        return instruction, [
+            self._get_user_message(input_ if input_ is not None else instruction)
+        ]

     def _get_prediction_prefix(self):
         # some models use a special token prefix for the prediction
@@ -162,19 +170,19 @@ class LLMModel(ABC):
         use_randomness: bool = False,
     ): ...
-    def _get_user_message(self, content: Any):
+    def _get_user_message(self, content: Any) -> ChatMessage:
         return {
             "role": "user",
             "content": content,
         }

-    def _get_system_message(self, content: Any):
+    def _get_system_message(self, content: Any) -> ChatMessage:
         return {
             "role": "system",
             "content": content,
         }

-    def _get_assistant_message(self, content: Any):
+    def _get_assistant_message(self, content: Any) -> ChatMessage:
         return {
             "role": "assistant",
             "content": content,
         }
@@ -400,10 +408,10 @@ class ChatModel:

         # we prepend the history to the messages
         # the chat format should take care of adding appropriate assistant messages for generating the completion
-        messages_for_model = messages
         if history is None:
-            history = []
-        messages_for_model = history + messages_for_model
+            messages_for_model = messages
+        else:
+            messages_for_model = history + messages
         # prepend system message if available
         if system_message is not None:
             if isinstance(system_message, str):
@@ -569,19 +577,19 @@ class AlpacaHfChat(HfChat):
         self.pipeline.tokenizer.chat_template = "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ system_message }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '\\n\\n### Instruction:\\n' + message['content'].strip() + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response:\\n' + message['content'].strip()}}{% endif %}{% endfor %}"

     def build_input_data(
-        self, prompt: str, instruction: str = None, **kwargs
+        self, input_: str, instruction: str = None, **kwargs
     ) -> ChatMessages:
         # For Alpaca we add the instruction for each input (and assume that content is the context, otherwise the content should contain the instruction)
-        return self.SYSTEM_MESSAGE, [
-            self._get_user_message(
-                (
-                    (instruction + "\n\n" + self._get_input_prefix())
-                    if instruction is not None
-                    else ""
-                )
-                + prompt
-            )
-        ]
+        if instruction is None:
+            assert (
+                input_ is not None
+            ), "Either instruction or input must be provided for Alpaca"
+            prompt_input = input_
+        else:
+            prompt_input = instruction
+            if input_ is not None:
+                prompt_input += "\n\n" + self._get_input_prefix() + input_
+        return self.SYSTEM_MESSAGE, [self._get_user_message(prompt_input)]

     @staticmethod
     def _get_input_prefix():
diff --git a/evoprompt/optimization.py b/evoprompt/optimization.py
index c1147f40f3b1a1c7c0df1ed0358012149c80acc6..ffeded99da45d76fd73728b9f9b9a174ef4a5e82 100644
--- a/evoprompt/optimization.py
+++ b/evoprompt/optimization.py
@@ -153,13 +153,11 @@ class PromptOptimization:
         *,
         task: Task,
         evolution_model: LLMModel,
-        evaluation_model: LLMModel,
         judge_model: LLMModel | None,
         run_options: dict[str, Any] = {},
     ) -> None:
         self.task = task
         self.evolution_model = evolution_model
-        self.evaluation_model = evaluation_model
         self.judge_model = judge_model
         self.run_options = run_options

@@ -174,7 +172,7 @@ class PromptOptimization:
         return self.task.evaluate_validation(prompt, parent_histories)

     def get_initial_prompts(self, num_initial_prompts: int, debug: bool = False):
-        # this implements the para_topk algorothm from https://github.com/beeevita/EvoPrompt
+        # this implements the para_topk algorithm from https://github.com/beeevita/EvoPrompt
         base_prompts = self.task.base_prompts
         if debug:
             base_prompts = base_prompts[:2]
diff --git a/evoprompt/task/task.py b/evoprompt/task/task.py
index ae260f5844cf89a1a9064a0780705371cc3b71ec..3b59db32d9920253f9deb9b953676c79b9299854 100644
--- a/evoprompt/task/task.py
+++ b/evoprompt/task/task.py
@@ -437,12 +437,12 @@ class Task(metaclass=ABCMeta):
         use_prediction_prefix: bool = False,
     ) -> tuple[ChatMessage, ChatMessages]:
         # the default is to use the prompt as is and concatenate the datum string
-        prompt = self._get_prompt_text_for_datum(
+        datum_input = self._get_prompt_text_for_datum(
             sample, use_prefix=self.force_task_input_prefix
         )
         if use_prediction_prefix:
-            prompt += f"\n{self._get_prediction_prefix().strip()}"
-        return self.model.build_input_data(prompt, instruction)
+            datum_input += f"\n{self._get_prediction_prefix().strip()}"
+        return self.model.build_input_data(datum_input, instruction)

     def build_demonstration_prompt(
         self,
diff --git a/main.py b/main.py
index f74fbea070a3078cd23d6ca903981808e2e05823..ae7c518cffbe090d83aa570d1861021692d461de 100644
--- a/main.py
+++ b/main.py
@@ -124,7 +124,6 @@ if __name__ == "__main__":
         population_size=10,
         task=task,
         evolution_model=evolution_model,
-        evaluation_model=evaluation_model,
         judge_model=judge_model,
         run_options=options.__dict__,
     )
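
Reviewer sketch, not part of the patch: the zip_longest change in LLMModel.build_demonstration_data pairs each demonstration with its own instruction and, once the instruction list runs out, keeps reusing the last instruction as the fill value. A minimal standalone illustration of that pairing behavior, using hypothetical sample data:

from itertools import zip_longest

# Hypothetical demonstrations (input, expected output) and per-demonstration instructions.
demonstrations = [("2 + 2", "4"), ("3 * 3", "9"), ("10 - 7", "3")]
instructions = ["Solve the addition.", "Solve the multiplication."]

# Mirrors the patched loop: the third demonstration falls back to the last instruction.
for (input_, output), instruction in zip_longest(
    demonstrations, instructions, fillvalue=instructions[-1]
):
    print(f"instruction={instruction!r} input={input_!r} output={output!r}")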
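
Also illustrative only: the rewritten AlpacaHfChat.build_input_data composes the user message from instruction and input_ instead of concatenating onto a possibly empty prefix. A self-contained sketch of that assembly logic, assuming _get_input_prefix() returns the conventional Alpaca "### Input:\n" prefix (its body is not shown in this diff):

def build_alpaca_user_message(input_: str | None, instruction: str | None) -> str:
    # Assumed prefix; the diff does not include the _get_input_prefix() implementation.
    input_prefix = "### Input:\n"
    if instruction is None:
        assert input_ is not None, "Either instruction or input must be provided"
        return input_
    prompt_input = instruction
    if input_ is not None:
        prompt_input += "\n\n" + input_prefix + input_
    return prompt_input

# Example: an instruction plus a task input.
print(build_alpaca_user_message("The movie was great!", "Classify the sentiment."))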