Skip to content
Snippets Groups Projects
Commit dad15285 authored by Max Kimmich's avatar Max Kimmich
Browse files

Add demonstration for DE

parent de38cf26
No related branches found
No related tags found
No related merge requests found
......@@ -10,7 +10,8 @@ from evoprompt.models import LLMModel
from evoprompt.opt_types import ModelUsage, Prompt
from evoprompt.optimization import PromptOptimization
from evoprompt.task import Task
from evoprompt.utils import log_calls, get_all_subclasses
from evoprompt.template_de import get_de_prompt_template
from evoprompt.utils import get_all_subclasses, log_calls
logger = logging.getLogger(__name__)
......@@ -27,18 +28,6 @@ Prompt 2: {prompt2}
"""
DE_PROMPT = """
1. Identify the different parts between the Prompt 1 and Prompt 2:
Prompt 1: {prompt1}
Prompt 2: {prompt2}
2. Randomly mutate the different parts
3. Combine the different parts with Prompt 3, selectively replace it with the different parts in Step 2 and generate a new prompt.
Prompt 3: {prompt3}
4. Cross over the prompt in the Step 3 with the following basic prompt and generate a final prompt bracketed with <prompt> and </prompt>:
Basic Prompt: {basic_prompt}
"""
class EvolutionAlgorithm(PromptOptimization):
shorthand: str
......@@ -59,6 +48,7 @@ class EvolutionAlgorithm(PromptOptimization):
evaluation_model=evaluation_model,
run_options=run_options,
)
self.use_evolution_demo = run_options.get("use_evolution_demo", False)
self.population_size = population_size
......@@ -243,7 +233,7 @@ class DifferentialEvolution(EvolutionAlgorithm):
evolved_prompt, usage = self.evolution_model(
system_message=SYSTEM_MESSAGE,
prompt=DE_PROMPT.format(
prompt=get_de_prompt_template(self.use_evolution_demo, self.task).format(
prompt1=prompt_1,
prompt2=prompt_2,
prompt3=best_prompt_current_evolution,
......@@ -280,7 +270,8 @@ class DifferentialEvolution(EvolutionAlgorithm):
optimizers = {
algorithm.shorthand: algorithm for algorithm in EvolutionAlgorithm.__subclasses__()
algorithm.shorthand: algorithm
for algorithm in get_all_subclasses(EvolutionAlgorithm)
}
......@@ -293,3 +284,8 @@ def get_optimizer_class(name: str):
argument_parser.add_argument(
"--evolution-algorithm", "-a", type=str, choices=optimizers.keys(), default="ga"
)
argument_parser.add_argument(
"--use-evolution-demo",
action="store_true",
help="Whether to prepend a single demonstration example for evolution or not",
)
......@@ -11,8 +11,8 @@ from evoprompt.task.text_classification import TextClassification
from evoprompt.task.sentiment_analysis import SentimentAnalysis
from evoprompt.task.topic_classification import AGNews, TREC
from evoprompt.task.subjectivity_classification import Subj
from evoprompt.task.summarization import SAMSum
from evoprompt.task.simplification import ASSET
from evoprompt.task.summarization import Summarization, SAMSum
from evoprompt.task.simplification import Simplification, ASSET
from evoprompt.utils import get_all_subclasses
......@@ -32,6 +32,9 @@ def get_task(name: str, evaluation_model: LLMModel, **options):
argument_parser.add_argument("--debug", "-d", action="store_true", default=None)
argument_group = argument_parser.add_argument_group("Task arguments")
argument_group.add_argument(
"--task", "-t", type=str, required=True, choices=tasks.keys()
)
argument_group.add_argument("--use-grammar", "-g", action="store_true")
argument_group.add_argument(
"--evaluation-strategy",
......@@ -39,5 +42,7 @@ argument_group.add_argument(
default="simple",
)
argument_group.add_argument(
"--task", "-t", type=str, required=True, choices=tasks.keys()
"--n-evaluation-demo",
type=int,
help="Number of demonstration examples per class used for evaluation",
)
......@@ -35,7 +35,7 @@ class HfSST2(SentimentAnalysis):
def load_validation_set(
self, validation_dataset: str | None, validation_split: str | None
):
return super().load_validation_set("stanfordnlp/sst2", "validation[:200]")
return super().load_validation_set("stanfordnlp/sst2", "validation")
def load_test_set(self, test_dataset: str, test_split: str | None):
return super().load_test_set("stanfordnlp/sst2", "test")
......@@ -128,7 +128,7 @@ class HfMovieReviews(SentimentAnalysis):
self, validation_dataset: str | None, validation_split: str | None
):
return super().load_validation_set(
"cornell-movie-review-data/rotten_tomatoes", "validation[:200]"
"cornell-movie-review-data/rotten_tomatoes", "validation"
)
def load_test_set(self, test_dataset: str, test_split: str | None):
......
......@@ -262,15 +262,16 @@ class Task(metaclass=ABCMeta):
use_grammar: bool,
evaluation_strategy: EvaluationStrategyKey,
validation_split: str | None = None,
use_evolution_demo: bool = False,
test_split: str | None = None,
debug: bool = False,
**kwargs,
) -> None:
self.model = model
self.debug = debug
# whether we use the grammar to constrain the model output or not
self.use_grammar = use_grammar
self.evaluation_strategy = get_evaluation_strategy(evaluation_strategy)(self)
self.use_evolution_demo = use_evolution_demo
self.validation_dataset = self.load_validation_set(
validation_dataset, validation_split
......@@ -279,12 +280,11 @@ class Task(metaclass=ABCMeta):
self.validation_dataset = self.validation_dataset.shuffle(42).select(
range(10)
)
else:
# NOTE currently we select a subset for validation
if len(self.validation_dataset) > 200:
self.validation_dataset = self.validation_dataset.shuffle(42).select(
range(200)
)
elif len(self.validation_dataset) > 200:
# NOTE currently we select a subset as validation set
self.validation_dataset = self.validation_dataset.shuffle(42).select(
range(200)
)
self.test_dataset = self.load_test_set(test_dataset, test_split)
if self.debug and len(self.test_dataset) > 5:
......@@ -301,10 +301,12 @@ class Task(metaclass=ABCMeta):
def predict(self, prompt: str, datum: DatasetDatum) -> tuple[str, ModelUsage]:
# run model for inference using grammar to constrain output
# TODO grammar also depends on prompt and vice-versa -> what are good labels?
response, usage = self.model(
system_message=SYSTEM_MESSAGE,
prompt=prompt,
prompt_appendix=self._get_prompt_text_for_datum(datum),
# grammar can be applied to constrain the model output
grammar=self._get_grammar(datum) if self.use_grammar else None,
)
......
# adopted from https://github.com/beeevita/EvoPrompt/blob/bf43b0dcc63fb79b7c0007d4693b2c0721e9a1a7/data/template_de.py
from evoprompt.task import Task, TextClassification, Summarization, Simplification
# Differential-evolution instruction template without a worked example.
# It contains the placeholders {prompt1}, {prompt2}, {prompt3} and
# {basic_prompt}, and asks the model to wrap the final prompt in
# <prompt> ... </prompt> tags.
DE_PROMPT = """
1. Identify the different parts between the Prompt 1 and Prompt 2:
Prompt 1: {prompt1}
Prompt 2: {prompt2}
2. Randomly mutate the different parts
3. Combine the different parts with Prompt 3, selectively replace it with the different parts in Step 2 and generate a new prompt.
Prompt 3: {prompt3}
4. Cross over the prompt in the Step 3 with the following basic prompt and generate a final prompt bracketed with <prompt> and </prompt>:
Basic Prompt: {basic_prompt}
"""
# Differential-evolution template with one worked demonstration taken from a
# text *simplification* setting. The first part is a fully solved example
# (instruction followed by the step-by-step answer); the second part repeats
# the instruction with the placeholders {prompt1}, {prompt2}, {prompt3} and
# {basic_prompt}. The template ends with "1. " so that the model continues
# with the first step of its own answer.
DE_PROMPT_WITH_DEMONSTRATION_SIM = """Please follow the instruction step-by-step to generate a better prompt.
1. Identify the different parts between the Prompt 1 and Prompt 2:
Prompt 1: Rewrite the input text into simpler text.
Prompt 2: Rewrite my complex sentence in simpler terms, but keep the meaning.
2. Randomly mutate the different parts
3. Combine the different parts with Prompt 3, selectively replace it with the different parts in step 2 and generate a new prompt.
Prompt 3: Rewrite the given input text into simpler English sentences while preserving the same meaning, so it can be understood by non-native English speakers.
4. Crossover the prompt in the step3 with the following basic prompt and generate a final prompt bracketed with <prompt> and </prompt>:
Basic Prompt: Make the sentence easier for people who do not speak English fluently to comprehend.
1. Identifying the different parts between Prompt 1 and Prompt 2:
Prompt 1: Rewrite the input text into simpler text.
Prompt 2: Rewrite my complex sentence in simpler terms, but keep the meaning.
Different parts:
"input text" vs "my complex sentence"
"simpler text" vs "simpler terms, but keep the meaning"
2. Randomly mutate the different parts:
"input text" -> "provided text"
"my complex sentence" -> "the difficult sentence"
"simpler text" -> "easier language"
"simpler terms, but keep the meaning" -> "simpler words while maintaining the meaning"
3. Combine the different parts with Prompt 3, selectively replace it with the different parts in step 2 and generate a new prompt:
Prompt 3: Rewrite the given input text into simpler English sentences while preserving the same meaning, so it can be understood by non-native English speakers.
New Prompt: Transform the provided text into easier language while maintaining the meaning, making it accessible for non-native English speakers.
4. Crossover the prompt in step 3 with the following basic prompt and generate a final prompt bracketed with <prompt> and </prompt>:
Basic Prompt: Make the sentence easier for people who do not speak English fluently to comprehend.
Final Prompt: <prompt>Convert the difficult sentence into simpler words while preserving the meaning, so it's easier for non-native English speakers to understand.</prompt>
Please follow the instruction step-by-step to generate a better prompt.
1. Identify the different parts between the Prompt 1 and Prompt 2:
Prompt 1: {prompt1}
Prompt 2: {prompt2}
2. Randomly mutate the different parts
3. Combine the different parts with Prompt 3, selectively replace it with the different parts in step2 and generate a new prompt.
Prompt 3: {prompt3}
4. Crossover the prompt in the step3 with the following basic prompt and generate a final prompt bracketed with <prompt> and </prompt>:
Basic Prompt: {basic_prompt}
1. """
# Differential-evolution template with one worked demonstration taken from a
# sentiment *classification* setting (labels: terrible, bad, okay, good,
# great). The first part is a fully solved example (instruction followed by
# the step-by-step answer); the second part repeats the instruction with the
# placeholders {prompt1}, {prompt2}, {prompt3} and {basic_prompt}. The
# template ends with "1. " so that the model continues with the first step of
# its own answer.
DE_PROMPT_WITH_DEMONSTRATION_CLS = """Please follow the instruction step-by-step to generate a better prompt.
1. Identify the different parts between the Prompt 1 and Prompt 2:
Prompt 1: Your task is to classify the comment as one of the following categories: terrible, bad, okay, good, great.
Prompt 2: In this task, you are given sentences from movie reviews. The task is to classify a sentence as one of the following categories: terrible, bad, okay, good, great.
2. Randomly mutate the different parts
3. Combine the different parts with Prompt 3, selectively replace it with the different parts in step 2 and generate a new prompt.
Prompt 3: Assess a movie or a book based on its explanation and determine the sentiment of the movie review. Have your colleague's evaluation of the movie they watched be expressed in a concise remark (e.g. awesome, all right, terrible, or horrendous) following the narrative synopsis they were provided, and choose from terrible, bad, okay, good and great to describe the movie.
4. Crossover the prompt in the step3 with the following basic prompt and generate a final prompt bracketed with <prompt> and </prompt>:
Basic Prompt: You are a sentiment classifier. To do this, you must first understand the meaning of the sentence and any relevant context. And then you should classify it as one of the following categories: terrible, bad, okay, good, great.
1. Identifying the different parts between Prompt 1 and Prompt 2:
Prompt 1: Your task is to classify the comment as one of the following categories: terrible, bad, okay, good, great.
Prompt 2: In this task, you are given sentences from movie reviews. The task is to classify a sentence as one of the following categories: terrible, bad, okay, good, great.
Different parts:
"classify the comment" vs "classify a sentence"
"Your task is to" vs "In this task, you are given sentences from movie reviews. The task is to"
2. Randomly mutate the different parts:
"classify the comment" -> "categorize the statement"
"classify a sentence" -> "evaluate the review"
"Your task is to" -> "Your mission is to"
"In this task, you are given sentences from movie reviews. The task is to" -> "In this assignment, you will receive movie review sentences. Your job is to"
3. Combine the different parts with Prompt 3, selectively replace it with the different parts in step 2 and generate a new prompt:
Prompt 3: Assess a movie or a book based on its explanation and determine the sentiment of the movie review. Have your colleague's evaluation of the movie they watched be expressed in a concise remark (e.g. awesome, all right, terrible, or horrendous) following the narrative synopsis they were provided, and choose from terrible, bad, okay, good and great to describe the movie.
New Prompt: In this assignment, you will receive movie review sentences. Your job is to evaluate the review and determine the sentiment, choosing from terrible, bad, okay, good, and great to describe the movie.
4. Crossover the prompt in step 3 with the following basic prompt and generate a final prompt bracketed with <prompt> and </prompt>:
Basic Prompt: You are a sentiment classifier. To do this, you must first understand the meaning of the sentence and any relevant context. And then you should classify it as one of the following categories: terrible, bad, okay, good, great.
Final Prompt: <prompt>Your mission is to categorize the statement from a movie review by understanding its meaning and context, and then classify it as one of the following categories: terrible, bad, okay, good, or great.</prompt>
Please follow the instruction step-by-step to generate a better prompt.
1. Identify the different parts between the Prompt 1 and Prompt 2:
Prompt 1: {prompt1}
Prompt 2: {prompt2}
2. Randomly mutate the different parts
3. Combine the different parts with Prompt 3, selectively replace it with the different parts in step2 and generate a new prompt.
Prompt 3: {prompt3}
4. Crossover the prompt in the step3 with the following basic prompt and generate a final prompt bracketed with <prompt> and </prompt>:
Basic Prompt: {basic_prompt}
1. """
def get_de_prompt_template(use_demonstration_example: bool, task: None | Task = None):
    """Return the prompt template for the differential evolution step.

    Args:
        use_demonstration_example: If false, the plain ``DE_PROMPT`` is
            returned and ``task`` is ignored. If true, a template with one
            worked demonstration is selected based on the type of ``task``.
        task: The task being optimized; required when
            ``use_demonstration_example`` is true.

    Returns:
        A format string with the placeholders ``prompt1``, ``prompt2``,
        ``prompt3`` and ``basic_prompt``.

    Raises:
        AssertionError: If a demonstration is requested but ``task`` is None.
        NotImplementedError: If a demonstration is requested for a task type
            that has no demonstration template.
    """
    if not use_demonstration_example:
        return DE_PROMPT

    assert (
        task is not None
    ), "Task cannot be None if demonstration data should be used."

    # The demonstration has to match the kind of task: the *_CLS template
    # shows a classification example, the *_SIM template a simplification one.
    if isinstance(task, (TextClassification, Summarization)):
        # NOTE(review): there is no summarization-specific demonstration in
        # this module; Summarization keeps sharing the branch with
        # TextClassification as before -- confirm this is the intended demo.
        return DE_PROMPT_WITH_DEMONSTRATION_CLS
    elif isinstance(task, Simplification):
        return DE_PROMPT_WITH_DEMONSTRATION_SIM
    else:
        raise NotImplementedError(
            f"Prompt with demonstration data is not implemented for task of type {type(task)}."
        )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment