From 03728b2004271492f698554f49ee0d1d394e0d35 Mon Sep 17 00:00:00 2001 From: Maximilian Schmidt <maximilian.schmidt@ims.uni-stuttgart.de> Date: Wed, 14 Aug 2024 18:07:58 +0200 Subject: [PATCH] Improve prompt extraction --- evoprompt/evolution.py | 14 +++++++++----- evoprompt/template_de.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/evoprompt/evolution.py b/evoprompt/evolution.py index 6d076bf..3891015 100644 --- a/evoprompt/evolution.py +++ b/evoprompt/evolution.py @@ -250,14 +250,18 @@ class DifferentialEvolution(EvolutionAlgorithm): ), ) matches = re.findall( - # TODO make sure that regex matches also if </prompt> is missing - r"<prompt>(.*?)</prompt>", + # regex that matches any characters between last pair of <prompt></prompt>, also if </prompt> is missing + r"<prompt>(?!.*<prompt>)(?:(.*)</prompt>|(.*))", evolved_prompt, flags=(re.IGNORECASE | re.DOTALL), ) - if matches: - # we expect that the last match is where we find the prompt, other matches are part of the instruction (which the model repeats) - evolved_prompt = matches[-1] + if matches and any(matches[0]): + # there is always only a single match, and one group should be non-empty + if matches[0][0]: + evolved_prompt = matches[0][0] + else: + assert matches[0][1] + evolved_prompt = matches[0][1] else: # TODO what to do in this case? Discard generated prompt directly? pass diff --git a/evoprompt/template_de.py b/evoprompt/template_de.py index 04aacd4..1a8cb0e 100644 --- a/evoprompt/template_de.py +++ b/evoprompt/template_de.py @@ -102,7 +102,7 @@ def get_de_prompt_template(use_demonstration_example: bool, task: None | Task = if use_demonstration_example: assert ( task is not None - ), "Task cannot be done if demonstation data should be used." + ), "Task cannot be None if demonstation data should be used." if isinstance(task, (TextClassification, Summarization)): return DE_PROMPT_WITH_DEMONSTRATION_SIM -- GitLab