From 03728b2004271492f698554f49ee0d1d394e0d35 Mon Sep 17 00:00:00 2001
From: Maximilian Schmidt <maximilian.schmidt@ims.uni-stuttgart.de>
Date: Wed, 14 Aug 2024 18:07:58 +0200
Subject: [PATCH] Improve prompt extraction

---
 evoprompt/evolution.py   | 14 +++++++++-----
 evoprompt/template_de.py |  2 +-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/evoprompt/evolution.py b/evoprompt/evolution.py
index 6d076bf..3891015 100644
--- a/evoprompt/evolution.py
+++ b/evoprompt/evolution.py
@@ -250,14 +250,18 @@ class DifferentialEvolution(EvolutionAlgorithm):
             ),
         )
         matches = re.findall(
-            # TODO make sure that regex matches also if </prompt> is missing
-            r"<prompt>(.*?)</prompt>",
+            # capture the text after the last <prompt> tag: up to the final </prompt> if one follows, otherwise up to the end of the string
+            r"<prompt>(?!.*<prompt>)(?:(.*)</prompt>|(.*))",
             evolved_prompt,
             flags=(re.IGNORECASE | re.DOTALL),
         )
-        if matches:
-            # we expect that the last match is where we find the prompt, other matches are part of the instruction (which the model repeats)
-            evolved_prompt = matches[-1]
+        if matches and any(matches[0]):
+            # there is always only a single match, and one group should be non-empty
+            if matches[0][0]:
+                evolved_prompt = matches[0][0]
+            else:
+                assert matches[0][1]
+                evolved_prompt = matches[0][1]
         else:
             # TODO what to do in this case? Discard generated prompt directly?
             pass
diff --git a/evoprompt/template_de.py b/evoprompt/template_de.py
index 04aacd4..1a8cb0e 100644
--- a/evoprompt/template_de.py
+++ b/evoprompt/template_de.py
@@ -102,7 +102,7 @@ def get_de_prompt_template(use_demonstration_example: bool, task: None | Task =
     if use_demonstration_example:
         assert (
             task is not None
-        ), "Task cannot be done if demonstation data should be used."
+        ), "Task cannot be None if demonstation data should be used."
 
         if isinstance(task, (TextClassification, Summarization)):
             return DE_PROMPT_WITH_DEMONSTRATION_SIM
-- 
GitLab