diff --git a/evoprompt/evolution/evolution.py b/evoprompt/evolution/evolution.py
index 7eae7c2af18d1fbd7fc1eb4e6852f64a66f13e28..d3e0ec338c8356d3062868034770abd29ecd3fa6 100644
--- a/evoprompt/evolution/evolution.py
+++ b/evoprompt/evolution/evolution.py
@@ -108,6 +108,7 @@ class EvolutionAlgorithm(PromptOptimization, metaclass=ABCMeta):
         end_token: str = "</prompt>",
         allow_missing_end_token: bool = True,
     ):
+        # TODO another option would be to select the first match that is not equal to " and " (which is part of the instruction and usually repeated in the response)
         matches = re.findall(
             # regex that matches any characters between last pair of `start_token` and `end_token`, and optionally allow missing `end_token`
             rf"{start_token}(?!.*{start_token})(?:(.*){end_token}{"|(.*)" if allow_missing_end_token else ""})",
@@ -297,7 +298,7 @@ class GeneticAlgorithm(EvolutionAlgorithm):
         )
 
         judgement = self.judge_and_correct_step(
-            filled_prompt, response, history, recent_turn
+            filled_prompt, response, history=None, recent_turn=recent_turn
         )
 
         if judgement.skip:
@@ -410,11 +411,11 @@ class DifferentialEvolution(EvolutionAlgorithm):
         )
 
         judgement = self.judge_and_correct_step(
-            filled_prompt, response, history, recent_turn
+            filled_prompt, response, history=None, recent_turn=recent_turn
         )
 
         if judgement.skip:
-            # skip this prompt, for DE this means using the basic prompt
+            # user asked to skip this prompt, for DE this means using the basic prompt
             return (
                 prompts_current_evolution[current_iteration].content,
                 [judgement],
@@ -532,11 +533,11 @@ class DifferentialEvolutionWithCot(DifferentialEvolution):
                 self.evolution_model._get_user_message(filled_prompt)
             )
             # TODO Shall we still use only a single turn containing all messages if we do not use demonstrations for evolution?
-            messages = self.condense_messages(evolutions_steps)
+            prompt = self.condense_messages(evolutions_steps, return_str=True)
             response, history, recent_turn, usage = (
                 self.evolution_model.create_completion(
                     system_message=SYSTEM_MESSAGE,
-                    messages=messages,
+                    prompt=prompt,
                     history=messages_demos,
                     # the models often repeat the instruction which could also contain </prompt> therefore we should not stop early
                     stop=None,
@@ -552,13 +553,18 @@ class DifferentialEvolutionWithCot(DifferentialEvolution):
                 history + recent_turn,
                 response,
             )
+            # TODO use serialized messages as prompt or use previous evolution steps as history?
             judgement = self.judge_and_correct_step(
-                filled_prompt, response, history=history, recent_turn=recent_turn
+                filled_prompt,
+                response,
+                history=evolutions_steps[:-2],
+                recent_turn=recent_turn,
+                # prompt, response, history=None, recent_turn=recent_turn
             )
             judgements.append(judgement)
 
             if judgement.skip:
-                # skip this prompt, for DE this means using the basic prompt
+                # user asked to skip this prompt, for DE this means using the basic prompt
                 return (
                     prompts_current_evolution[current_iteration].content,
                     judgements,
@@ -595,8 +601,12 @@ class DifferentialEvolutionWithCot(DifferentialEvolution):
 
         return evolved_prompt, judgements, usage
 
-    def condense_messages(self, messages: list[ChatMessages]) -> list[dict]:
+    def condense_messages(
+        self, messages: list[ChatMessages], return_str: bool = False
+    ) -> list[dict] | str:
         if not messages:
+            if return_str:
+                return ""
             return []
 
         if messages[-1]["role"] == "assistant":
@@ -606,6 +616,12 @@ class DifferentialEvolutionWithCot(DifferentialEvolution):
             assistant_turn = None
             user_turn = "\n\n".join(message["content"] for message in messages)
 
+        if return_str:
+            assert (
+                assistant_turn is None
+            ), "Cannot return string if most recent turn is from assistant."
+            return user_turn
+
         messages = [self.evolution_model._get_user_message(user_turn)]
         if assistant_turn is not None:
             messages.append(assistant_turn)
diff --git a/evoprompt/optimization.py b/evoprompt/optimization.py
index ffeded99da45d76fd73728b9f9b9a174ef4a5e82..de7ffb5d3748ca6851b9456286708f08dd5aa5b6 100644
--- a/evoprompt/optimization.py
+++ b/evoprompt/optimization.py
@@ -67,16 +67,17 @@ class ResponseEditor(App):
 
     def compose(self) -> ComposeResult:
         self.text_area = TextArea.code_editor(self.response, soft_wrap=True)
-        yield ScrollableContainer(
-            *(
-                Collapsible(
-                    Static(message["content"]),
-                    title=message["role"],
-                    collapsed=idx != len(self.history) - 1,
+        if self.history is not None:
+            yield ScrollableContainer(
+                *(
+                    Collapsible(
+                        Static(message["content"]),
+                        title=message["role"],
+                        collapsed=idx != len(self.history) - 1,
+                    )
+                    for idx, message in enumerate(self.history)
                 )
-                for idx, message in enumerate(self.history)
             )
-        )
         yield ScrollableContainer(
             Label(Panel(self.judge_response, title="Judge response")),
             Label(Rule(title="Response to edit"), expand=True),
@@ -347,7 +348,7 @@ class PromptOptimization:
         self,
         instruction: str,
         response: str,
-        history: ChatMessages,
+        history: ChatMessages | None,
         recent_turn: ChatMessages,
     ) -> Judgement:
         # TODO potentially move to separate class wrapping the judge model and related functionality
@@ -357,27 +358,37 @@ class PromptOptimization:
         # judge the actual response
         # concatenate all user and assistant messages to provide context
-        history_str = "\n".join(
-            message["content"]
-            for message in history
-            if message["role"] in ["user", "assistant"]
-        )
-        # TODO What if the history does not exist (is empty), i.e., for the first step in de-cot?
-        prompt = (
-            f"Context: {history_str}\nInstruction: {instruction}\nResponse: {response}"
-        )
-        system_message = (
-            "You are acting as a judge. Please read the context, the instruction and the response "
-            "and decide if the response follows the instruction. "
-            "If it does, answer 'good'. If it does not, answer 'bad'. "
-            "Wrap the answer with tags <judgement> and </judgement>. "
-            "Please also add an explanation for your judgement."
-        )
+        # if there is no history, only show instruction and response
+        if history:
+            history_str = "\n".join(
+                message["content"]
+                for message in history
+                if message["role"] in ["user", "assistant"]
+            )
+            prompt = f"Context:\n{history_str}\n\nInstruction:\n{instruction}\n\nResponse:\n{response}"
+            system_message = (
+                "You are acting as a judge. Please read the context, the instruction and the response "
+                "and decide if the response follows the instruction. "
+                "If it does, answer 'good'. If it does not, answer 'bad'. "
+                "Wrap the answer with tags <judgement> and </judgement>. "
+                "Please also add an explanation for your judgement."
+            )
+        else:
+            prompt = f"Instruction:\n{instruction}\n\nResponse:\n{response}"
+            system_message = (
+                "You are acting as a judge. Please read the instruction and the response "
+                "and decide if the response follows the instruction. "
+                "If it does, answer 'good'. If it does not, answer 'bad'. "
+                "Wrap the answer with tags <judgement> and </judgement>. "
+                "Please also add an explanation for your judgement."
+            )
+        # input(f"System message:\n{system_message}\n\nPrompt:\n{prompt}\n")
 
         judgement_response, _, _, _ = self.judge_model.create_completion(
             system_message=system_message,
             prompt=prompt,
         )
+        # input(f"Judgement response:\n{judgement_response}\n")
 
         matches = re.findall(
             # regex that matches `good` and `bad` between <judgement> and </judgement> where additional characters can be present, e.g., whitespace
             r"<judgement>.*(good|bad).*</judgement>",
@@ -407,7 +418,7 @@ class PromptOptimization:
         editor = ResponseEditor(
             instruction,
             response,
-            history[:-1],
+            history[:-1] if history is not None else None,
             judge_response=judgement_response,
         )
         editor.run()
@@ -418,13 +429,12 @@ class PromptOptimization:
         delta = Differ().compare(
             response.splitlines(), editor.modified_response.splitlines()
         )
+        delta = [
+            line for line in delta if line.startswith("+") or line.startswith("-")
+        ]
         logger.info(
-            "User corrected prompt (delta):\n%s",
-            "\n".join(
-                line
-                for line in delta
-                if line.startswith("+") or line.startswith("-")
-            ),
+            "User corrected prompt (delta):%s",
+            ("\n" + "\n".join(delta)) if delta else " No changes",
             # "User corrected prompt:\n'%s'\n -> \n'%s'",
             # response,
             # editor.modified_response,
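# ---------------------------------------------------------------------------
# Reviewer note (not part of the patch): a minimal, self-contained sketch of
# the `return_str` behavior this diff adds to `condense_messages`, assuming
# messages are plain role/content dicts. `ChatMessage` and
# `condense_messages_sketch` are hypothetical stand-ins for the project's
# actual types and method; only the control flow mirrors the patched code.

ChatMessage = dict[str, str]


def condense_messages_sketch(
    messages: list[ChatMessage], return_str: bool = False
) -> list[ChatMessage] | str:
    # empty history: string mode yields "", list mode yields []
    if not messages:
        return "" if return_str else []

    # split off a trailing assistant turn so only user content is condensed
    if messages[-1]["role"] == "assistant":
        assistant_turn = messages[-1]
        messages = messages[:-1]
    else:
        assistant_turn = None

    # join all remaining turns into a single user message
    user_turn = "\n\n".join(message["content"] for message in messages)

    if return_str:
        # mirrors the assert in the patch: string mode requires a user-final history
        assert assistant_turn is None, "Cannot return string if most recent turn is from assistant."
        return user_turn

    condensed = [{"role": "user", "content": user_turn}]
    if assistant_turn is not None:
        condensed.append(assistant_turn)
    return condensed


if __name__ == "__main__":
    steps = [
        {"role": "user", "content": "Evolve prompt A."},
        {"role": "user", "content": "Evolve prompt B."},
    ]
    # string mode, as used above for the new `prompt=` argument to create_completion
    print(condense_messages_sketch(steps, return_str=True))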