diff --git a/evoprompt/optimization.py b/evoprompt/optimization.py
index 94ef34013d2010563048db45962f9f8fa8563dff..4f419496f6f6d53d9103eb0f833361c16ecfb0e3 100644
--- a/evoprompt/optimization.py
+++ b/evoprompt/optimization.py
@@ -1,11 +1,15 @@
 import json
 import logging
 from pathlib import Path
+import re
 from typing import Any, Literal, NamedTuple, Optional, TypedDict
 
+from rich.panel import Panel
+from rich.rule import Rule
 from textual.app import App, ComposeResult
 from textual.binding import Binding
-from textual.widgets import Collapsible, Footer, Label, TextArea
+from textual.containers import ScrollableContainer
+from textual.widgets import Collapsible, Footer, Label, Static, TextArea
 from tqdm import tqdm, trange
 
 from evoprompt.models import ChatMessages, LLMModel
@@ -40,21 +44,36 @@ class ResponseEditor(App):
         ),
     ]
 
-    def __init__(self, instruction: str, original_response: str, history: ChatMessages):
+    def __init__(
+        self,
+        instruction: str,
+        original_response: str,
+        history: ChatMessages,
+        judge_response: str,
+    ):
         self.instruction = instruction
         self.response = original_response
         self.history = history
+        self.judge_response = judge_response  # judge verdict text; rendered in a Panel above the editor in compose()
         super().__init__()
 
     def compose(self) -> ComposeResult:
-        self.text_area = TextArea.code_editor(self.response)
-        for idx, message in enumerate(self.history[:-1]):
-            yield Collapsible(
-                Label(message["content"]),
-                title=message["role"],
-                collapsed=idx != len(self.history) - 2,
+        self.text_area = TextArea.code_editor(self.response, soft_wrap=True)  # soft_wrap: long lines wrap instead of scrolling horizontally
+        yield ScrollableContainer(
+            *(
+                Collapsible(
+                    Static(message["content"]),
+                    title=message["role"],
+                    collapsed=idx != len(self.history) - 1,  # only the newest history message starts expanded
+                )
+                for idx, message in enumerate(self.history)
             )
-        yield self.text_area
+        )
+        yield ScrollableContainer(
+            Label(Panel(self.judge_response, title="Judge response")),
+            Label(Rule(title="Response to edit"), expand=True),  # divider between judge panel and the editable response
+            self.text_area,
+        )
         yield Footer()
@@ -281,18 +300,49 @@ class PromptOptimization:
         if self.judge_model is None:
             return Judgement(response, response, happy=None)
 
-        # TODO: judge the actual response
-        judge_happy = False
+        # judge the actual response
+        prompt = f"Instruction: {instruction}\nResponse: {response}"
+        system_message = (
+            "You are acting as a judge. Please read the instruction and the response and decide "
+            "if the response follows the instruction. "
+            "If it does, answer 'good'. If it does not, answer 'bad'. "
+            "Wrap the answer with tags <judgement> and </judgement>. "
+            "Please also add an explanation for your judgement."
+        )
+        judgement_response, _, _ = self.judge_model.create_completion(
+            system_message=system_message,
+            prompt=prompt,
+        )
+        matches = re.findall(
+            # lazy quantifiers bind the group to the FIRST verdict between the tags
+            # (greedy .* would pick the last one, e.g. in "good, not bad");
+            # DOTALL lets the tag content span newlines, since the judge is asked
+            # to add an explanation and may put it inside the tags
+            r"<judgement>.*?(good|bad).*?</judgement>",
+            judgement_response,
+            flags=re.IGNORECASE | re.DOTALL,
+        )
 
+        # parse the judgement response; None means no parsable verdict was found
+        if matches:
+            judge_happy = matches[0].lower() == "good"
+        else:
+            judge_happy = None
+        # NOTE: an unparsable verdict (judge_happy=None) is reported as 'bad'
+        # below and also falls through to the manual editor
         logger.info(
             f"{self.judge_model.__class__.__name__} judged the response as {'good' if judge_happy else 'bad'}"
         )
+
         if judge_happy:
             return Judgement(response, response, happy=True)
 
         logger.info(f"Prompt judged as bad. Letting User change the prompt.")
 
-        editor = ResponseEditor(instruction, response, history)
+        editor = ResponseEditor(
+            instruction,
+            response,
+            history[:-1],
+            judge_response=judgement_response,
+        )
         editor.run()
 
         return Judgement(response, editor.modified_response, happy=False)