Commit da2cc6ed authored by Max Kimmich

Add judgement using judge model and update editor rendering

parent 0099d51c
 import json
 import logging
 from pathlib import Path
+import re
 from typing import Any, Literal, NamedTuple, Optional, TypedDict
 from textual.app import App, ComposeResult
 from textual.binding import Binding
-from textual.widgets import Collapsible, Footer, Label, TextArea
+from textual.containers import ScrollableContainer
+from textual.widgets import Collapsible, Footer, Label, TextArea, Static
+from rich.panel import Panel
+from rich.rule import Rule
 from tqdm import tqdm, trange
 from evoprompt.models import ChatMessages, LLMModel
@@ -40,21 +44,36 @@ class ResponseEditor(App):
         ),
     ]
 
-    def __init__(self, instruction: str, original_response: str, history: ChatMessages):
+    def __init__(
+        self,
+        instruction: str,
+        original_response: str,
+        history: ChatMessages,
+        judge_response: str,
+    ):
         self.instruction = instruction
         self.response = original_response
         self.history = history
+        self.judge_response = judge_response
         super().__init__()
 
     def compose(self) -> ComposeResult:
-        self.text_area = TextArea.code_editor(self.response)
-        for idx, message in enumerate(self.history[:-1]):
-            yield Collapsible(
-                Label(message["content"]),
-                title=message["role"],
-                collapsed=idx != len(self.history) - 2,
+        self.text_area = TextArea.code_editor(self.response, soft_wrap=True)
+        yield ScrollableContainer(
+            *(
+                Collapsible(
+                    Static(message["content"]),
+                    title=message["role"],
+                    collapsed=idx != len(self.history) - 1,
+                )
+                for idx, message in enumerate(self.history)
             )
-        yield self.text_area
+        )
+        yield ScrollableContainer(
+            Label(Panel(self.judge_response, title="Judge response")),
+            Label(Rule(title="Response to edit"), expand=True),
+            self.text_area,
+        )
         yield Footer()
 
     @property
@@ -281,18 +300,46 @@ class PromptOptimization:
         if self.judge_model is None:
             return Judgement(response, response, happy=None)
 
-        # TODO: judge the actual response
-        judge_happy = False
+        # judge the actual response
+        prompt = f"Instruction: {instruction}\nResponse: {response}"
+        system_message = (
+            "You are acting as a judge. Please read the instruction and the response and decide "
+            "if the response follows the instruction. "
+            "If it does, answer 'good'. If it does not, answer 'bad'. "
+            "Wrap the answer with tags <judgement> and </judgement>. "
+            "Please also add an explanation for your judgement."
+        )
+        judgement_response, _, _ = self.judge_model.create_completion(
+            system_message=system_message,
+            prompt=prompt,
+        )
+        matches = re.findall(
+            # regex that matches `good` and `bad` between <judgement> and </judgement> where additional characters can be present, e.g., whitespace
+            r"<judgement>.*(good|bad).*</judgement>",
+            judgement_response,
+            flags=(re.IGNORECASE),
+        )
+        # parse the judgement response
+        if matches:
+            judge_happy = True if matches[0].lower() == "good" else False
+        else:
+            judge_happy = None
+
         logger.info(
             f"{self.judge_model.__class__.__name__} judged the response as {'good' if judge_happy else 'bad'}"
         )
 
         if judge_happy:
             return Judgement(response, response, happy=True)
 
         logger.info(f"Prompt judged as bad. Letting User change the prompt.")
 
-        editor = ResponseEditor(instruction, response, history)
+        editor = ResponseEditor(
+            instruction,
+            response,
+            history[:-1],
+            judge_response=judgement_response,
+        )
         editor.run()
         return Judgement(response, editor.modified_response, happy=False)
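
Side note (not part of the commit): the snippet below is a minimal, standalone sketch of how the <judgement> parsing added above behaves on a few sample judge outputs. The parse_judgement helper name and the example strings are illustrative assumptions, not code from this repository.

import re
from typing import Optional


def parse_judgement(judgement_response: str) -> Optional[bool]:
    # Same pattern as in the diff: captures "good"/"bad" between <judgement> tags.
    # Without re.DOTALL, the tags and the verdict must sit on a single line.
    matches = re.findall(
        r"<judgement>.*(good|bad).*</judgement>",
        judgement_response,
        flags=re.IGNORECASE,
    )
    if not matches:
        return None  # verdict could not be parsed
    return matches[0].lower() == "good"


# Illustrative judge outputs (not real model responses):
print(parse_judgement("<judgement>good</judgement> The response follows the instruction."))  # True
print(parse_judgement("Explanation first, then <judgement>bad</judgement>"))  # False
print(parse_judgement("No tags in this answer."))  # None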