Commit 9462d643 authored by Grießhaber Daniel

allow disabling LLM-as-a-judge by not specifying a judgement engine
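In short: when no judge engine is configured, judge_model is now None and the judge-and-correct step returns the response unchanged, with happy=None marking "not judged". A minimal sketch of that behaviour, condensed from the diff below (the real method lives on PromptOptimization and takes more arguments):

# Sketch only; names follow the diff below but the surrounding class is simplified.
from typing import NamedTuple


class Judgement(NamedTuple):
    original_response: str
    corrected_response: str
    happy: bool | None  # None = judging disabled, no verdict was produced


def judge_and_correct_step(judge_model, response: str) -> Judgement:
    if judge_model is None:
        # LLM-as-a-judge disabled: pass the response through unchanged
        return Judgement(response, response, happy=None)
    # otherwise the judge model would evaluate and possibly correct the response
    ...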

parent e751763b
1 merge request: !5 Llm as a judge
@@ -46,7 +46,7 @@ class EvolutionAlgorithm(PromptOptimization, metaclass=ABCMeta):
         task: Task,
         evolution_model: LLMModel,
         evaluation_model: LLMModel,
-        judge_model: LLMModel,
+        judge_model: LLMModel | None,
         run_options: dict[str, Any] = {},
     ) -> None:
         super().__init__(
...
@@ -24,7 +24,7 @@ PromptSource = Literal["baseprompt", "paraphrase", "evolution", "corrected"]
 class Judgement(NamedTuple):
     original_response: str
     corrected_response: str
-    happy: bool
+    happy: bool | None


 class PromptMeta(TypedDict):
@@ -122,7 +122,7 @@ class PromptOptimization:
         task: Task,
         evolution_model: LLMModel,
         evaluation_model: LLMModel,
-        judge_model: LLMModel,
+        judge_model: LLMModel | None,
         run_options: dict[str, Any] = {},
     ) -> None:
         self.task = task
...
@@ -278,6 +278,9 @@ class PromptOptimization:
     def judge_and_correct_step(
         self, instruction: str, response: str, history: ChatMessages
     ) -> Judgement:
+        if self.judge_model is None:
+            return Judgement(response, response, happy=None)
+
         # TODO: judge the actual response
         judge_happy = False
...
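For context, a hedged sketch of how a caller might act on the now three-valued happy field (the call sites themselves are not part of this commit):

# Hypothetical caller-side handling of Judgement.happy (illustrative only):
#   True  -> judge accepted the original response
#   False -> judge rejected it; prefer the corrected response
#   None  -> judging was disabled; keep the original response
def pick_response(judgement: Judgement) -> str:
    if judgement.happy is False:
        return judgement.corrected_response
    return judgement.original_response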
@@ -69,13 +69,10 @@ if __name__ == "__main__":
         case "openai":
             logger.info(f"Using {options.openai_model} as the evolution engine")

-    judge_model: LLMModel
+    judge_model: LLMModel | None
     if options.judge_engine is not None:
         judge_model = LLMModel.get_model(options.judge_engine, options=options)
         logger.info(f"Using {options.judge_engine} as the judge engine")
-    else:
-        judge_model = evolution_model
-        logger.info("Using the same model for judging as for evolution")

     # set up evaluation model
     # NOTE currenty we always stick to Llama as evaluation engine
...
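The deleted else branch is the actual behaviour change: a missing judge engine previously fell back to reusing the evolution model as judge, whereas now it leaves judging disabled. Condensed before/after sketch (simplified from the diff above):

# Before: no judge engine configured -> evolution model doubled as judge
# judge_model = evolution_model

# After: no judge engine configured -> judge_model stays None and
# judge_and_correct_step() short-circuits without judging.
judge_model: LLMModel | None = None
if options.judge_engine is not None:
    judge_model = LLMModel.get_model(options.judge_engine, options=options)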