From 691ced5275ca84b865d13fea79fbcba1383b479f Mon Sep 17 00:00:00 2001
From: Maximilian Schmidt <maximilian.schmidt@ims.uni-stuttgart.de>
Date: Fri, 16 Aug 2024 16:48:10 +0200
Subject: [PATCH] Add CLI option for the maximum number of tokens to generate

---
 evoprompt/models.py    | 7 ++++++-
 evoprompt/task/task.py | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/evoprompt/models.py b/evoprompt/models.py
index 416eb2c..3131fff 100644
--- a/evoprompt/models.py
+++ b/evoprompt/models.py
@@ -67,12 +67,12 @@ class LLMModel(ABC):
         prompt_suffix: str = "",
         chat: bool | None = None,
         stop: str = None,
-        max_tokens: int = None,
         history: dict = None,
         **kwargs: Any,
     ) -> tuple[str, ModelUsage]:
         if chat is None:
             chat = self.chat
+        max_tokens = kwargs.pop("max_tokens", self.options.max_tokens)
 
         # create prompt
         prompt = prompt_prefix + prompt + prompt_suffix + prompt_appendix
@@ -334,4 +334,9 @@ argument_group.add_argument(
     "--disable-cache",
     action="store_true",
 )
+argument_group.add_argument(
+    "--max-tokens",
+    type=int,
+    help="Maximum number of tokens being generated from LLM. ",
+)
 argument_group.add_argument("--chat", "-c", action="store_true")
diff --git a/evoprompt/task/task.py b/evoprompt/task/task.py
index e623b3d..8b27328 100644
--- a/evoprompt/task/task.py
+++ b/evoprompt/task/task.py
@@ -318,6 +318,7 @@ class Task(metaclass=ABCMeta):
             prompt_appendix=self._get_prompt_text_for_datum(datum),
             # grammar can be applied to constrain the model output
             grammar=self._get_grammar(datum) if self.use_grammar else None,
+            # we use cached completions to speed up the process; this loses the non-deterministic behavior of LMs, but we are fine with a single result
             use_cache=True,
         )
 
-- 
GitLab
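
A minimal sketch (not part of the patch) of how the new `--max-tokens` flag is expected to flow into the completion call: the value parsed by argparse ends up on the model's `options`, and `kwargs.pop("max_tokens", self.options.max_tokens)` falls back to it whenever the caller does not pass `max_tokens` explicitly. The argparse wiring and the `DummyModel` class below are illustrative assumptions, not code from the repository.

```python
import argparse
from typing import Any

# assumed stand-in for the repository's argument group
parser = argparse.ArgumentParser()
parser.add_argument(
    "--max-tokens",
    type=int,
    help="Maximum number of tokens to generate with the LLM.",
)


class DummyModel:
    """Illustrative stand-in for LLMModel; only mirrors the patched fallback logic."""

    def __init__(self, options: argparse.Namespace) -> None:
        self.options = options

    def create_completion(self, prompt: str, **kwargs: Any) -> str:
        # mirrors the patch: an explicit kwarg wins, the CLI value is the fallback
        max_tokens = kwargs.pop("max_tokens", self.options.max_tokens)
        return f"(would call the LLM with max_tokens={max_tokens})"


if __name__ == "__main__":
    options = parser.parse_args(["--max-tokens", "128"])
    model = DummyModel(options)
    print(model.create_completion("Hello"))                 # uses 128 from the CLI
    print(model.create_completion("Hello", max_tokens=32))  # explicit kwarg overrides
```

If `--max-tokens` is omitted, `options.max_tokens` is `None`, so the completion call behaves as before the patch (no explicit token limit is passed to the backend).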