From 691ced5275ca84b865d13fea79fbcba1383b479f Mon Sep 17 00:00:00 2001
From: Maximilian Schmidt <maximilian.schmidt@ims.uni-stuttgart.de>
Date: Fri, 16 Aug 2024 16:48:10 +0200
Subject: [PATCH] Add CLI option for maximum number of tokens being generated

---
 evoprompt/models.py    | 7 ++++++-
 evoprompt/task/task.py | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/evoprompt/models.py b/evoprompt/models.py
index 416eb2c..3131fff 100644
--- a/evoprompt/models.py
+++ b/evoprompt/models.py
@@ -67,12 +67,12 @@ class LLMModel(ABC):
         prompt_suffix: str = "",
         chat: bool | None = None,
         stop: str = None,
-        max_tokens: int = None,
         history: dict = None,
         **kwargs: Any,
     ) -> tuple[str, ModelUsage]:
         if chat is None:
             chat = self.chat
+        max_tokens = kwargs.pop("max_tokens", self.options.max_tokens)
 
         # create prompt
         prompt = prompt_prefix + prompt + prompt_suffix + prompt_appendix
@@ -334,4 +334,9 @@ argument_group.add_argument(
     "--disable-cache",
     action="store_true",
 )
+argument_group.add_argument(
+    "--max-tokens",
+    type=int,
+    help="Maximum number of tokens to generate with the LLM.",
+)
 argument_group.add_argument("--chat", "-c", action="store_true")
diff --git a/evoprompt/task/task.py b/evoprompt/task/task.py
index e623b3d..8b27328 100644
--- a/evoprompt/task/task.py
+++ b/evoprompt/task/task.py
@@ -318,6 +318,7 @@ class Task(metaclass=ABCMeta):
             prompt_appendix=self._get_prompt_text_for_datum(datum),
             # grammar can be applied to constrain the model output
             grammar=self._get_grammar(datum) if self.use_grammar else None,
+            # we use cached completions to speed up the process; we lose the non-deterministic behavior of LMs, but we're fine with a single result
             use_cache=True,
         )
--
GitLab
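
A minimal sketch of the control flow this patch introduces (not part of the patch itself): an argparse option `--max-tokens` is parsed from the command line, and the completion call pops `max_tokens` from `**kwargs`, falling back to the parsed CLI value. The standalone parser and the `create_completion` stand-in below are hypothetical simplifications of evoprompt's `argument_group` and `LLMModel`, used only for illustration.

```python
# Illustrative sketch only: how --max-tokens is expected to reach the completion call.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--max-tokens",
    type=int,
    help="Maximum number of tokens to generate with the LLM.",
)
# e.g. invoked as: python main.py --max-tokens 256
options = parser.parse_args(["--max-tokens", "256"])


def create_completion(prompt: str, **kwargs):
    # mirrors the patched behavior: an explicit max_tokens kwarg takes precedence,
    # otherwise fall back to the value parsed from --max-tokens (None if not given)
    max_tokens = kwargs.pop("max_tokens", options.max_tokens)
    print(f"calling the LLM with max_tokens={max_tokens}")


create_completion("Classify the sentiment of this sentence.")   # uses 256 from the CLI
create_completion("Classify the sentiment.", max_tokens=32)     # explicit per-call override
```

Popping `max_tokens` out of `**kwargs` instead of keeping it as a named parameter means callers can still override the limit per call, while the CLI value serves as the default for every completion.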