Commit 691ced52 authored by Max Kimmich

Add CLI option for maximum number of tokens being generated

parent 8ba5ca7f
1 merge request: !1 Refactor models
@@ -67,12 +67,12 @@ class LLMModel(ABC):
         prompt_suffix: str = "",
         chat: bool | None = None,
         stop: str = None,
-        max_tokens: int = None,
         history: dict = None,
         **kwargs: Any,
     ) -> tuple[str, ModelUsage]:
         if chat is None:
             chat = self.chat
+        max_tokens = kwargs.pop("max_tokens", self.options.max_tokens)
         # create prompt
         prompt = prompt_prefix + prompt + prompt_suffix + prompt_appendix
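The effect of the new kwargs.pop line is that a max_tokens value passed for an individual call still wins, while the value parsed from the new CLI flag becomes the default. The following is a minimal, self-contained sketch of that precedence; the class and method names are hypothetical stand-ins, not the repository's actual LLMModel API.

from dataclasses import dataclass
from typing import Any


@dataclass
class FakeOptions:  # hypothetical stand-in for the parsed CLI options
    max_tokens: int | None = None


class FakeModel:  # hypothetical stand-in for an LLM wrapper
    def __init__(self, options: FakeOptions) -> None:
        self.options = options

    def create_completion(self, prompt: str, **kwargs: Any) -> None:
        # a per-call max_tokens overrides the CLI-provided default
        max_tokens = kwargs.pop("max_tokens", self.options.max_tokens)
        print(f"prompt={prompt!r} max_tokens={max_tokens}")


model = FakeModel(FakeOptions(max_tokens=256))
model.create_completion("Hello")                 # uses the CLI default: 256
model.create_completion("Hello", max_tokens=32)  # per-call override: 32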
@@ -334,4 +334,9 @@ argument_group.add_argument(
     "--disable-cache",
     action="store_true",
 )
+argument_group.add_argument(
+    "--max-tokens",
+    type=int,
+    help="Maximum number of tokens generated by the LLM.",
+)
 argument_group.add_argument("--chat", "-c", action="store_true")
@@ -318,6 +318,7 @@ class Task(metaclass=ABCMeta):
            prompt_appendix=self._get_prompt_text_for_datum(datum),
            # grammar can be applied to constrain the model output
            grammar=self._get_grammar(datum) if self.use_grammar else None,
            # cached completions speed up evaluation; we lose the non-deterministic behavior of LMs, but a single result is fine here
            use_cache=True,
        )
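The use_cache comment describes a deliberate trade-off: repeated evaluations of the same prompt return instantly, at the cost of always seeing the first sampled completion. The repository's cache implementation is not shown in this commit; the toy memoization below only illustrates that trade-off, with a hypothetical stand-in for the model call.

import random
from functools import lru_cache


def expensive_llm_call(prompt: str) -> str:  # hypothetical stand-in for a real, non-deterministic completion call
    return f"{prompt} -> {random.randint(0, 9)}"


@lru_cache(maxsize=None)
def cached_completion(prompt: str) -> str:
    return expensive_llm_call(prompt)


print(cached_completion("Hello"))  # computed once
print(cached_completion("Hello"))  # identical string served from the cache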