Commit 3faa07e9 authored by Max Kimmich

Offload all layers to GPU for llama_cpp

parent cbade5a0
Merge request !7: Refactor tasks and models and fix format for various models
@@ -209,7 +209,7 @@ class Llama(LLMModel):
         chat_handler: str | None = None,
         verbose: int | bool = False,
         llama_verbose: bool = False,
-        n_gpu_layers: int = 60,
+        n_gpu_layers: int = -1,
         n_threads: int = 8,
         n_ctx: int = 8192,
         **kwargs,
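For context: in llama.cpp (and its Python binding, llama-cpp-python), `n_gpu_layers` controls how many transformer layers are offloaded to the GPU, and `-1` is the conventional value for "offload all layers", which matches the commit message. A minimal sketch of how the changed defaults would be passed through (the surrounding names and the model path are illustrative assumptions; only `n_gpu_layers`, `n_threads`, and `n_ctx` come from the diff above):

```python
# Defaults after this commit (sketch, not the project's actual wrapper code):
llama_kwargs = {
    "n_gpu_layers": -1,  # -1 = offload ALL model layers to the GPU (llama.cpp convention);
                         # the old default of 60 only covered models with <= 60 layers
    "n_threads": 8,      # CPU threads for any layers left on the CPU
    "n_ctx": 8192,       # context window size in tokens
}

# With llama-cpp-python installed and GPU support compiled in, these
# would typically be forwarded to the constructor, e.g.:
#   from llama_cpp import Llama
#   llm = Llama(model_path="path/to/model.gguf", **llama_kwargs)
print(llama_kwargs["n_gpu_layers"])
```

The practical effect of the change: instead of capping offload at 60 layers (which silently left layers on the CPU for larger models), the wrapper now asks llama.cpp to place every layer on the GPU by default.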