From 773089515805bc32391f6662656824ae58d573a4 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Fri, 12 Dec 2025 11:48:43 -0800
Subject: [PATCH] Enable Ollama engine by default (#13443)

This changes the default behavior to use the Ollama engine for supported
models, while retaining the ability to disable the Ollama engine and fall
back to the Llama engine. Models in the OllamaEngineRequired list will
always run on the Ollama engine.
---
 envconfig/config.go | 4 ++--
 llm/server.go       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/envconfig/config.go b/envconfig/config.go
index 238e5e6e1..c0b2e2f01 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -199,7 +199,7 @@ var (
 	// MultiUserCache optimizes prompt caching for multi-user scenarios
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
-	NewEngine = Bool("OLLAMA_NEW_ENGINE")
+	NewEngine = BoolWithDefault("OLLAMA_NEW_ENGINE")
 	// ContextLength sets the default context length
 	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
 	// Auth enables authentication between the Ollama client and server
@@ -291,7 +291,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_SCHED_SPREAD":    {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
 		"OLLAMA_CONTEXT_LENGTH":  {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
-		"OLLAMA_NEW_ENGINE":      {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
+		"OLLAMA_NEW_ENGINE":      {"OLLAMA_NEW_ENGINE", NewEngine(true), "Enable the new Ollama engine"},
 		"OLLAMA_REMOTES":         {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},

 		// Informational
diff --git a/llm/server.go b/llm/server.go
index 5c232f0fa..abf6035dd 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -143,7 +143,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
 	var llamaModel *llama.Model
 	var textProcessor model.TextProcessor
 	var err error
-	if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
+	if envconfig.NewEngine(true) || f.KV().OllamaEngineRequired() {
 		if len(projectors) == 0 {
 			textProcessor, err = model.NewTextProcessor(modelPath)
 		} else {
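
Note for reviewers: the diff calls BoolWithDefault but its definition is not
part of this patch. Below is a minimal sketch of what it might look like,
assuming it mirrors the existing envconfig.Bool helper (which treats a
non-empty but unparseable value as true); Var is reproduced only to keep the
sketch self-contained. This is illustrative, not code from this change.

// Sketch (assumption): BoolWithDefault returns a getter whose caller
// supplies the value to use when the variable is unset, so call sites such
// as NewEngine(true) choose the default.
package envconfig

import (
	"os"
	"strconv"
	"strings"
)

// Var returns an environment variable stripped of leading and trailing
// quotes or spaces, as the existing helpers in envconfig/config.go do.
func Var(key string) string {
	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
}

// BoolWithDefault parses the variable with strconv.ParseBool when it is set;
// a non-empty but unparseable value enables the flag, matching Bool's
// behavior, and an unset variable falls back to the caller's default.
func BoolWithDefault(key string) func(defaultValue bool) bool {
	return func(defaultValue bool) bool {
		if s := Var(key); s != "" {
			b, err := strconv.ParseBool(s)
			if err != nil {
				return true
			}
			return b
		}
		return defaultValue
	}
}

With this shape, NewEngine(true) defaults to the new engine, while a user can
still set OLLAMA_NEW_ENGINE=false to fall back to the Llama engine for models
outside the OllamaEngineRequired list.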