From 773089515805bc32391f6662656824ae58d573a4 Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen
Date: Fri, 12 Dec 2025 11:48:43 -0800
Subject: [PATCH] Enable Ollama engine by default (#13443)

This changes the default behavior to use the Ollama engine for supported
models, while retaining the ability to disable the Ollama engine and fall
back to the Llama engine. Models in the OllamaEngineRequired list will
always run on the Ollama engine.
---
 envconfig/config.go | 4 ++--
 llm/server.go       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/envconfig/config.go b/envconfig/config.go
index 238e5e6e1..c0b2e2f01 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -199,7 +199,7 @@ var (
 	// MultiUserCache optimizes prompt caching for multi-user scenarios
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
-	NewEngine = Bool("OLLAMA_NEW_ENGINE")
+	NewEngine = BoolWithDefault("OLLAMA_NEW_ENGINE")
 	// ContextLength sets the default context length
 	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
 	// Auth enables authentication between the Ollama client and server
@@ -291,7 +291,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_SCHED_SPREAD":    {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
 		"OLLAMA_CONTEXT_LENGTH":  {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
-		"OLLAMA_NEW_ENGINE":      {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
+		"OLLAMA_NEW_ENGINE":      {"OLLAMA_NEW_ENGINE", NewEngine(true), "Enable the new Ollama engine"},
 		"OLLAMA_REMOTES":         {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},

 		// Informational
diff --git a/llm/server.go b/llm/server.go
index 5c232f0fa..abf6035dd 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -143,7 +143,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
 	var llamaModel *llama.Model
 	var textProcessor model.TextProcessor
 	var err error
-	if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
+	if envconfig.NewEngine(true) || f.KV().OllamaEngineRequired() {
 		if len(projectors) == 0 {
 			textProcessor, err = model.NewTextProcessor(modelPath)
 		} else {
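
Note for reviewers: the diff calls BoolWithDefault but its definition is not
part of this patch. Below is a minimal sketch of what it might look like,
assuming it mirrors the existing envconfig.Bool helper (which treats a
non-empty but unparseable value as true); Var is reproduced only to keep the
sketch self-contained. This is illustrative, not code from this change.

// Sketch (assumption): BoolWithDefault returns a getter whose caller
// supplies the value to use when the variable is unset, so call sites such
// as NewEngine(true) choose the default.
package envconfig

import (
	"os"
	"strconv"
	"strings"
)

// Var returns an environment variable stripped of leading and trailing
// quotes or spaces, as the existing helpers in envconfig/config.go do.
func Var(key string) string {
	return strings.Trim(strings.TrimSpace(os.Getenv(key)), "\"'")
}

// BoolWithDefault parses the variable with strconv.ParseBool when it is set;
// a non-empty but unparseable value enables the flag, matching Bool's
// behavior, and an unset variable falls back to the caller's default.
func BoolWithDefault(key string) func(defaultValue bool) bool {
	return func(defaultValue bool) bool {
		if s := Var(key); s != "" {
			b, err := strconv.ParseBool(s)
			if err != nil {
				return true
			}
			return b
		}
		return defaultValue
	}
}

With this shape, NewEngine(true) defaults to the new engine, while a user can
still set OLLAMA_NEW_ENGINE=false to fall back to the Llama engine for models
outside the OllamaEngineRequired list.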