diff --git a/envconfig/config.go b/envconfig/config.go
index 238e5e6e1..c0b2e2f01 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -199,7 +199,7 @@ var (
 	// MultiUserCache optimizes prompt caching for multi-user scenarios
 	MultiUserCache = Bool("OLLAMA_MULTIUSER_CACHE")
 	// Enable the new Ollama engine
-	NewEngine = Bool("OLLAMA_NEW_ENGINE")
+	NewEngine = BoolWithDefault("OLLAMA_NEW_ENGINE")
 	// ContextLength sets the default context length
 	ContextLength = Uint("OLLAMA_CONTEXT_LENGTH", 4096)
 	// Auth enables authentication between the Ollama client and server
@@ -291,7 +291,7 @@ func AsMap() map[string]EnvVar {
 		"OLLAMA_SCHED_SPREAD":    {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
 		"OLLAMA_MULTIUSER_CACHE": {"OLLAMA_MULTIUSER_CACHE", MultiUserCache(), "Optimize prompt caching for multi-user scenarios"},
 		"OLLAMA_CONTEXT_LENGTH":  {"OLLAMA_CONTEXT_LENGTH", ContextLength(), "Context length to use unless otherwise specified (default: 4096)"},
-		"OLLAMA_NEW_ENGINE":      {"OLLAMA_NEW_ENGINE", NewEngine(), "Enable the new Ollama engine"},
+		"OLLAMA_NEW_ENGINE":      {"OLLAMA_NEW_ENGINE", NewEngine(true), "Enable the new Ollama engine"},
 		"OLLAMA_REMOTES":         {"OLLAMA_REMOTES", Remotes(), "Allowed hosts for remote models (default \"ollama.com\")"},

 		// Informational
diff --git a/llm/server.go b/llm/server.go
index 5c232f0fa..abf6035dd 100644
--- a/llm/server.go
+++ b/llm/server.go
@@ -143,7 +143,7 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
 	var llamaModel *llama.Model
 	var textProcessor model.TextProcessor
 	var err error
-	if envconfig.NewEngine() || f.KV().OllamaEngineRequired() {
+	if envconfig.NewEngine(true) || f.KV().OllamaEngineRequired() {
 		if len(projectors) == 0 {
 			textProcessor, err = model.NewTextProcessor(modelPath)
 		} else {
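
The change above switches OLLAMA_NEW_ENGINE from the plain Bool reader to a BoolWithDefault helper, so each call site passes its fallback explicitly (NewEngine(true) in both AsMap and NewLlamaServer). The helper's definition is not part of these hunks; the following is a minimal sketch of how it could look, assuming it mirrors the existing Bool helper's env-var parsing. The trimming and error-handling details are assumptions, not the actual implementation.

// A minimal sketch, assuming BoolWithDefault mirrors the existing Bool helper
// but takes the fallback at the call site instead of hard-coding false.
package envconfig

import (
	"os"
	"strconv"
	"strings"
)

// BoolWithDefault returns a getter that parses the environment variable when
// it is set and otherwise returns the caller-supplied default, so call sites
// like NewEngine(true) can keep the new engine on unless explicitly disabled.
func BoolWithDefault(k string) func(defaultValue bool) bool {
	return func(defaultValue bool) bool {
		// Strip whitespace and surrounding quotes before parsing (assumption:
		// the same normalization the package applies to other variables).
		s := strings.Trim(strings.TrimSpace(os.Getenv(k)), "\"'")
		if s == "" {
			return defaultValue
		}
		b, err := strconv.ParseBool(s)
		if err != nil {
			// Unparsable values fall back to the default rather than failing.
			return defaultValue
		}
		return b
	}
}

With this shape, NewEngine = BoolWithDefault("OLLAMA_NEW_ENGINE") keeps NewEngine a function value, and existing callers only change from NewEngine() to NewEngine(true); an unset OLLAMA_NEW_ENGINE then resolves to the passed-in default.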