From 903b1fc97f37fda25fd233ed853355acfc0f63cf Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Tue, 16 Dec 2025 11:29:19 -0800
Subject: [PATCH] use ollama engine for bert models (#13501)

register bpe tokenizer which enables granite-embedding
---
 fs/ggml/ggml.go            | 10 ++++---
 model/models/bert/embed.go | 53 +++++++++++++++++++-------------------
 2 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index 56614a321..44a48511c 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -241,19 +241,20 @@ func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
 func (kv KV) OllamaEngineRequired() bool {
 	return slices.Contains([]string{
+		"bert",
+		"deepseek2",
+		"deepseekocr",
 		"gemma3",
 		"gemma3n",
 		"gptoss",
 		"gpt-oss",
 		"llama4",
 		"mistral3",
 		"mllama",
+		"nomic-bert",
+		"olmo3",
 		"qwen25vl",
 		"qwen3",
 		"qwen3moe",
 		"qwen3vl",
 		"qwen3vlmoe",
-		"deepseekocr",
-		"deepseek2",
-		"nomic-bert",
-		"olmo3",
 	}, kv.Architecture())
 }
@@ -839,6 +840,7 @@ func (f GGML) SupportsFlashAttention() bool {
 // FlashAttention checks if the model should enable flash attention
 func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
+		"bert",
 		"gemma3",
 		"gptoss",
 		"gpt-oss",
 		"mistral3",
diff --git a/model/models/bert/embed.go b/model/models/bert/embed.go
index 5e7ca5e92..705c63138 100644
--- a/model/models/bert/embed.go
+++ b/model/models/bert/embed.go
@@ -129,35 +129,36 @@ func (o Options) headDim() int {
 }
 
 func New(c fs.Config) (model.Model, error) {
+	vocab := &model.Vocabulary{
+		Values: c.Strings("tokenizer.ggml.tokens"),
+		Scores: c.Floats("tokenizer.ggml.scores"),
+		Types:  c.Ints("tokenizer.ggml.token_type"),
+		AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
+		BOS: []int32{
+			int32(cmp.Or(
+				c.Uint("tokenizer.ggml.cls_token_id"),
+				c.Uint("tokenizer.ggml.bos_token_id"),
+			)),
+		},
+		AddEOS: c.Bool("tokenizer.ggml.add_eos_token", true),
+		EOS: []int32{
+			int32(cmp.Or(
+				c.Uint("tokenizer.ggml.separator_token_id"),
+				//nolint:misspell
+				// NOTE: "seperator_token_id" is a typo in model metadata but we need to
+				// support it for compatibility.
+				c.Uint("tokenizer.ggml.seperator_token_id"),
+				c.Uint("tokenizer.ggml.eos_token_id"),
+			)),
+		},
+	}
+
 	var processor model.TextProcessor
 	switch c.String("tokenizer.ggml.model", "bert") {
 	case "bert":
-		processor = model.NewWordPiece(
-			&model.Vocabulary{
-				Values: c.Strings("tokenizer.ggml.tokens"),
-				Scores: c.Floats("tokenizer.ggml.scores"),
-				Types:  c.Ints("tokenizer.ggml.token_type"),
-				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
-				BOS: []int32{
-					int32(cmp.Or(
-						c.Uint("tokenizer.ggml.cls_token_id"),
-						c.Uint("tokenizer.ggml.bos_token_id"),
-					)),
-				},
-				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", true),
-				EOS: []int32{
-					int32(cmp.Or(
-						c.Uint("tokenizer.ggml.separator_token_id"),
-						//nolint:misspell
-						// NOTE: "seperator_token_id" is a typo in model metadata but we need to
-						// support it for compatibility.
-						c.Uint("tokenizer.ggml.seperator_token_id"),
-						c.Uint("tokenizer.ggml.eos_token_id"),
-					)),
-				},
-			},
-			true,
-		)
+		processor = model.NewWordPiece(vocab, true)
+	case "gpt2":
+		processor = model.NewBytePairEncoding(vocab)
 	default:
 		return nil, model.ErrUnsupportedTokenizer
 	}
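
Reviewer note: a minimal sketch of the engine gating the first hunk extends. The standalone ollamaEngineRequired function and its abbreviated architecture list below are illustrative stand-ins for the real KV.OllamaEngineRequired method in fs/ggml/ggml.go; the point is only that adding "bert" and "nomic-bert" to the list routes those architectures to the Ollama engine.

package main

import (
	"fmt"
	"slices"
)

// ollamaEngineRequired approximates KV.OllamaEngineRequired after this
// patch: models whose GGUF architecture appears in the list are handled
// by the Ollama engine. The list is abbreviated for illustration; the
// patch adds "bert" and "nomic-bert" (and keeps the list sorted).
func ollamaEngineRequired(arch string) bool {
	return slices.Contains([]string{
		"bert",
		"gemma3",
		"nomic-bert",
		"qwen3",
	}, arch)
}

func main() {
	for _, arch := range []string{"bert", "nomic-bert", "llama"} {
		fmt.Printf("%-10s -> ollama engine: %v\n", arch, ollamaEngineRequired(arch))
	}
}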
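
A similar sketch of the tokenizer registration that enables granite-embedding. New() now builds the shared Vocabulary once and dispatches on tokenizer.ggml.model; everything below other than the shape of that switch (the textProcessor interface, the wordPiece and bytePairEncoding types) is a hypothetical stand-in for ollama's model package.

package main

import (
	"errors"
	"fmt"
)

// Stand-ins for the model package: the real TextProcessor, NewWordPiece,
// and NewBytePairEncoding live in ollama's model package.
type textProcessor interface{ kind() string }

type wordPiece struct{}

func (wordPiece) kind() string { return "wordpiece" }

type bytePairEncoding struct{}

func (bytePairEncoding) kind() string { return "bpe" }

var errUnsupportedTokenizer = errors.New("unsupported tokenizer")

// newProcessor mirrors the dispatch added to bert's New(): "bert" (the
// default) selects WordPiece, while the new "gpt2" case selects
// byte-pair encoding, which is presumably what granite-embedding models
// declare in their tokenizer.ggml.model metadata.
func newProcessor(tokenizerModel string) (textProcessor, error) {
	switch tokenizerModel {
	case "bert":
		return wordPiece{}, nil
	case "gpt2":
		return bytePairEncoding{}, nil
	default:
		return nil, errUnsupportedTokenizer
	}
}

func main() {
	for _, m := range []string{"bert", "gpt2", "t5"} {
		p, err := newProcessor(m)
		if err != nil {
			fmt.Printf("%s: %v\n", m, err)
			continue
		}
		fmt.Printf("%s -> %s\n", m, p.kind())
	}
}

Factoring the vocabulary out of the "bert" case is what makes the new "gpt2" case a two-line addition: both processors share the same token metadata and differ only in how they segment text.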