From 903b1fc97f37fda25fd233ed853355acfc0f63cf Mon Sep 17 00:00:00 2001
From: Michael Yang
Date: Tue, 16 Dec 2025 11:29:19 -0800
Subject: [PATCH] use ollama engine for bert models (#13501)

register bpe tokenizer which enables granite-embedding
---
 fs/ggml/ggml.go            | 10 ++++---
 model/models/bert/embed.go | 53 +++++++++++++++++++-------------------
 2 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index 56614a321..44a48511c 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -241,19 +241,20 @@ func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
 func (kv KV) OllamaEngineRequired() bool {
 	return slices.Contains([]string{
+		"bert",
+		"deepseek2",
+		"deepseekocr",
 		"gemma3",
 		"gemma3n",
 		"gptoss",
 		"gpt-oss",
 		"llama4",
 		"mistral3",
 		"mllama",
+		"nomic-bert",
+		"olmo3",
 		"qwen25vl",
 		"qwen3",
 		"qwen3moe",
 		"qwen3vl",
 		"qwen3vlmoe",
-		"deepseekocr",
-		"deepseek2",
-		"nomic-bert",
-		"olmo3",
 	}, kv.Architecture())
 }
@@ -839,6 +840,7 @@ func (f GGML) SupportsFlashAttention() bool {
 // FlashAttention checks if the model should enable flash attention
 func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
+		"bert",
 		"gemma3",
 		"gptoss",
 		"gpt-oss",
 		"mistral3",
diff --git a/model/models/bert/embed.go b/model/models/bert/embed.go
index 5e7ca5e92..705c63138 100644
--- a/model/models/bert/embed.go
+++ b/model/models/bert/embed.go
@@ -129,35 +129,36 @@ func (o Options) headDim() int {
 }
 
 func New(c fs.Config) (model.Model, error) {
+	vocab := &model.Vocabulary{
+		Values: c.Strings("tokenizer.ggml.tokens"),
+		Scores: c.Floats("tokenizer.ggml.scores"),
+		Types:  c.Ints("tokenizer.ggml.token_type"),
+		AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
+		BOS: []int32{
+			int32(cmp.Or(
+				c.Uint("tokenizer.ggml.cls_token_id"),
+				c.Uint("tokenizer.ggml.bos_token_id"),
+			)),
+		},
+		AddEOS: c.Bool("tokenizer.ggml.add_eos_token", true),
+		EOS: []int32{
+			int32(cmp.Or(
+				c.Uint("tokenizer.ggml.separator_token_id"),
+				//nolint:misspell
+				// NOTE: "seperator_token_id" is a typo in model metadata but we need to
+				// support it for compatibility.
+				c.Uint("tokenizer.ggml.seperator_token_id"),
+				c.Uint("tokenizer.ggml.eos_token_id"),
+			)),
+		},
+	}
+
 	var processor model.TextProcessor
 	switch c.String("tokenizer.ggml.model", "bert") {
 	case "bert":
-		processor = model.NewWordPiece(
-			&model.Vocabulary{
-				Values: c.Strings("tokenizer.ggml.tokens"),
-				Scores: c.Floats("tokenizer.ggml.scores"),
-				Types:  c.Ints("tokenizer.ggml.token_type"),
-				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
-				BOS: []int32{
-					int32(cmp.Or(
-						c.Uint("tokenizer.ggml.cls_token_id"),
-						c.Uint("tokenizer.ggml.bos_token_id"),
-					)),
-				},
-				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", true),
-				EOS: []int32{
-					int32(cmp.Or(
-						c.Uint("tokenizer.ggml.separator_token_id"),
-						//nolint:misspell
-						// NOTE: "seperator_token_id" is a typo in model metadata but we need to
-						// support it for compatibility.
-						c.Uint("tokenizer.ggml.seperator_token_id"),
-						c.Uint("tokenizer.ggml.eos_token_id"),
-					)),
-				},
-			},
-			true,
-		)
+		processor = model.NewWordPiece(vocab, true)
+	case "gpt2":
+		processor = model.NewBytePairEncoding(vocab)
 	default:
 		return nil, model.ErrUnsupportedTokenizer
 	}
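
Reviewer note: a minimal sketch of the engine gating the first hunk extends. The standalone ollamaEngineRequired function and its abbreviated architecture list below are illustrative stand-ins for the real KV.OllamaEngineRequired method in fs/ggml/ggml.go; the point is only that adding "bert" and "nomic-bert" to the list routes those architectures to the Ollama engine.

package main

import (
	"fmt"
	"slices"
)

// ollamaEngineRequired approximates KV.OllamaEngineRequired after this
// patch: models whose GGUF architecture appears in the list are handled
// by the Ollama engine. The list is abbreviated for illustration; the
// patch adds "bert" and "nomic-bert" (and keeps the list sorted).
func ollamaEngineRequired(arch string) bool {
	return slices.Contains([]string{
		"bert",
		"gemma3",
		"nomic-bert",
		"qwen3",
	}, arch)
}

func main() {
	for _, arch := range []string{"bert", "nomic-bert", "llama"} {
		fmt.Printf("%-10s -> ollama engine: %v\n", arch, ollamaEngineRequired(arch))
	}
}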
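
A similar sketch of the tokenizer registration that enables granite-embedding. New() now builds the shared Vocabulary once and dispatches on tokenizer.ggml.model; everything below other than the shape of that switch (the textProcessor interface, the wordPiece and bytePairEncoding types) is a hypothetical stand-in for ollama's model package.

package main

import (
	"errors"
	"fmt"
)

// Stand-ins for the model package: the real TextProcessor, NewWordPiece,
// and NewBytePairEncoding live in ollama's model package.
type textProcessor interface{ kind() string }

type wordPiece struct{}

func (wordPiece) kind() string { return "wordpiece" }

type bytePairEncoding struct{}

func (bytePairEncoding) kind() string { return "bpe" }

var errUnsupportedTokenizer = errors.New("unsupported tokenizer")

// newProcessor mirrors the dispatch added to bert's New(): "bert" (the
// default) selects WordPiece, while the new "gpt2" case selects
// byte-pair encoding, which is presumably what granite-embedding models
// declare in their tokenizer.ggml.model metadata.
func newProcessor(tokenizerModel string) (textProcessor, error) {
	switch tokenizerModel {
	case "bert":
		return wordPiece{}, nil
	case "gpt2":
		return bytePairEncoding{}, nil
	default:
		return nil, errUnsupportedTokenizer
	}
}

func main() {
	for _, m := range []string{"bert", "gpt2", "t5"} {
		p, err := newProcessor(m)
		if err != nil {
			fmt.Printf("%s: %v\n", m, err)
			continue
		}
		fmt.Printf("%s -> %s\n", m, p.kind())
	}
}

Factoring the vocabulary out of the "bert" case is what makes the new "gpt2" case a two-line addition: both processors share the same token metadata and differ only in how they segment text.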