use ollama engine for bert models (#13501)

register bpe tokenizer which enables granite-embedding
This commit is contained in:
Michael Yang 2025-12-16 11:29:19 -08:00 committed by GitHub
parent 89eb795293
commit 903b1fc97f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 33 additions and 30 deletions

View File

@@ -241,19 +241,20 @@ func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
 func (kv KV) OllamaEngineRequired() bool {
 	return slices.Contains([]string{
+		"bert",
+		"deepseek2",
+		"deepseekocr",
 		"gemma3",
 		"gemma3n",
 		"gptoss", "gpt-oss",
 		"llama4",
 		"mistral3",
 		"mllama",
+		"nomic-bert",
+		"olmo3",
 		"qwen25vl",
 		"qwen3", "qwen3moe",
 		"qwen3vl", "qwen3vlmoe",
-		"deepseekocr",
-		"deepseek2",
-		"nomic-bert",
-		"olmo3",
 	}, kv.Architecture())
 }
@@ -839,6 +840,7 @@ func (f GGML) SupportsFlashAttention() bool {
 // FlashAttention checks if the model should enable flash attention
 func (f GGML) FlashAttention() bool {
 	return slices.Contains([]string{
+		"bert",
 		"gemma3",
 		"gptoss", "gpt-oss",
 		"mistral3",

View File

@@ -129,35 +129,36 @@ func (o Options) headDim() int {
 }
 func New(c fs.Config) (model.Model, error) {
+	vocab := &model.Vocabulary{
+		Values: c.Strings("tokenizer.ggml.tokens"),
+		Scores: c.Floats("tokenizer.ggml.scores"),
+		Types:  c.Ints("tokenizer.ggml.token_type"),
+		AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
+		BOS: []int32{
+			int32(cmp.Or(
+				c.Uint("tokenizer.ggml.cls_token_id"),
+				c.Uint("tokenizer.ggml.bos_token_id"),
+			)),
+		},
+		AddEOS: c.Bool("tokenizer.ggml.add_eos_token", true),
+		EOS: []int32{
+			int32(cmp.Or(
+				c.Uint("tokenizer.ggml.separator_token_id"),
+				//nolint:misspell
+				// NOTE: "seperator_token_id" is a typo in model metadata but we need to
+				// support it for compatibility.
+				c.Uint("tokenizer.ggml.seperator_token_id"),
+				c.Uint("tokenizer.ggml.eos_token_id"),
+			)),
+		},
+	}
 	var processor model.TextProcessor
 	switch c.String("tokenizer.ggml.model", "bert") {
 	case "bert":
-		processor = model.NewWordPiece(
-			&model.Vocabulary{
-				Values: c.Strings("tokenizer.ggml.tokens"),
-				Scores: c.Floats("tokenizer.ggml.scores"),
-				Types:  c.Ints("tokenizer.ggml.token_type"),
-				AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
-				BOS: []int32{
-					int32(cmp.Or(
-						c.Uint("tokenizer.ggml.cls_token_id"),
-						c.Uint("tokenizer.ggml.bos_token_id"),
-					)),
-				},
-				AddEOS: c.Bool("tokenizer.ggml.add_eos_token", true),
-				EOS: []int32{
-					int32(cmp.Or(
-						c.Uint("tokenizer.ggml.separator_token_id"),
-						//nolint:misspell
-						// NOTE: "seperator_token_id" is a typo in model metadata but we need to
-						// support it for compatibility.
-						c.Uint("tokenizer.ggml.seperator_token_id"),
-						c.Uint("tokenizer.ggml.eos_token_id"),
-					)),
-				},
-			},
-			true,
-		)
+		processor = model.NewWordPiece(vocab, true)
+	case "gpt2":
+		processor = model.NewBytePairEncoding(vocab)
 	default:
 		return nil, model.ErrUnsupportedTokenizer
 	}