use ollama engine for bert models (#13501)

register bpe tokenizer which enables granite-embedding
This commit is contained in:
Michael Yang 2025-12-16 11:29:19 -08:00 committed by GitHub
parent 89eb795293
commit 903b1fc97f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 33 additions and 30 deletions

View File

@ -241,19 +241,20 @@ func (kv KV) Bools(key string, defaultValue ...[]bool) []bool {
// OllamaEngineRequired reports whether kv.Architecture() is one of the
// architecture names listed below.
//
// NOTE(review): judging by the method name, these are presumably the
// architectures that must be run by the Ollama engine; confirm against callers.
func (kv KV) OllamaEngineRequired() bool {
	// Each architecture appears exactly once, in alphabetical order, so that a
	// new entry has one obvious place to go and repeated names stand out.
	return slices.Contains([]string{
		"bert",
		"deepseek2",
		"deepseekocr",
		"gemma3",
		"gemma3n",
		"gptoss", "gpt-oss",
		"llama4",
		"mistral3",
		"mllama",
		"nomic-bert",
		"olmo3",
		"qwen25vl",
		"qwen3", "qwen3moe",
		"qwen3vl", "qwen3vlmoe",
	}, kv.Architecture())
}
@ -839,6 +840,7 @@ func (f GGML) SupportsFlashAttention() bool {
// FlashAttention checks if the model should enable flash attention
func (f GGML) FlashAttention() bool {
return slices.Contains([]string{
"bert",
"gemma3",
"gptoss", "gpt-oss",
"mistral3",

View File

@ -129,11 +129,7 @@ func (o Options) headDim() int {
}
func New(c fs.Config) (model.Model, error) {
var processor model.TextProcessor
switch c.String("tokenizer.ggml.model", "bert") {
case "bert":
processor = model.NewWordPiece(
&model.Vocabulary{
vocab := &model.Vocabulary{
Values: c.Strings("tokenizer.ggml.tokens"),
Scores: c.Floats("tokenizer.ggml.scores"),
Types: c.Ints("tokenizer.ggml.token_type"),
@ -155,9 +151,14 @@ func New(c fs.Config) (model.Model, error) {
c.Uint("tokenizer.ggml.eos_token_id"),
)),
},
},
true,
)
}
var processor model.TextProcessor
switch c.String("tokenizer.ggml.model", "bert") {
case "bert":
processor = model.NewWordPiece(vocab, true)
case "gpt2":
processor = model.NewBytePairEncoding(vocab)
default:
return nil, model.ErrUnsupportedTokenizer
}