mirror of https://github.com/ollama/ollama
84 lines
2.4 KiB
Go
84 lines
2.4 KiB
Go
package deepseekocr
|
|
|
|
import (
|
|
"bytes"
|
|
"image"
|
|
"image/color"
|
|
"math"
|
|
"slices"
|
|
|
|
"golang.org/x/image/draw"
|
|
|
|
"github.com/ollama/ollama/ml"
|
|
"github.com/ollama/ollama/model/imageproc"
|
|
)
|
|
|
|
// ratio describes a tile grid layout. Its x/y quotient is compared against an
// image's width/height to pick the layout that distorts the image least.
type ratio struct {
	// x is the number of tiles along the image width.
	x int
	// y is the number of tiles along the image height.
	y int
}
|
|
|
|
func ProcessImage(ctx ml.Context, bts []byte) (ml.Tensor, ml.Tensor, []int, error) {
|
|
img, _, err := image.Decode(bytes.NewReader(bts))
|
|
if err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
|
|
minNum, maxNum, imageSize, baseSize := 2, 9, 640, 1024
|
|
var targetRatios []ratio
|
|
for n := minNum; n <= maxNum; n++ {
|
|
for i := 1; i <= n; i++ {
|
|
for j := 1; j <= n; j++ {
|
|
if i*j <= maxNum && i*j >= minNum && !slices.Contains(targetRatios, ratio{i, j}) {
|
|
targetRatios = append(targetRatios, ratio{i, j})
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
targetRatio := findBestAspectRatio(targetRatios, img.Bounds().Dx(), img.Bounds().Dy(), imageSize)
|
|
targetWidth, targetHeight := imageSize*targetRatio.x, imageSize*targetRatio.y
|
|
blocks := targetRatio.x * targetRatio.y
|
|
|
|
mean := imageproc.ImageNetStandardMean
|
|
std := imageproc.ImageNetStandardSTD
|
|
|
|
var patches []float32
|
|
resized := imageproc.Resize(img, image.Point{X: targetWidth, Y: targetHeight}, imageproc.ResizeBilinear)
|
|
for i := range blocks {
|
|
patch := image.NewRGBA(image.Rect(0, 0, imageSize, imageSize))
|
|
draw.Draw(patch, patch.Bounds(), resized, image.Point{
|
|
X: i % (targetWidth / imageSize) * imageSize,
|
|
Y: i / (targetWidth / imageSize) * imageSize,
|
|
}, draw.Over)
|
|
|
|
patches = append(patches, imageproc.Normalize(patch, mean, std, true, true)...)
|
|
}
|
|
|
|
img = imageproc.CompositeColor(img, color.Gray{})
|
|
img = imageproc.Pad(img, image.Point{X: baseSize, Y: baseSize}, color.Gray{127}, draw.BiLinear)
|
|
|
|
return ctx.Input().FromFloats(patches, imageSize, imageSize, 3, blocks),
|
|
ctx.Input().FromFloats(imageproc.Normalize(img, mean, std, true, true), baseSize, baseSize, 3),
|
|
[]int{targetRatio.x, targetRatio.y},
|
|
nil
|
|
}
|
|
|
|
func findBestAspectRatio(targetRatios []ratio, width, height, imageSize int) ratio {
|
|
bestDiff := math.MaxFloat64
|
|
best := ratio{1, 1}
|
|
realRatio := float64(width) / float64(height)
|
|
for _, target := range targetRatios {
|
|
targetRatio := float64(target.x) / float64(target.y)
|
|
diff := math.Abs(realRatio - targetRatio)
|
|
if diff < bestDiff {
|
|
bestDiff = diff
|
|
best = target
|
|
} else if diff == bestDiff {
|
|
if float64(width*height) > 0.5*float64(imageSize*imageSize*best.x*best.y) {
|
|
best = target
|
|
}
|
|
}
|
|
}
|
|
return best
|
|
}
|