mirror of https://github.com/ollama/ollama
ollamarunner: fix deadlock
hardErrCh can deadlock: the run loop gets stuck inside forwardBatch waiting on computeStartedCh, which never gets sent on the failure path, so the select that drains hardErrCh is never reached. Since the only response to hardErrCh is to panic, just panic at the failure site instead.
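For illustration, a minimal, self-contained Go sketch of the failure mode. The channel names mirror the runner's, but the surrounding code is a hypothetical stand-in, not the actual forwardBatch/computeBatch logic:

// Distilled sketch of the deadlock (stand-in code, not the real runner):
// the run loop drains hardErrCh only from its select, but once it enters
// the forwardBatch stand-in it blocks on computeStartedCh, so a hard
// error reported on the failure path is never received.
package main

import "errors"

func main() {
	hardErrCh := make(chan error, 1) // buffered with capacity 1, as in the runner
	computeStartedCh := make(chan struct{})

	select {
	case err := <-hardErrCh:
		panic(err) // the only response to a hard error was to panic anyway
	default:
		// forwardBatch stand-in: start compute, then wait for it to begin.
		go func() {
			// computeBatch stand-in: hits a hard error before signaling
			// computeStartedCh, reports it, and returns.
			hardErrCh <- errors.New("hard failure before compute started")
		}()
		<-computeStartedCh // blocks forever; the select above is never re-entered
	}
	// The Go runtime aborts the blocked program with:
	// fatal error: all goroutines are asleep - deadlock!
}

Panicking directly at the failure site, as this commit does, removes the handoff entirely; since the receiving end's only action was panic(err), behavior on a hard error is unchanged.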
This commit is contained in:
parent aab2190420
commit 1a2feb2a97
@@ -321,9 +321,6 @@ type Server struct {
 	// TODO (jmorganca): make this n_batch
 	batchSize int
 
-	// Used to signal a hard failure during async processing which will panic the runner
-	hardErrCh chan error
-
 	// Simple counter used only for trace logging batches
 	batchID int
 
@@ -411,8 +408,6 @@ func (s *Server) run(ctx context.Context) {
 		select {
 		case <-ctx.Done():
 			return
-		case err := <-s.hardErrCh:
-			panic(err)
 		default:
 			var err error
 			nextBatch, err := s.forwardBatch(previousBatch)
@@ -663,9 +658,7 @@ func (s *Server) computeBatch(activeBatch batchState) {
 		// don't sample prompt processing
 		if len(seq.inputs) != 0 {
 			if !s.cache.enabled {
-				s.hardErrCh <- fmt.Errorf("caching disabled but unable to fit entire input in a batch")
-				s.mu.Unlock()
-				return
+				panic("caching disabled but unable to fit entire input in a batch")
 			}
 			continue
 		}
@@ -720,8 +713,7 @@ func (s *Server) computeBatch(activeBatch batchState) {
 			logutil.Trace("computeBatch: vocab details", "batchID", activeBatch.id, "seqIdx", i, "len(logits)", len(outputs), "len(activeBatch.batch.Outputs)", activeBatch.batch.Outputs.Dim(0), "vocabSize", vocabSize, "iBatches", iBatches)
 			token, err := seq.sampler.Sample(outputs[iBatches[i]*vocabSize : (iBatches[i]+1)*vocabSize])
 			if err != nil {
-				s.hardErrCh <- fmt.Errorf("failed to sample token: %w", err)
-				return
+				panic("failed to sample token")
 			}
 
 			nextBatchTokens[i].Token = token
@@ -738,8 +730,7 @@ func (s *Server) computeBatch(activeBatch batchState) {
 
 			piece, err := s.model.(model.TextProcessor).Decode([]int32{token})
 			if err != nil {
-				s.hardErrCh <- fmt.Errorf("failed to decode token: %w", err)
-				return
+				panic("failed to decode token")
 			}
 
 			seq.pendingResponses = append(seq.pendingResponses, piece)
@@ -1321,7 +1312,6 @@ func Execute(args []string) error {
 	server := &Server{
 		modelPath: *mpath,
 		status:    llm.ServerStatusLaunched,
-		hardErrCh: make(chan error, 1),
 	}
 
 	server.cond = sync.NewCond(&server.mu)