Sybren A. Stüvel 02fac6a4df Change Go package name from git.blender.org to projects.blender.org
Change the package base name of the Go code, from
`git.blender.org/flamenco` to `projects.blender.org/studio/flamenco`.

The old location, `git.blender.org`, has not been in use since the
[migration to Gitea][1]. The new package names now reflect the actual
location where Flamenco is hosted.

[1]: https://code.blender.org/2023/02/new-blender-development-infrastructure/
2023-08-01 12:42:31 +02:00

72 lines
2.1 KiB
Go

package timeout_checker
// SPDX-License-Identifier: GPL-3.0-or-later
import (
"context"
"github.com/rs/zerolog/log"
"projects.blender.org/studio/flamenco/internal/manager/persistence"
"projects.blender.org/studio/flamenco/pkg/api"
)
// checkWorkers fetches the workers that the persistence layer reports as timed
// out and hands each of them to timeoutWorker.
//
// The threshold is the current time of ttc.clock (in UTC) minus
// ttc.workerTimeout. A failure to fetch the workers is logged and ends this
// check; nothing is returned to the caller.
func (ttc *TimeoutChecker) checkWorkers(ctx context.Context) {
	timeoutThreshold := ttc.clock.Now().UTC().Add(-ttc.workerTimeout)

	// Every message below carries the threshold, shown in local time.
	logger := log.With().
		Time("threshold", timeoutThreshold.Local()).
		Logger()
	logger.Trace().Msg("TimeoutChecker: finding all awake workers that have not been seen since threshold")

	workers, err := ttc.persist.FetchTimedOutWorkers(ctx, timeoutThreshold)
	if err != nil {
		// Log via the contextual logger so the error entry includes the
		// threshold, just like the other messages of this function.
		logger.Error().Err(err).Msg("TimeoutChecker: error fetching timed-out workers from database")
		return
	}

	if len(workers) == 0 {
		logger.Trace().Msg("TimeoutChecker: no timed-out workers")
		return
	}

	logger.Debug().
		Int("numWorkers", len(workers)).
		Msg("TimeoutChecker: failing all awake workers that have not been seen since threshold")

	for _, worker := range workers {
		ttc.timeoutWorker(ctx, worker)
	}
}
// timeoutWorker handles a single timed-out worker: it sets the worker's status
// to api.WorkerStatusError, saves the worker, asks the task state machine to
// re-queue the worker's active tasks, and broadcasts the status change.
//
// Errors from saving or re-queueing are logged and not returned; the
// remaining steps are still performed.
func (ttc *TimeoutChecker) timeoutWorker(ctx context.Context, worker *persistence.Worker) {
	workerLog := log.With().
		Str("worker", worker.UUID).
		Str("name", worker.Name).
		Str("lastSeenAt", worker.LastSeenAt.String()).
		Logger()
	workerLog.Warn().Msg("TimeoutChecker: worker timed out")

	// Keep the old status around; it is sent along with the broadcast below.
	previousStatus := worker.Status
	worker.Status = api.WorkerStatusError
	worker.StatusChangeClear()

	if saveErr := ttc.persist.SaveWorker(ctx, worker); saveErr != nil {
		workerLog.Error().Err(saveErr).Msg("TimeoutChecker: error saving timed-out worker to database")
	}

	requeueErr := ttc.taskStateMachine.RequeueActiveTasksOfWorker(ctx, worker, "worker timed out")
	if requeueErr != nil {
		workerLog.Error().Err(requeueErr).Msg("TimeoutChecker: error re-queueing tasks of timed-out worker")
	}

	// Broadcast worker change via SocketIO. The update is built only after
	// the save attempt, so it reads worker.UpdatedAt at the same point as before.
	update := api.SocketIOWorkerUpdate{
		Id:             worker.UUID,
		Name:           worker.Name,
		PreviousStatus: &previousStatus,
		Status:         api.WorkerStatusError,
		Updated:        worker.UpdatedAt,
		Version:        worker.Software,
	}
	ttc.broadcaster.BroadcastWorkerUpdate(update)
}