Gracefully handle sleep schedules of deleted workers

Workers can be soft-deleted, which means that their rows stay in the database.
As a result, `ON DELETE CASCADE` foreign key constraints do not trigger, and
the sleep schedule of a deleted worker can still be active. This is now
detected and handled gracefully.
This commit is contained in:
Sybren A. Stüvel 2023-02-09 11:18:38 +01:00
parent 55bfc8db88
commit 426b2aab4d
3 changed files with 22 additions and 1 deletions

View File

@ -92,10 +92,16 @@ func (db *DB) SetWorkerSleepScheduleNextCheck(ctx context.Context, schedule *Sle
// FetchSleepScheduleWorker sets the given schedule's `Worker` pointer. // FetchSleepScheduleWorker sets the given schedule's `Worker` pointer.
func (db *DB) FetchSleepScheduleWorker(ctx context.Context, schedule *SleepSchedule) error { func (db *DB) FetchSleepScheduleWorker(ctx context.Context, schedule *SleepSchedule) error {
var worker Worker var worker Worker
tx := db.gormDB.WithContext(ctx).First(&worker, schedule.WorkerID) tx := db.gormDB.WithContext(ctx).Limit(1).Find(&worker, schedule.WorkerID)
if tx.Error != nil { if tx.Error != nil {
return workerError(tx.Error, "finding worker by their sleep schedule") return workerError(tx.Error, "finding worker by their sleep schedule")
} }
if worker.ID == 0 {
// Worker was not found. It could be that the worker was soft-deleted, which
// keeps the schedule around in the database.
schedule.Worker = nil
return ErrWorkerNotFound
}
schedule.Worker = &worker schedule.Worker = &worker
return nil return nil
} }

View File

@ -9,6 +9,7 @@ import (
"git.blender.org/flamenco/internal/uuid" "git.blender.org/flamenco/internal/uuid"
"git.blender.org/flamenco/pkg/api" "git.blender.org/flamenco/pkg/api"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
) )
func TestFetchWorkerSleepSchedule(t *testing.T) { func TestFetchWorkerSleepSchedule(t *testing.T) {
@ -103,6 +104,11 @@ func TestFetchSleepScheduleWorker(t *testing.T) {
assert.Equal(t, linuxWorker.ID, dbSchedule.Worker.ID) assert.Equal(t, linuxWorker.ID, dbSchedule.Worker.ID)
assert.Equal(t, linuxWorker.UUID, dbSchedule.Worker.UUID) assert.Equal(t, linuxWorker.UUID, dbSchedule.Worker.UUID)
} }
// Deleting the Worker should result in a specific error when fetching the schedule's worker again.
require.NoError(t, db.DeleteWorker(ctx, linuxWorker.UUID))
assert.ErrorIs(t, db.FetchSleepScheduleWorker(ctx, dbSchedule), ErrWorkerNotFound)
assert.Nil(t, dbSchedule.Worker)
} }
func TestSetWorkerSleepSchedule(t *testing.T) { func TestSetWorkerSleepSchedule(t *testing.T) {

View File

@ -23,6 +23,11 @@ var skipWorkersInStatus = map[api.WorkerStatus]bool{
api.WorkerStatusError: true, api.WorkerStatusError: true,
} }
// ErrWorkerNotFound is returned when the owning Worker of a sleep schedule cannot be found.
// This can happen when a Worker has been soft-deleted, which doesn't trigger
// the foreign key constraint that would otherwise delete its sleep schedule.
var ErrWorkerNotFound = errors.New("worker not found")
// SleepScheduler manages wake/sleep cycles of Workers. // SleepScheduler manages wake/sleep cycles of Workers.
type SleepScheduler struct { type SleepScheduler struct {
clock clock.Clock clock clock.Clock
@ -219,6 +224,10 @@ func (ss *SleepScheduler) checkSchedule(ctx context.Context, schedule *persisten
switch { switch {
case errors.Is(ctx.Err(), context.Canceled): case errors.Is(ctx.Err(), context.Canceled):
// Manager is shutting down, this is fine. // Manager is shutting down, this is fine.
case errors.Is(err, ErrWorkerNotFound):
// This schedule's worker cannot be found. That's fine, it could have been
// soft-deleted (and thus foreign key constraints don't trigger deletion of
// the sleep schedule).
case err != nil: case err != nil:
log.Error(). log.Error().
Err(err). Err(err).