From 426b2aab4deb910e7627ea2d85809105dc8187f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= Date: Thu, 9 Feb 2023 11:18:38 +0100 Subject: [PATCH] Gracefully handle sleep schedules of deleted workers Workers can be soft-deleted, which means that they stay in the database. As such, foreign key constraints `ON DELETE CASCADE` do not trigger, and thus their sleep schedule can still be active. This is now detected and handled gracefully. --- internal/manager/persistence/worker_sleep_schedule.go | 8 +++++++- .../manager/persistence/worker_sleep_schedule_test.go | 6 ++++++ internal/manager/sleep_scheduler/sleep_scheduler.go | 9 +++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/internal/manager/persistence/worker_sleep_schedule.go b/internal/manager/persistence/worker_sleep_schedule.go index a68a0707..4fa40122 100644 --- a/internal/manager/persistence/worker_sleep_schedule.go +++ b/internal/manager/persistence/worker_sleep_schedule.go @@ -92,10 +92,16 @@ func (db *DB) SetWorkerSleepScheduleNextCheck(ctx context.Context, schedule *Sle // FetchSleepScheduleWorker sets the given schedule's `Worker` pointer. func (db *DB) FetchSleepScheduleWorker(ctx context.Context, schedule *SleepSchedule) error { var worker Worker - tx := db.gormDB.WithContext(ctx).First(&worker, schedule.WorkerID) + tx := db.gormDB.WithContext(ctx).Limit(1).Find(&worker, schedule.WorkerID) if tx.Error != nil { return workerError(tx.Error, "finding worker by their sleep schedule") } + if worker.ID == 0 { + // Worker was not found. It could be that the worker was soft-deleted, which + // keeps the schedule around in the database. + schedule.Worker = nil + return ErrWorkerNotFound + } schedule.Worker = &worker return nil } diff --git a/internal/manager/persistence/worker_sleep_schedule_test.go b/internal/manager/persistence/worker_sleep_schedule_test.go index 4a015681..bcfc70b0 100644 --- a/internal/manager/persistence/worker_sleep_schedule_test.go +++ b/internal/manager/persistence/worker_sleep_schedule_test.go @@ -9,6 +9,7 @@ import ( "git.blender.org/flamenco/internal/uuid" "git.blender.org/flamenco/pkg/api" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestFetchWorkerSleepSchedule(t *testing.T) { @@ -103,6 +104,11 @@ func TestFetchSleepScheduleWorker(t *testing.T) { assert.Equal(t, linuxWorker.ID, dbSchedule.Worker.ID) assert.Equal(t, linuxWorker.UUID, dbSchedule.Worker.UUID) } + + // Deleting the Worker should result in a specific error when fetching the schedule again. + require.NoError(t, db.DeleteWorker(ctx, linuxWorker.UUID)) + assert.ErrorIs(t, db.FetchSleepScheduleWorker(ctx, dbSchedule), ErrWorkerNotFound) + assert.Nil(t, dbSchedule.Worker) } func TestSetWorkerSleepSchedule(t *testing.T) { diff --git a/internal/manager/sleep_scheduler/sleep_scheduler.go b/internal/manager/sleep_scheduler/sleep_scheduler.go index efd3228d..58223564 100644 --- a/internal/manager/sleep_scheduler/sleep_scheduler.go +++ b/internal/manager/sleep_scheduler/sleep_scheduler.go @@ -23,6 +23,11 @@ var skipWorkersInStatus = map[api.WorkerStatus]bool{ api.WorkerStatusError: true, } +// ErrWorkerNotFound is returned when the owning Worker of a sleep schedule cannot be found. +// This can happen when a Worker has been soft-deleted, which doesn't +// automatically trigger the deletion of the foreign key constraints. +var ErrWorkerNotFound = errors.New("worker not found") + // SleepScheduler manages wake/sleep cycles of Workers. type SleepScheduler struct { clock clock.Clock @@ -219,6 +224,10 @@ func (ss *SleepScheduler) checkSchedule(ctx context.Context, schedule *persisten switch { case errors.Is(ctx.Err(), context.Canceled): // Manager is shutting down, this is fine. + case errors.Is(err, ErrWorkerNotFound): + // This schedule's worker cannot be found. That's fine, it could have been + // soft-deleted (and thus foreign key constraints don't trigger deletion of + // the sleep schedule). case err != nil: log.Error(). Err(err).