Gracefully handle sleep schedules of deleted workers
Workers can be soft-deleted, which means that they stay in the database. As such, foreign key constraints `ON DELETE CASCADE` do not trigger, and thus their sleep schedule can still be active. This is now detected and handled gracefully.
This commit is contained in:
parent
55bfc8db88
commit
426b2aab4d
@ -92,10 +92,16 @@ func (db *DB) SetWorkerSleepScheduleNextCheck(ctx context.Context, schedule *Sle
|
||||
// FetchSleepScheduleWorker sets the given schedule's `Worker` pointer.
|
||||
func (db *DB) FetchSleepScheduleWorker(ctx context.Context, schedule *SleepSchedule) error {
|
||||
var worker Worker
|
||||
tx := db.gormDB.WithContext(ctx).First(&worker, schedule.WorkerID)
|
||||
tx := db.gormDB.WithContext(ctx).Limit(1).Find(&worker, schedule.WorkerID)
|
||||
if tx.Error != nil {
|
||||
return workerError(tx.Error, "finding worker by their sleep schedule")
|
||||
}
|
||||
if worker.ID == 0 {
|
||||
// Worker was not found. It could be that the worker was soft-deleted, which
|
||||
// keeps the schedule around in the database.
|
||||
schedule.Worker = nil
|
||||
return ErrWorkerNotFound
|
||||
}
|
||||
schedule.Worker = &worker
|
||||
return nil
|
||||
}
|
||||
|
@ -9,6 +9,7 @@ import (
|
||||
"git.blender.org/flamenco/internal/uuid"
|
||||
"git.blender.org/flamenco/pkg/api"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestFetchWorkerSleepSchedule(t *testing.T) {
|
||||
@ -103,6 +104,11 @@ func TestFetchSleepScheduleWorker(t *testing.T) {
|
||||
assert.Equal(t, linuxWorker.ID, dbSchedule.Worker.ID)
|
||||
assert.Equal(t, linuxWorker.UUID, dbSchedule.Worker.UUID)
|
||||
}
|
||||
|
||||
// Deleting the Worker should result in a specific error when fetching the schedule again.
|
||||
require.NoError(t, db.DeleteWorker(ctx, linuxWorker.UUID))
|
||||
assert.ErrorIs(t, db.FetchSleepScheduleWorker(ctx, dbSchedule), ErrWorkerNotFound)
|
||||
assert.Nil(t, dbSchedule.Worker)
|
||||
}
|
||||
|
||||
func TestSetWorkerSleepSchedule(t *testing.T) {
|
||||
|
@ -23,6 +23,11 @@ var skipWorkersInStatus = map[api.WorkerStatus]bool{
|
||||
api.WorkerStatusError: true,
|
||||
}
|
||||
|
||||
// ErrWorkerNotFound is returned when the owning Worker of a sleep schedule cannot be found.
|
||||
// This can happen when a Worker has been soft-deleted, which doesn't
|
||||
// automatically trigger the deletion of the foreign key constraints.
|
||||
var ErrWorkerNotFound = errors.New("worker not found")
|
||||
|
||||
// SleepScheduler manages wake/sleep cycles of Workers.
|
||||
type SleepScheduler struct {
|
||||
clock clock.Clock
|
||||
@ -219,6 +224,10 @@ func (ss *SleepScheduler) checkSchedule(ctx context.Context, schedule *persisten
|
||||
switch {
|
||||
case errors.Is(ctx.Err(), context.Canceled):
|
||||
// Manager is shutting down, this is fine.
|
||||
case errors.Is(err, ErrWorkerNotFound):
|
||||
// This schedule's worker cannot be found. That's fine, it could have been
|
||||
// soft-deleted (and thus foreign key constraints don't trigger deletion of
|
||||
// the sleep schedule).
|
||||
case err != nil:
|
||||
log.Error().
|
||||
Err(err).
|
||||
|
Loading…
x
Reference in New Issue
Block a user