From 8764f8f7c1435bc3e724bf25dc76eb6b72eeef1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= Date: Thu, 16 Jun 2022 16:02:28 +0200 Subject: [PATCH] Manager: task scheduler, don't schedule tasks the worker failed before When a worker asks for a task to perform, don't give it a task that it failed before. --- .../manager/persistence/task_scheduler.go | 2 ++ .../persistence/task_scheduler_test.go | 30 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/internal/manager/persistence/task_scheduler.go b/internal/manager/persistence/task_scheduler.go index 2669cada..900738b1 100644 --- a/internal/manager/persistence/task_scheduler.go +++ b/internal/manager/persistence/task_scheduler.go @@ -118,10 +118,12 @@ func findTaskForWorker(tx *gorm.DB, w *Worker) (*Task, error) { findTaskResult := tx. Model(&task). Joins("left join jobs on tasks.job_id = jobs.id"). + Joins("left join task_failures TF on tasks.id = TF.task_id and TF.worker_id=?", w.ID). Where("tasks.status in ?", schedulableTaskStatuses). // Schedulable task statuses Where("jobs.status in ?", schedulableJobStatuses). // Schedulable job statuses Where("tasks.type in ?", w.TaskTypes()). // Supported task types Where("tasks.id not in (?)", incompleteDepsQuery). // Dependencies completed + Where("TF.worker_id is NULL"). // Not failed before // TODO: Non-blocklisted Order("jobs.priority desc"). // Highest job priority Order("tasks.priority desc"). // Highest task priority diff --git a/internal/manager/persistence/task_scheduler_test.go b/internal/manager/persistence/task_scheduler_test.go index dcd24a11..5732a495 100644 --- a/internal/manager/persistence/task_scheduler_test.go +++ b/internal/manager/persistence/task_scheduler_test.go @@ -259,6 +259,36 @@ func TestAssignedToOtherWorker(t *testing.T) { assert.Equal(t, *task.WorkerID, w.ID, "the task should now be assigned to the worker it was scheduled for") } +func TestPreviouslyFailed(t *testing.T) { + ctx, cancel, db := persistenceTestFixtures(t, schedulerTestTimeout) + defer cancel() + + w := linuxWorker(t, db) + + att1 := authorTestTask("1 failed task", "blender") + att2 := authorTestTask("2 expected task", "blender") + atj := authorTestJob( + "1295757b-e668-4c49-8b89-f73db8270e42", + "simple-blender-render", + att1, att2) + job := constructTestJob(ctx, t, db, atj) + + // Mimick that this worker already failed the first task. + tasks, err := db.FetchTasksOfJob(ctx, job) + assert.NoError(t, err) + numFailed, err := db.AddWorkerToTaskFailedList(ctx, tasks[0], &w) + assert.NoError(t, err) + assert.Equal(t, 1, numFailed) + + // This should assign the 2nd task. + task, err := db.ScheduleTask(ctx, &w) + assert.NoError(t, err) + if task == nil { + t.Fatal("task is nil") + } + assert.Equal(t, att2.Name, task.Name, "the second task should have been chosen") +} + // To test: blocklists // To test: variable replacement