From 530520b1c7c08eb404d739efe68ed51ef5ff969d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= Date: Tue, 17 May 2022 14:48:50 +0200 Subject: [PATCH] Implement mass updating of tasks when `JobUpdate.refresh_tasks = true` Send & handle `JobUpdate.refresh_tasks = true` when many tasks are updated simultaneously. This applies to things like cancelling & requeueing an entire job. This partially rolls back 67bf77de13d99b1bc5d7344951068822c4fadd88, as it was too slow when 1000+ tasks were being updated all at once. --- internal/manager/persistence/jobs.go | 39 ++++ .../mocks/interfaces_mock.gen.go | 63 +++--- .../task_state_machine/task_state_machine.go | 107 +++++----- .../task_state_machine_test.go | 189 +++++++++--------- web/app/src/views/JobsView.vue | 12 +- 5 files changed, 229 insertions(+), 181 deletions(-) diff --git a/internal/manager/persistence/jobs.go b/internal/manager/persistence/jobs.go index 655156e5..8993ff4a 100644 --- a/internal/manager/persistence/jobs.go +++ b/internal/manager/persistence/jobs.go @@ -346,3 +346,42 @@ func (db *DB) FetchTasksOfJobInStatus(ctx context.Context, job *Job, taskStatuse return tasks, nil } + +// UpdateJobsTaskStatuses updates the status & activity of all tasks of `job`. +func (db *DB) UpdateJobsTaskStatuses(ctx context.Context, job *Job, + taskStatus api.TaskStatus, activity string) error { + + if taskStatus == "" { + return taskError(nil, "empty status not allowed") + } + + tx := db.gormDB.WithContext(ctx). + Model(Task{}). + Where("job_Id = ?", job.ID). + Updates(Task{Status: taskStatus, Activity: activity}) + + if tx.Error != nil { + return taskError(tx.Error, "updating status of all tasks of job %s", job.UUID) + } + return nil +} + +// UpdateJobsTaskStatusesConditional updates the status & activity of the tasks of `job`, +// limited to those tasks with status in `statusesToUpdate`. +func (db *DB) UpdateJobsTaskStatusesConditional(ctx context.Context, job *Job, + statusesToUpdate []api.TaskStatus, taskStatus api.TaskStatus, activity string) error { + + if taskStatus == "" { + return taskError(nil, "empty status not allowed") + } + + tx := db.gormDB.WithContext(ctx). + Model(Task{}). + Where("job_Id = ?", job.ID). + Where("status in ?", statusesToUpdate). + Updates(Task{Status: taskStatus, Activity: activity}) + if tx.Error != nil { + return taskError(tx.Error, "updating status of all tasks in status %v of job %s", statusesToUpdate, job.UUID) + } + return nil +} diff --git a/internal/manager/task_state_machine/mocks/interfaces_mock.gen.go b/internal/manager/task_state_machine/mocks/interfaces_mock.gen.go index 135989ec..f0b7f8a6 100644 --- a/internal/manager/task_state_machine/mocks/interfaces_mock.gen.go +++ b/internal/manager/task_state_machine/mocks/interfaces_mock.gen.go @@ -77,41 +77,6 @@ func (mr *MockPersistenceServiceMockRecorder) FetchJobsInStatus(arg0 interface{} return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FetchJobsInStatus", reflect.TypeOf((*MockPersistenceService)(nil).FetchJobsInStatus), varargs...) } -// FetchTasksOfJob mocks base method. -func (m *MockPersistenceService) FetchTasksOfJob(arg0 context.Context, arg1 *persistence.Job) ([]*persistence.Task, error) { - m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "FetchTasksOfJob", arg0, arg1) - ret0, _ := ret[0].([]*persistence.Task) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// FetchTasksOfJob indicates an expected call of FetchTasksOfJob. -func (mr *MockPersistenceServiceMockRecorder) FetchTasksOfJob(arg0, arg1 interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FetchTasksOfJob", reflect.TypeOf((*MockPersistenceService)(nil).FetchTasksOfJob), arg0, arg1) -} - -// FetchTasksOfJobInStatus mocks base method. -func (m *MockPersistenceService) FetchTasksOfJobInStatus(arg0 context.Context, arg1 *persistence.Job, arg2 ...api.TaskStatus) ([]*persistence.Task, error) { - m.ctrl.T.Helper() - varargs := []interface{}{arg0, arg1} - for _, a := range arg2 { - varargs = append(varargs, a) - } - ret := m.ctrl.Call(m, "FetchTasksOfJobInStatus", varargs...) - ret0, _ := ret[0].([]*persistence.Task) - ret1, _ := ret[1].(error) - return ret0, ret1 -} - -// FetchTasksOfJobInStatus indicates an expected call of FetchTasksOfJobInStatus. -func (mr *MockPersistenceServiceMockRecorder) FetchTasksOfJobInStatus(arg0, arg1 interface{}, arg2 ...interface{}) *gomock.Call { - mr.mock.ctrl.T.Helper() - varargs := append([]interface{}{arg0, arg1}, arg2...) - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FetchTasksOfJobInStatus", reflect.TypeOf((*MockPersistenceService)(nil).FetchTasksOfJobInStatus), varargs...) -} - // JobHasTasksInStatus mocks base method. func (m *MockPersistenceService) JobHasTasksInStatus(arg0 context.Context, arg1 *persistence.Job, arg2 api.TaskStatus) (bool, error) { m.ctrl.T.Helper() @@ -155,6 +120,34 @@ func (mr *MockPersistenceServiceMockRecorder) SaveTask(arg0, arg1 interface{}) * return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SaveTask", reflect.TypeOf((*MockPersistenceService)(nil).SaveTask), arg0, arg1) } +// UpdateJobsTaskStatuses mocks base method. +func (m *MockPersistenceService) UpdateJobsTaskStatuses(arg0 context.Context, arg1 *persistence.Job, arg2 api.TaskStatus, arg3 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "UpdateJobsTaskStatuses", arg0, arg1, arg2, arg3) + ret0, _ := ret[0].(error) + return ret0 +} + +// UpdateJobsTaskStatuses indicates an expected call of UpdateJobsTaskStatuses. +func (mr *MockPersistenceServiceMockRecorder) UpdateJobsTaskStatuses(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateJobsTaskStatuses", reflect.TypeOf((*MockPersistenceService)(nil).UpdateJobsTaskStatuses), arg0, arg1, arg2, arg3) +} + +// UpdateJobsTaskStatusesConditional mocks base method. +func (m *MockPersistenceService) UpdateJobsTaskStatusesConditional(arg0 context.Context, arg1 *persistence.Job, arg2 []api.TaskStatus, arg3 api.TaskStatus, arg4 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "UpdateJobsTaskStatusesConditional", arg0, arg1, arg2, arg3, arg4) + ret0, _ := ret[0].(error) + return ret0 +} + +// UpdateJobsTaskStatusesConditional indicates an expected call of UpdateJobsTaskStatusesConditional. +func (mr *MockPersistenceServiceMockRecorder) UpdateJobsTaskStatusesConditional(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateJobsTaskStatusesConditional", reflect.TypeOf((*MockPersistenceService)(nil).UpdateJobsTaskStatusesConditional), arg0, arg1, arg2, arg3, arg4) +} + // MockChangeBroadcaster is a mock of ChangeBroadcaster interface. type MockChangeBroadcaster struct { ctrl *gomock.Controller diff --git a/internal/manager/task_state_machine/task_state_machine.go b/internal/manager/task_state_machine/task_state_machine.go index 69f23753..ef5b7ef1 100644 --- a/internal/manager/task_state_machine/task_state_machine.go +++ b/internal/manager/task_state_machine/task_state_machine.go @@ -34,8 +34,14 @@ type PersistenceService interface { JobHasTasksInStatus(ctx context.Context, job *persistence.Job, taskStatus api.TaskStatus) (bool, error) CountTasksOfJobInStatus(ctx context.Context, job *persistence.Job, taskStatuses ...api.TaskStatus) (numInStatus, numTotal int, err error) - FetchTasksOfJob(ctx context.Context, job *persistence.Job) ([]*persistence.Task, error) - FetchTasksOfJobInStatus(ctx context.Context, job *persistence.Job, taskStatuses ...api.TaskStatus) ([]*persistence.Task, error) + // UpdateJobsTaskStatuses updates the status & activity of the tasks of `job`. + UpdateJobsTaskStatuses(ctx context.Context, job *persistence.Job, + taskStatus api.TaskStatus, activity string) error + + // UpdateJobsTaskStatusesConditional updates the status & activity of the tasks of `job`, + // limited to those tasks with status in `statusesToUpdate`. + UpdateJobsTaskStatusesConditional(ctx context.Context, job *persistence.Job, + statusesToUpdate []api.TaskStatus, taskStatus api.TaskStatus, activity string) error FetchJobsInStatus(ctx context.Context, jobStatuses ...api.JobStatus) ([]*persistence.Job, error) } @@ -346,7 +352,7 @@ func (sm *StateMachine) jobStatusSet(ctx context.Context, } // Handle the status change. - newJobStatus, err = sm.updateTasksAfterJobStatusChange(ctx, logger, job, oldJobStatus) + result, err := sm.updateTasksAfterJobStatusChange(ctx, logger, job, oldJobStatus) if err != nil { return "", fmt.Errorf("updating job's tasks after job status change: %w", err) } @@ -354,9 +360,21 @@ func (sm *StateMachine) jobStatusSet(ctx context.Context, // Broadcast this change to the SocketIO clients. jobUpdate := webupdates.NewJobUpdate(job) jobUpdate.PreviousStatus = &oldJobStatus + jobUpdate.RefreshTasks = result.massTaskUpdate sm.broadcaster.BroadcastJobUpdate(jobUpdate) - return newJobStatus, nil + return result.followingJobStatus, nil +} + +// tasksUpdateResult is returned by `updateTasksAfterJobStatusChange`. +type tasksUpdateResult struct { + // FollowingJobStatus is set when the task updates should trigger another job status update. + followingJobStatus api.JobStatus + // massTaskUpdate is true when multiple/all tasks were updated simultaneously. + // This hasn't triggered individual task updates to SocketIO clients, and thus + // the resulting SocketIO job update should indicate all tasks must be + // reloaded. + massTaskUpdate bool } // updateTasksAfterJobStatusChange updates the status of its tasks based on the @@ -371,31 +389,43 @@ func (sm *StateMachine) updateTasksAfterJobStatusChange( logger zerolog.Logger, job *persistence.Job, oldJobStatus api.JobStatus, -) (api.JobStatus, error) { +) (tasksUpdateResult, error) { // Every case in this switch MUST return, for sanity sake. switch job.Status { case api.JobStatusCompleted, api.JobStatusCanceled: // Nothing to do; this will happen as a response to all tasks receiving this status. - return "", nil + return tasksUpdateResult{}, nil case api.JobStatusActive: // Nothing to do; this happens when a task gets started, which has nothing to // do with other tasks in the job. - return "", nil + return tasksUpdateResult{}, nil case api.JobStatusCancelRequested, api.JobStatusFailed: - return sm.cancelTasks(ctx, logger, job) + jobStatus, err := sm.cancelTasks(ctx, logger, job) + return tasksUpdateResult{ + followingJobStatus: jobStatus, + massTaskUpdate: true, + }, err case api.JobStatusRequeued: - return sm.requeueTasks(ctx, logger, job, oldJobStatus) + jobStatus, err := sm.requeueTasks(ctx, logger, job, oldJobStatus) + return tasksUpdateResult{ + followingJobStatus: jobStatus, + massTaskUpdate: true, + }, err case api.JobStatusQueued: - return sm.checkTaskCompletion(ctx, logger, job) + jobStatus, err := sm.checkTaskCompletion(ctx, logger, job) + return tasksUpdateResult{ + followingJobStatus: jobStatus, + massTaskUpdate: true, + }, err default: logger.Warn().Msg("unknown job status change, ignoring") - return "", nil + return tasksUpdateResult{}, nil } } @@ -408,16 +438,15 @@ func (sm *StateMachine) cancelTasks( logger.Info().Msg("cancelling tasks of job") // Any task that is running or might run in the future should get cancelled. - tasks, err := sm.persist.FetchTasksOfJobInStatus(ctx, job, + taskStatusesToCancel := []api.TaskStatus{ api.TaskStatusActive, api.TaskStatusQueued, api.TaskStatusSoftFailed, - ) - if err != nil { - return "", err } - activity := fmt.Sprintf("Manager cancelled this task because the job got status %q.", job.Status) - err = sm.massUpdateTaskStatus(ctx, tasks, api.TaskStatusCanceled, activity) + err := sm.persist.UpdateJobsTaskStatusesConditional( + ctx, job, taskStatusesToCancel, api.TaskStatusCanceled, + fmt.Sprintf("Manager cancelled this task because the job got status %q.", job.Status), + ) if err != nil { return "", fmt.Errorf("cancelling tasks of job %s: %w", job.UUID, err) } @@ -447,7 +476,6 @@ func (sm *StateMachine) requeueTasks( } var err error - var tasks []*persistence.Task switch oldJobStatus { case api.JobStatusUnderConstruction: @@ -457,19 +485,22 @@ func (sm *StateMachine) requeueTasks( return "", nil case api.JobStatusCompleted: // Re-queue all tasks. - tasks, err = sm.persist.FetchTasksOfJob(ctx, job) + err = sm.persist.UpdateJobsTaskStatuses(ctx, job, api.TaskStatusQueued, + fmt.Sprintf("Queued because job transitioned status from %q to %q", oldJobStatus, job.Status)) default: + statusesToUpdate := []api.TaskStatus{ + api.TaskStatusCanceled, + api.TaskStatusFailed, + api.TaskStatusPaused, + api.TaskStatusSoftFailed, + } // Re-queue only the non-completed tasks. - tasks, err = sm.persist.FetchTasksOfJobInStatus(ctx, job, nonCompletedStatuses...) + err = sm.persist.UpdateJobsTaskStatusesConditional(ctx, job, + statusesToUpdate, api.TaskStatusQueued, + fmt.Sprintf("Queued because job transitioned status from %q to %q", oldJobStatus, job.Status)) } if err != nil { - return "", err - } - - activity := fmt.Sprintf("Queued because job transitioned status from %q to %q", oldJobStatus, job.Status) - err = sm.massUpdateTaskStatus(ctx, tasks, api.TaskStatusQueued, activity) - if err != nil { - return "", err + return "", fmt.Errorf("queueing tasks of job %s: %w", job.UUID, err) } // TODO: also reset the 'failed by workers' blacklist. @@ -478,28 +509,6 @@ func (sm *StateMachine) requeueTasks( return api.JobStatusQueued, nil } -// massUpdateTaskStatus updates the status of all the given tasks. -// NOTE: these task statuses do NOT affect the job status. -// Tasks that are passed in the `tasks` parameter but already have the given status will be skipped. -func (sm *StateMachine) massUpdateTaskStatus( - ctx context.Context, - tasks []*persistence.Task, - status api.TaskStatus, - activity string, -) error { - for _, task := range tasks { - if task.Status == status { - continue - } - task.Activity = activity - err := sm.taskStatusChangeOnly(ctx, task, status) - if err != nil { - return err - } - } - return nil -} - // checkTaskCompletion returns "completed" as next job status when all tasks of // the job are completed. // diff --git a/internal/manager/task_state_machine/task_state_machine_test.go b/internal/manager/task_state_machine/task_state_machine_test.go index bf4b4582..8cbf81e6 100644 --- a/internal/manager/task_state_machine/task_state_machine_test.go +++ b/internal/manager/task_state_machine/task_state_machine_test.go @@ -116,28 +116,25 @@ func TestTaskStatusChangeActiveToFailedFailJob(t *testing.T) { // T: active > failed (10% task1 failure) --> J: active > failed + cancellation of any runnable tasks. task1 := taskWithStatus(api.JobStatusActive, api.TaskStatusActive) - task2 := taskOfSameJob(task1, api.TaskStatusFailed) - task3 := taskOfSameJob(task2, api.TaskStatusSoftFailed) - remainingTasks := []*persistence.Task{task2, task3} - mocks.expectSaveTaskWithStatus(t, task1, api.TaskStatusFailed) + // The change to the failed task should be broadcast. mocks.expectBroadcastTaskChange(task1, api.TaskStatusActive, api.TaskStatusFailed) mocks.expectSaveJobWithStatus(t, task1.Job, api.JobStatusFailed) - mocks.expectBroadcastJobChange(task1.Job, api.JobStatusActive, api.JobStatusFailed) + // The resulting cancellation of the other tasks should be communicated as mass-task-update in the job update broadcast. + mocks.expectBroadcastJobChangeWithTaskRefresh(task1.Job, api.JobStatusActive, api.JobStatusFailed) mocks.persist.EXPECT().CountTasksOfJobInStatus(ctx, task1.Job, api.TaskStatusFailed).Return(10, 100, nil) // 10 out of 100 failed. // Expect failure of the job to trigger cancellation of remaining tasks. - mocks.persist.EXPECT().FetchTasksOfJobInStatus(ctx, task1.Job, + taskStatusesToCancel := []api.TaskStatus{ api.TaskStatusActive, api.TaskStatusQueued, api.TaskStatusSoftFailed, - ).Return(remainingTasks, nil) - mocks.expectSaveTaskWithStatus(t, task2, api.TaskStatusCanceled) - mocks.expectSaveTaskWithStatus(t, task3, api.TaskStatusCanceled) + } - mocks.expectBroadcastTaskChange(task2, api.TaskStatusFailed, api.TaskStatusCanceled) - mocks.expectBroadcastTaskChange(task3, api.TaskStatusSoftFailed, api.TaskStatusCanceled) + mocks.persist.EXPECT().UpdateJobsTaskStatusesConditional(ctx, task1.Job, taskStatusesToCancel, api.TaskStatusCanceled, + "Manager cancelled this task because the job got status \"failed\".", + ) assert.NoError(t, sm.TaskStatusChange(ctx, task1, api.TaskStatusFailed)) } @@ -147,30 +144,22 @@ func TestTaskStatusChangeRequeueOnCompletedJob(t *testing.T) { defer mockCtrl.Finish() // T: completed > queued --> J: completed > requeued > queued - task1 := taskWithStatus(api.JobStatusCompleted, api.TaskStatusCompleted) - task2 := taskOfSameJob(task1, api.TaskStatusCompleted) - task3 := taskOfSameJob(task2, api.TaskStatusCompleted) - allTaskIDs := []*persistence.Task{task1, task2, task3} - - mocks.expectSaveTaskWithStatus(t, task1, api.TaskStatusQueued) - mocks.expectBroadcastTaskChange(task1, api.TaskStatusCompleted, api.TaskStatusQueued) - mocks.expectSaveJobWithStatus(t, task1.Job, api.JobStatusRequeued) - mocks.expectBroadcastJobChange(task1.Job, api.JobStatusCompleted, api.JobStatusRequeued) - mocks.expectBroadcastJobChange(task1.Job, api.JobStatusRequeued, api.JobStatusQueued) + task := taskWithStatus(api.JobStatusCompleted, api.TaskStatusCompleted) + mocks.expectSaveTaskWithStatus(t, task, api.TaskStatusQueued) + mocks.expectBroadcastTaskChange(task, api.TaskStatusCompleted, api.TaskStatusQueued) + mocks.expectSaveJobWithStatus(t, task.Job, api.JobStatusRequeued) + mocks.expectBroadcastJobChangeWithTaskRefresh(task.Job, api.JobStatusCompleted, api.JobStatusRequeued) + mocks.expectBroadcastJobChangeWithTaskRefresh(task.Job, api.JobStatusRequeued, api.JobStatusQueued) // Expect queueing of the job to trigger queueing of all its tasks, if those tasks were all completed before. // 2 out of 3 completed, because one was just queued. - mocks.persist.EXPECT().CountTasksOfJobInStatus(ctx, task1.Job, api.TaskStatusCompleted).Return(2, 3, nil) - fetchCall := mocks.persist.EXPECT().FetchTasksOfJob(ctx, task1.Job).Return(allTaskIDs, nil) - mocks.expectSaveTaskWithStatus(t, task2, api.TaskStatusQueued).After(fetchCall) - mocks.expectSaveTaskWithStatus(t, task3, api.TaskStatusQueued).After(fetchCall) + mocks.persist.EXPECT().CountTasksOfJobInStatus(ctx, task.Job, api.TaskStatusCompleted).Return(2, 3, nil) + mocks.persist.EXPECT().UpdateJobsTaskStatuses(ctx, task.Job, api.TaskStatusQueued, + "Queued because job transitioned status from \"completed\" to \"requeued\"", + ) + mocks.expectSaveJobWithStatus(t, task.Job, api.JobStatusQueued) - mocks.expectBroadcastTaskChange(task2, api.TaskStatusCompleted, api.TaskStatusQueued) - mocks.expectBroadcastTaskChange(task3, api.TaskStatusCompleted, api.TaskStatusQueued) - - mocks.expectSaveJobWithStatus(t, task1.Job, api.JobStatusQueued) - - assert.NoError(t, sm.TaskStatusChange(ctx, task1, api.TaskStatusQueued)) + assert.NoError(t, sm.TaskStatusChange(ctx, task, api.TaskStatusQueued)) } func TestTaskStatusChangeCancelSingleTask(t *testing.T) { @@ -241,31 +230,31 @@ func TestJobRequeueWithSomeCompletedTasks(t *testing.T) { defer mockCtrl.Finish() task1 := taskWithStatus(api.JobStatusActive, api.TaskStatusCompleted) - task2 := taskOfSameJob(task1, api.TaskStatusFailed) - task3 := taskOfSameJob(task2, api.TaskStatusSoftFailed) - notCompleteTasks := []*persistence.Task{task2, task3} + // These are not necessary to create for this test, but just imagine these tasks are there too. + // This is mimicked by returning (1, 3, nil) when counting the tasks (1 of 3 completed). + // task2 := taskOfSameJob(task1, api.TaskStatusFailed) + // task3 := taskOfSameJob(task2, api.TaskStatusSoftFailed) job := task1.Job mocks.expectSaveJobWithStatus(t, job, api.JobStatusRequeued) // Expect queueing of the job to trigger queueing of all its not-yet-completed tasks. mocks.persist.EXPECT().CountTasksOfJobInStatus(ctx, job, api.TaskStatusCompleted).Return(1, 3, nil) - mocks.persist.EXPECT().FetchTasksOfJobInStatus(ctx, job, - api.TaskStatusCanceled, - api.TaskStatusFailed, - api.TaskStatusPaused, - api.TaskStatusSoftFailed, - ).Return(notCompleteTasks, nil) + mocks.persist.EXPECT().UpdateJobsTaskStatusesConditional(ctx, job, + []api.TaskStatus{ + api.TaskStatusCanceled, + api.TaskStatusFailed, + api.TaskStatusPaused, + api.TaskStatusSoftFailed, + }, + api.TaskStatusQueued, + "Queued because job transitioned status from \"active\" to \"requeued\"", + ) - mocks.expectSaveTaskWithStatus(t, task2, api.TaskStatusQueued) - mocks.expectSaveTaskWithStatus(t, task3, api.TaskStatusQueued) mocks.expectSaveJobWithStatus(t, job, api.JobStatusQueued) - mocks.expectBroadcastJobChange(job, api.JobStatusActive, api.JobStatusRequeued) - mocks.expectBroadcastJobChange(job, api.JobStatusRequeued, api.JobStatusQueued) - - mocks.expectBroadcastTaskChange(task2, api.TaskStatusFailed, api.TaskStatusQueued) - mocks.expectBroadcastTaskChange(task3, api.TaskStatusSoftFailed, api.TaskStatusQueued) + mocks.expectBroadcastJobChangeWithTaskRefresh(job, api.JobStatusActive, api.JobStatusRequeued) + mocks.expectBroadcastJobChangeWithTaskRefresh(job, api.JobStatusRequeued, api.JobStatusQueued) assert.NoError(t, sm.JobStatusChange(ctx, job, api.JobStatusRequeued, "someone wrote a unittest")) } @@ -275,35 +264,29 @@ func TestJobRequeueWithAllCompletedTasks(t *testing.T) { defer mockCtrl.Finish() task1 := taskWithStatus(api.JobStatusCompleted, api.TaskStatusCompleted) - task2 := taskOfSameJob(task1, api.TaskStatusCompleted) - task3 := taskOfSameJob(task2, api.TaskStatusCompleted) - allTasks := []*persistence.Task{task1, task2, task3} + // These are not necessary to create for this test, but just imagine these tasks are there too. + // This is mimicked by returning (3, 3, nil) when counting the tasks (3 of 3 completed). + // task2 := taskOfSameJob(task1, api.TaskStatusCompleted) + // task3 := taskOfSameJob(task2, api.TaskStatusCompleted) job := task1.Job call1 := mocks.expectSaveJobWithStatus(t, job, api.JobStatusRequeued) // Expect queueing of the job to trigger queueing of all its not-yet-completed tasks. - fetchCall := mocks.persist.EXPECT().FetchTasksOfJob(ctx, job). - Return(allTasks, nil). + updateCall := mocks.persist.EXPECT(). + UpdateJobsTaskStatuses(ctx, job, api.TaskStatusQueued, + "Queued because job transitioned status from \"completed\" to \"requeued\""). After(call1) - mocks.expectSaveTaskWithStatus(t, task1, api.TaskStatusQueued).After(fetchCall) - mocks.expectSaveTaskWithStatus(t, task2, api.TaskStatusQueued).After(fetchCall) - mocks.expectSaveTaskWithStatus(t, task3, api.TaskStatusQueued).After(fetchCall) - - saveJobCall := mocks.expectSaveJobWithStatus(t, job, api.JobStatusQueued).After(fetchCall) + saveJobCall := mocks.expectSaveJobWithStatus(t, job, api.JobStatusQueued).After(updateCall) mocks.persist.EXPECT(). CountTasksOfJobInStatus(ctx, job, api.TaskStatusCompleted). Return(0, 3, nil). // By now all tasks are queued. After(saveJobCall) - mocks.expectBroadcastJobChange(job, api.JobStatusCompleted, api.JobStatusRequeued) - mocks.expectBroadcastJobChange(job, api.JobStatusRequeued, api.JobStatusQueued) - - mocks.expectBroadcastTaskChange(task1, api.TaskStatusCompleted, api.TaskStatusQueued) - mocks.expectBroadcastTaskChange(task2, api.TaskStatusCompleted, api.TaskStatusQueued) - mocks.expectBroadcastTaskChange(task3, api.TaskStatusCompleted, api.TaskStatusQueued) + mocks.expectBroadcastJobChangeWithTaskRefresh(job, api.JobStatusCompleted, api.JobStatusRequeued) + mocks.expectBroadcastJobChangeWithTaskRefresh(job, api.JobStatusRequeued, api.JobStatusQueued) assert.NoError(t, sm.JobStatusChange(ctx, job, api.JobStatusRequeued, "someone wrote a unit test")) } @@ -313,29 +296,29 @@ func TestJobCancelWithSomeCompletedTasks(t *testing.T) { defer mockCtrl.Finish() task1 := taskWithStatus(api.JobStatusActive, api.TaskStatusCompleted) - task2 := taskOfSameJob(task1, api.TaskStatusFailed) - task3 := taskOfSameJob(task2, api.TaskStatusSoftFailed) + // task2 := taskOfSameJob(task1, api.TaskStatusFailed) + // task3 := taskOfSameJob(task2, api.TaskStatusSoftFailed) job := task1.Job - potentialRunTasks := []*persistence.Task{task2, task3} mocks.expectSaveJobWithStatus(t, job, api.JobStatusCancelRequested) // Expect cancelling of the job to trigger cancelling of all its could-potentially-still-run tasks. - fetchCall := mocks.persist.EXPECT().FetchTasksOfJobInStatus(ctx, job, - api.TaskStatusActive, - api.TaskStatusQueued, - api.TaskStatusSoftFailed, - ).Return(potentialRunTasks, nil) - mocks.expectSaveTaskWithStatus(t, task2, api.TaskStatusCanceled).After(fetchCall) - mocks.expectSaveTaskWithStatus(t, task3, api.TaskStatusCanceled).After(fetchCall) - mocks.expectSaveJobWithStatus(t, job, api.JobStatusCanceled).After(fetchCall) + mocks.persist.EXPECT().UpdateJobsTaskStatusesConditional(ctx, job, + []api.TaskStatus{ + api.TaskStatusActive, + api.TaskStatusQueued, + // TODO: add api.TaskStatusPaused as well, as those should get cancelled too, + api.TaskStatusSoftFailed, + }, + api.TaskStatusCanceled, + "Manager cancelled this task because the job got status \"cancel-requested\".", + ) - mocks.expectBroadcastJobChange(job, api.JobStatusActive, api.JobStatusCancelRequested) + mocks.expectSaveJobWithStatus(t, job, api.JobStatusCanceled) + + mocks.expectBroadcastJobChangeWithTaskRefresh(job, api.JobStatusActive, api.JobStatusCancelRequested) mocks.expectBroadcastJobChange(job, api.JobStatusCancelRequested, api.JobStatusCanceled) - mocks.expectBroadcastTaskChange(task2, api.TaskStatusFailed, api.TaskStatusCanceled) - mocks.expectBroadcastTaskChange(task3, api.TaskStatusSoftFailed, api.TaskStatusCanceled) - assert.NoError(t, sm.JobStatusChange(ctx, job, api.JobStatusCancelRequested, "someone wrote a unittest")) } @@ -344,32 +327,32 @@ func TestCheckStuck(t *testing.T) { defer mockCtrl.Finish() task1 := taskWithStatus(api.JobStatusActive, api.TaskStatusCompleted) - task2 := taskOfSameJob(task1, api.TaskStatusFailed) - task3 := taskOfSameJob(task2, api.TaskStatusSoftFailed) + // task2 := taskOfSameJob(task1, api.TaskStatusFailed) + // task3 := taskOfSameJob(task2, api.TaskStatusSoftFailed) job := task1.Job job.Status = api.JobStatusRequeued mocks.persist.EXPECT().FetchJobsInStatus(ctx, api.JobStatusCancelRequested, api.JobStatusRequeued). Return([]*persistence.Job{job}, nil) - mocks.persist.EXPECT().CountTasksOfJobInStatus(ctx, job, api.TaskStatusCompleted).Return(2, 3, nil) - mocks.persist.EXPECT().FetchTasksOfJobInStatus(ctx, job, - api.TaskStatusCanceled, - api.TaskStatusFailed, - api.TaskStatusPaused, - api.TaskStatusSoftFailed, - ). - Return([]*persistence.Task{task2, task3}, nil) + mocks.persist.EXPECT().CountTasksOfJobInStatus(ctx, job, api.TaskStatusCompleted).Return(1, 3, nil) + + mocks.persist.EXPECT().UpdateJobsTaskStatusesConditional(ctx, job, + []api.TaskStatus{ + api.TaskStatusCanceled, + api.TaskStatusFailed, + api.TaskStatusPaused, + api.TaskStatusSoftFailed, + }, + api.TaskStatusQueued, + fmt.Sprintf("Queued because job transitioned status from %q to %q", job.Status, job.Status), + ) // Expect Job -> Queued and non-completed tasks -> Queued. mocks.expectSaveJobWithStatus(t, job, api.JobStatusRequeued) // should be called once for the current status mocks.expectSaveJobWithStatus(t, job, api.JobStatusQueued) // and then with the new status - mocks.expectSaveTaskWithStatus(t, task2, api.TaskStatusQueued) - mocks.expectSaveTaskWithStatus(t, task3, api.TaskStatusQueued) - mocks.expectBroadcastJobChange(job, api.JobStatusRequeued, api.JobStatusRequeued) - mocks.expectBroadcastJobChange(job, api.JobStatusRequeued, api.JobStatusQueued) - mocks.expectBroadcastTaskChange(task2, api.TaskStatusFailed, api.TaskStatusQueued) - mocks.expectBroadcastTaskChange(task3, api.TaskStatusSoftFailed, api.TaskStatusQueued) + mocks.expectBroadcastJobChangeWithTaskRefresh(job, api.JobStatusRequeued, api.JobStatusRequeued) + mocks.expectBroadcastJobChangeWithTaskRefresh(job, api.JobStatusRequeued, api.JobStatusQueued) sm.CheckStuck(ctx) } @@ -416,9 +399,25 @@ func (m *StateMachineMocks) expectBroadcastJobChange( expectUpdate := api.JobUpdate{ Id: job.UUID, Name: &job.Name, - Updated: job.UpdatedAt, PreviousStatus: &fromStatus, + RefreshTasks: false, Status: toStatus, + Updated: job.UpdatedAt, + } + return m.broadcaster.EXPECT().BroadcastJobUpdate(expectUpdate) +} + +func (m *StateMachineMocks) expectBroadcastJobChangeWithTaskRefresh( + job *persistence.Job, + fromStatus, toStatus api.JobStatus, +) *gomock.Call { + expectUpdate := api.JobUpdate{ + Id: job.UUID, + Name: &job.Name, + PreviousStatus: &fromStatus, + RefreshTasks: true, + Status: toStatus, + Updated: job.UpdatedAt, } return m.broadcaster.EXPECT().BroadcastJobUpdate(expectUpdate) } diff --git a/web/app/src/views/JobsView.vue b/web/app/src/views/JobsView.vue index 6b05c261..b805474c 100644 --- a/web/app/src/views/JobsView.vue +++ b/web/app/src/views/JobsView.vue @@ -95,14 +95,22 @@ export default { // SocketIO data event handlers: onSioJobUpdate(jobUpdate) { + console.log("job update", jobUpdate); if (this.$refs.jobsTable) { if (jobUpdate.previous_status) this.$refs.jobsTable.processJobUpdate(jobUpdate); else this.$refs.jobsTable.processNewJob(jobUpdate); } - if (this.jobID == jobUpdate.id) - this._fetchJob(this.jobID); + if (this.jobID != jobUpdate.id) + return; + + this._fetchJob(this.jobID); + if (jobUpdate.refresh_tasks) { + if (this.$refs.tasksTable) + this.$refs.tasksTable.fetchTasks(); + this._fetchTask(this.taskID); + } }, /**