
The canary test asserts that certain constants still have the expected value. Lowering those constants is good for testing the timeout stuff with the actual Flamenco Manager + Worker (without having to wait 5 minutes for it to kick in), but it's too easy to accidentally run the unit tests and get cryptic errors about everything failing horribly and miserably when you leave those constants low.
143 lines
5.1 KiB
Go
143 lines
5.1 KiB
Go
package timeout_checker
|
|
|
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/golang/mock/gomock"
|
|
"github.com/stretchr/testify/assert"
|
|
"gorm.io/gorm"
|
|
|
|
"git.blender.org/flamenco/internal/manager/persistence"
|
|
"git.blender.org/flamenco/pkg/api"
|
|
)
|
|
|
|
const taskTimeout = 20 * time.Minute
|
|
|
|
func TestTimeoutCheckerTiming(t *testing.T) {
|
|
canaryTest(t)
|
|
|
|
ttc, finish, mocks := timeoutCheckerTestFixtures(t)
|
|
defer finish()
|
|
|
|
mocks.run(ttc)
|
|
|
|
// Wait for the timeout checker to actually be sleeping, otherwise it could
|
|
// have a different sleep-start time than we expect.
|
|
time.Sleep(1 * time.Millisecond)
|
|
|
|
// Determine the deadlines relative to the initial clock value.
|
|
initialTime := mocks.clock.Now().UTC()
|
|
deadlines := []time.Time{
|
|
initialTime.Add(timeoutInitialSleep - taskTimeout),
|
|
initialTime.Add(timeoutInitialSleep - taskTimeout + 1*timeoutCheckInterval),
|
|
initialTime.Add(timeoutInitialSleep - taskTimeout + 2*timeoutCheckInterval),
|
|
}
|
|
|
|
mocks.persist.EXPECT().FetchTimedOutWorkers(mocks.ctx, gomock.Any()).AnyTimes().Return(nil, nil)
|
|
|
|
// Expect three fetches, one after the initial sleep time, and two a regular interval later.
|
|
fetchTimes := make([]time.Time, len(deadlines))
|
|
firstCall := mocks.persist.EXPECT().FetchTimedOutTasks(mocks.ctx, deadlines[0]).
|
|
DoAndReturn(func(ctx context.Context, timeout time.Time) ([]*persistence.Task, error) {
|
|
fetchTimes[0] = mocks.clock.Now().UTC()
|
|
return []*persistence.Task{}, nil
|
|
})
|
|
|
|
secondCall := mocks.persist.EXPECT().FetchTimedOutTasks(mocks.ctx, deadlines[1]).
|
|
DoAndReturn(func(ctx context.Context, timeout time.Time) ([]*persistence.Task, error) {
|
|
fetchTimes[1] = mocks.clock.Now().UTC()
|
|
// Return a database error. This shouldn't break the check loop.
|
|
return []*persistence.Task{}, errors.New("testing what errors do")
|
|
}).
|
|
After(firstCall)
|
|
|
|
mocks.persist.EXPECT().FetchTimedOutTasks(mocks.ctx, deadlines[2]).
|
|
DoAndReturn(func(ctx context.Context, timeout time.Time) ([]*persistence.Task, error) {
|
|
fetchTimes[2] = mocks.clock.Now().UTC()
|
|
return []*persistence.Task{}, nil
|
|
}).
|
|
After(secondCall)
|
|
|
|
mocks.clock.Add(2 * time.Minute) // Should still be sleeping.
|
|
mocks.clock.Add(2 * time.Minute) // Should still be sleeping.
|
|
mocks.clock.Add(time.Minute) // Should trigger the first fetch.
|
|
mocks.clock.Add(time.Minute) // Should trigger the second fetch.
|
|
mocks.clock.Add(time.Minute) // Should trigger the third fetch.
|
|
|
|
// Wait for the timeout checker to actually run & hit the expected calls.
|
|
time.Sleep(1 * time.Millisecond)
|
|
|
|
for idx, fetchTime := range fetchTimes {
|
|
// Check for zero values first, because they can be a bit confusing in the assert.Equal() logs.
|
|
if !assert.Falsef(t, fetchTime.IsZero(), "fetchTime[%d] should not be zero", idx) {
|
|
continue
|
|
}
|
|
expect := initialTime.Add(timeoutInitialSleep + time.Duration(idx)*timeoutCheckInterval)
|
|
assert.Equalf(t, expect, fetchTime, "fetchTime[%d] not as expected", idx)
|
|
}
|
|
}
|
|
|
|
func TestTaskTimeout(t *testing.T) {
|
|
canaryTest(t)
|
|
|
|
ttc, finish, mocks := timeoutCheckerTestFixtures(t)
|
|
defer finish()
|
|
|
|
mocks.run(ttc)
|
|
|
|
// Wait for the timeout checker to actually be sleeping, otherwise it could
|
|
// have a different sleep-start time than we expect.
|
|
time.Sleep(1 * time.Millisecond)
|
|
|
|
lastTime := mocks.clock.Now().UTC().Add(-1 * time.Hour)
|
|
|
|
job := persistence.Job{UUID: "JOB-UUID"}
|
|
worker := persistence.Worker{
|
|
UUID: "WORKER-UUID",
|
|
Name: "Tester",
|
|
Model: gorm.Model{ID: 47},
|
|
}
|
|
taskUnassigned := persistence.Task{
|
|
UUID: "TASK-UUID-UNASSIGNED",
|
|
Job: &job,
|
|
LastTouchedAt: lastTime,
|
|
}
|
|
taskUnknownWorker := persistence.Task{
|
|
UUID: "TASK-UUID-UNKNOWN",
|
|
Job: &job,
|
|
LastTouchedAt: lastTime,
|
|
WorkerID: &worker.ID,
|
|
}
|
|
taskAssigned := persistence.Task{
|
|
UUID: "TASK-UUID-ASSIGNED",
|
|
Job: &job,
|
|
LastTouchedAt: lastTime,
|
|
WorkerID: &worker.ID,
|
|
Worker: &worker,
|
|
}
|
|
|
|
mocks.persist.EXPECT().FetchTimedOutWorkers(mocks.ctx, gomock.Any()).AnyTimes().Return(nil, nil)
|
|
|
|
mocks.persist.EXPECT().FetchTimedOutTasks(mocks.ctx, gomock.Any()).
|
|
Return([]*persistence.Task{&taskUnassigned, &taskUnknownWorker, &taskAssigned}, nil)
|
|
|
|
mocks.taskStateMachine.EXPECT().TaskStatusChange(mocks.ctx, &taskUnassigned, api.TaskStatusFailed)
|
|
mocks.taskStateMachine.EXPECT().TaskStatusChange(mocks.ctx, &taskUnknownWorker, api.TaskStatusFailed)
|
|
mocks.taskStateMachine.EXPECT().TaskStatusChange(mocks.ctx, &taskAssigned, api.TaskStatusFailed)
|
|
|
|
mocks.logStorage.EXPECT().WriteTimestamped(gomock.Any(), job.UUID, taskUnassigned.UUID,
|
|
"Task timed out. It was assigned to worker -unassigned-, but untouched since 1969-12-31T23:00:00Z")
|
|
mocks.logStorage.EXPECT().WriteTimestamped(gomock.Any(), job.UUID, taskUnknownWorker.UUID,
|
|
"Task timed out. It was assigned to worker -unknown-, but untouched since 1969-12-31T23:00:00Z")
|
|
mocks.logStorage.EXPECT().WriteTimestamped(gomock.Any(), job.UUID, taskAssigned.UUID,
|
|
"Task timed out. It was assigned to worker Tester (WORKER-UUID), but untouched since 1969-12-31T23:00:00Z")
|
|
|
|
// All the timeouts should be handled after the initial sleep.
|
|
mocks.clock.Add(timeoutInitialSleep)
|
|
}
|