API for deleting workers
Workers can now be soft-deleted. Tasks assigned to the worker will remain associated with that Worker. Active tasks will be re-queued so other workers can pick them up.
This commit is contained in:
parent
458c33573e
commit
2a345a3d2c
@ -46,6 +46,7 @@ type PersistenceService interface {
|
||||
SaveWorker(ctx context.Context, w *persistence.Worker) error
|
||||
SaveWorkerStatus(ctx context.Context, w *persistence.Worker) error
|
||||
WorkerSeen(ctx context.Context, w *persistence.Worker) error
|
||||
DeleteWorker(ctx context.Context, uuid string) error
|
||||
|
||||
// ScheduleTask finds a task to execute by the given worker, and assigns it to that worker.
|
||||
// If no task is available, (nil, nil) is returned, as this is not an error situation.
|
||||
|
@ -141,6 +141,20 @@ func (mr *MockPersistenceServiceMockRecorder) CreateWorker(arg0, arg1 interface{
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateWorker", reflect.TypeOf((*MockPersistenceService)(nil).CreateWorker), arg0, arg1)
|
||||
}
|
||||
|
||||
// DeleteWorker mocks base method.
|
||||
func (m *MockPersistenceService) DeleteWorker(arg0 context.Context, arg1 string) error {
|
||||
m.ctrl.T.Helper()
|
||||
ret := m.ctrl.Call(m, "DeleteWorker", arg0, arg1)
|
||||
ret0, _ := ret[0].(error)
|
||||
return ret0
|
||||
}
|
||||
|
||||
// DeleteWorker indicates an expected call of DeleteWorker.
|
||||
func (mr *MockPersistenceServiceMockRecorder) DeleteWorker(arg0, arg1 interface{}) *gomock.Call {
|
||||
mr.mock.ctrl.T.Helper()
|
||||
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteWorker", reflect.TypeOf((*MockPersistenceService)(nil).DeleteWorker), arg0, arg1)
|
||||
}
|
||||
|
||||
// FetchJob mocks base method.
|
||||
func (m *MockPersistenceService) FetchJob(arg0 context.Context, arg1 string) (*persistence.Job, error) {
|
||||
m.ctrl.T.Helper()
|
||||
|
@ -71,6 +71,63 @@ func (f *Flamenco) FetchWorker(e echo.Context, workerUUID string) error {
|
||||
return e.JSON(http.StatusOK, apiWorker)
|
||||
}
|
||||
|
||||
func (f *Flamenco) DeleteWorker(e echo.Context, workerUUID string) error {
|
||||
logger := requestLogger(e)
|
||||
logger = logger.With().Str("worker", workerUUID).Logger()
|
||||
|
||||
if !uuid.IsValid(workerUUID) {
|
||||
return sendAPIError(e, http.StatusBadRequest, "not a valid UUID")
|
||||
}
|
||||
|
||||
// All information to do the deletion is known, so even when the client
|
||||
// disconnects, the deletion should be completed.
|
||||
ctx, ctxCancel := bgContext()
|
||||
defer ctxCancel()
|
||||
|
||||
// Fetch the worker in order to re-queue its tasks.
|
||||
worker, err := f.persist.FetchWorker(ctx, workerUUID)
|
||||
if errors.Is(err, persistence.ErrWorkerNotFound) {
|
||||
logger.Debug().Msg("deletion of non-existent worker requested")
|
||||
return sendAPIError(e, http.StatusNotFound, "worker %q not found", workerUUID)
|
||||
}
|
||||
if err != nil {
|
||||
logger.Error().Err(err).Msg("error fetching worker for deletion")
|
||||
return sendAPIError(e, http.StatusInternalServerError,
|
||||
"error fetching worker for deletion: %v", err)
|
||||
}
|
||||
|
||||
err = f.stateMachine.RequeueActiveTasksOfWorker(ctx, worker, "worker is being deleted")
|
||||
if err != nil {
|
||||
logger.Error().Err(err).Msg("error requeueing tasks before deleting worker")
|
||||
return sendAPIError(e, http.StatusInternalServerError,
|
||||
"error requeueing tasks before deleting worker: %v", err)
|
||||
}
|
||||
|
||||
// Actually delete the worker.
|
||||
err = f.persist.DeleteWorker(ctx, workerUUID)
|
||||
if errors.Is(err, persistence.ErrWorkerNotFound) {
|
||||
logger.Debug().Msg("deletion of non-existent worker requested")
|
||||
return sendAPIError(e, http.StatusNotFound, "worker %q not found", workerUUID)
|
||||
}
|
||||
if err != nil {
|
||||
logger.Error().Err(err).Msg("error deleting worker")
|
||||
return sendAPIError(e, http.StatusInternalServerError, "error deleting worker: %v", err)
|
||||
}
|
||||
logger.Info().Msg("deleted worker")
|
||||
|
||||
// It would be cleaner to re-fetch the Worker from the database and get the
|
||||
// exact 'deleted at' timestamp from there, but that would require more DB
|
||||
// operations, and this is accurate enough for a quick broadcast via SocketIO.
|
||||
now := f.clock.Now()
|
||||
|
||||
// Broadcast the fact that this worker was just deleted.
|
||||
update := webupdates.NewWorkerUpdate(worker)
|
||||
update.DeletedAt = &now
|
||||
f.broadcaster.BroadcastWorkerUpdate(update)
|
||||
|
||||
return e.NoContent(http.StatusNoContent)
|
||||
}
|
||||
|
||||
func (f *Flamenco) RequestWorkerStatusChange(e echo.Context, workerUUID string) error {
|
||||
logger := requestLogger(e)
|
||||
logger = logger.With().Str("worker", workerUUID).Logger()
|
||||
|
@ -141,6 +141,44 @@ func TestFetchWorker(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestDeleteWorker(t *testing.T) {
|
||||
mockCtrl := gomock.NewController(t)
|
||||
defer mockCtrl.Finish()
|
||||
|
||||
mf := newMockedFlamenco(mockCtrl)
|
||||
worker := testWorker()
|
||||
workerUUID := worker.UUID
|
||||
|
||||
// Test on non-existent worker.
|
||||
mf.persistence.EXPECT().FetchWorker(gomock.Any(), workerUUID).
|
||||
Return(nil, fmt.Errorf("wrapped: %w", persistence.ErrWorkerNotFound))
|
||||
echo := mf.prepareMockedRequest(nil)
|
||||
err := mf.flamenco.DeleteWorker(echo, workerUUID)
|
||||
assert.NoError(t, err)
|
||||
assertResponseAPIError(t, echo, http.StatusNotFound, fmt.Sprintf("worker %q not found", workerUUID))
|
||||
|
||||
// Test with existing worker.
|
||||
mf.persistence.EXPECT().FetchWorker(gomock.Any(), workerUUID).Return(&worker, nil)
|
||||
mf.stateMachine.EXPECT().RequeueActiveTasksOfWorker(
|
||||
gomock.Any(), &worker, "worker is being deleted")
|
||||
mf.persistence.EXPECT().DeleteWorker(gomock.Any(), workerUUID).Return(nil)
|
||||
|
||||
mockedNow := mf.clock.Now()
|
||||
mf.broadcaster.EXPECT().BroadcastWorkerUpdate(api.SocketIOWorkerUpdate{
|
||||
DeletedAt: &mockedNow,
|
||||
Id: worker.UUID,
|
||||
Name: worker.Name,
|
||||
Status: worker.Status,
|
||||
Updated: worker.UpdatedAt,
|
||||
Version: worker.Software,
|
||||
})
|
||||
|
||||
echo = mf.prepareMockedRequest(nil)
|
||||
err = mf.flamenco.DeleteWorker(echo, workerUUID)
|
||||
assert.NoError(t, err)
|
||||
assertResponseNoContent(t, echo)
|
||||
}
|
||||
|
||||
func TestRequestWorkerStatusChange(t *testing.T) {
|
||||
mockCtrl := gomock.NewController(t)
|
||||
defer mockCtrl.Finish()
|
||||
|
@ -237,6 +237,9 @@ func (db *DB) SaveJobStatus(ctx context.Context, j *Job) error {
|
||||
func (db *DB) FetchTask(ctx context.Context, taskUUID string) (*Task, error) {
|
||||
dbTask := Task{}
|
||||
tx := db.gormDB.WithContext(ctx).
|
||||
// Allow finding the Worker, even after it was deleted. Jobs and Tasks
|
||||
// don't have soft-deletion.
|
||||
Unscoped().
|
||||
Joins("Job").
|
||||
Joins("Worker").
|
||||
First(&dbTask, "tasks.uuid = ?", taskUUID)
|
||||
|
@ -9,10 +9,13 @@ import (
|
||||
"time"
|
||||
|
||||
"git.blender.org/flamenco/pkg/api"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
type Worker struct {
|
||||
Model
|
||||
DeletedAt gorm.DeletedAt `gorm:"index"`
|
||||
|
||||
UUID string `gorm:"type:char(36);default:'';unique;index;default:''"`
|
||||
Secret string `gorm:"type:varchar(255);default:''"`
|
||||
Name string `gorm:"type:varchar(64);default:''"`
|
||||
@ -71,6 +74,19 @@ func (db *DB) FetchWorker(ctx context.Context, uuid string) (*Worker, error) {
|
||||
return &w, nil
|
||||
}
|
||||
|
||||
func (db *DB) DeleteWorker(ctx context.Context, uuid string) error {
|
||||
tx := db.gormDB.WithContext(ctx).
|
||||
Where("uuid = ?", uuid).
|
||||
Delete(&Worker{})
|
||||
if tx.Error != nil {
|
||||
return workerError(tx.Error, "deleting worker")
|
||||
}
|
||||
if tx.RowsAffected == 0 {
|
||||
return ErrWorkerNotFound
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (db *DB) FetchWorkers(ctx context.Context) ([]*Worker, error) {
|
||||
workers := make([]*Worker, 0)
|
||||
tx := db.gormDB.WithContext(ctx).Model(&Worker{}).Scan(&workers)
|
||||
|
@ -252,3 +252,68 @@ func TestFetchWorkers(t *testing.T) {
|
||||
assert.Equal(t, windowsWorker.UUID, workers[1].UUID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteWorker(t *testing.T) {
|
||||
ctx, cancel, db := persistenceTestFixtures(t, 1*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Test deleting non-existent worker
|
||||
err := db.DeleteWorker(ctx, "dabf67a1-b591-4232-bf73-0b8de2a9488e")
|
||||
assert.ErrorIs(t, err, ErrWorkerNotFound)
|
||||
|
||||
// Test deleting existing worker
|
||||
w1 := Worker{
|
||||
UUID: "fd97a35b-a5bd-44b4-ac2b-64c193ca877d",
|
||||
Name: "Worker 1",
|
||||
Status: api.WorkerStatusAwake,
|
||||
}
|
||||
w2 := Worker{
|
||||
UUID: "82b2d176-cb8c-4bfa-8300-41c216d766df",
|
||||
Name: "Worker 2",
|
||||
Status: api.WorkerStatusOffline,
|
||||
}
|
||||
|
||||
assert.NoError(t, db.CreateWorker(ctx, &w1))
|
||||
assert.NoError(t, db.CreateWorker(ctx, &w2))
|
||||
|
||||
// Delete the 2nd worker, just to have a test with ID != 1.
|
||||
assert.NoError(t, db.DeleteWorker(ctx, w2.UUID))
|
||||
|
||||
// The deleted worker should now no longer be found.
|
||||
{
|
||||
fetchedWorker, err := db.FetchWorker(ctx, w2.UUID)
|
||||
assert.ErrorIs(t, err, ErrWorkerNotFound)
|
||||
assert.Nil(t, fetchedWorker)
|
||||
}
|
||||
|
||||
// The other worker should still exist.
|
||||
{
|
||||
fetchedWorker, err := db.FetchWorker(ctx, w1.UUID)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, w1.UUID, fetchedWorker.UUID)
|
||||
}
|
||||
|
||||
// Assign a task to the other worker, and then delete that worker.
|
||||
authJob := createTestAuthoredJobWithTasks()
|
||||
persistAuthoredJob(t, ctx, db, authJob)
|
||||
taskUUID := authJob.Tasks[0].UUID
|
||||
{
|
||||
task, err := db.FetchTask(ctx, taskUUID)
|
||||
assert.NoError(t, err)
|
||||
task.Worker = &w1
|
||||
assert.NoError(t, db.SaveTask(ctx, task))
|
||||
}
|
||||
|
||||
// Delete the worker.
|
||||
assert.NoError(t, db.DeleteWorker(ctx, w1.UUID))
|
||||
|
||||
// Check the task after deletion of the Worker.
|
||||
{
|
||||
fetchedTask, err := db.FetchTask(ctx, taskUUID)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, taskUUID, fetchedTask.UUID)
|
||||
assert.Equal(t, w1.UUID, fetchedTask.Worker.UUID)
|
||||
assert.NotZero(t, fetchedTask.Worker.DeletedAt.Time)
|
||||
assert.True(t, fetchedTask.Worker.DeletedAt.Valid)
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user