Refactor: Manager, refactor handling of task failures

Split the handling of soft and hard failures into separate functions.

No functional changes intended.
This commit is contained in:
Sybren A. Stüvel 2022-06-17 15:01:46 +02:00
parent 0396919229
commit 56abc825a6

View File

@ -167,37 +167,59 @@ func (f *Flamenco) onTaskFailed(
Int("failedByWorkerCount", numFailed). Int("failedByWorkerCount", numFailed).
Int("threshold", threshold). Int("threshold", threshold).
Logger() Logger()
if numFailed < threshold {
return f.softFailTask(ctx, logger, worker, task, numFailed)
}
return f.hardFailTask(ctx, logger, worker, task, numFailed)
}
// hardFailTask writes a "hard failure" notice to the task's log and then asks
// the state machine to change the task's status to api.TaskStatusFailed.
//
// A failure to write the task log is only reported via logger.Error(); it does
// not prevent the status change. The returned error is whatever
// f.stateMachine.TaskStatusChange returns.
//
// NOTE(review): the worker parameter is not referenced in the body shown here.
var ( func (f *Flamenco) hardFailTask(
newStatus api.TaskStatus ctx context.Context,
localLog, taskLog string logger zerolog.Logger,
) worker *persistence.Worker,
task *persistence.Task,
numFailed int,
) error {
// Add the failure to the task log.
pluralizer := pluralize.NewClient() pluralizer := pluralize.NewClient()
if numFailed >= threshold { taskLog := fmt.Sprintf(
newStatus = api.TaskStatusFailed
localLog = "too many workers failed this task, hard-failing it"
taskLog = fmt.Sprintf(
"Task failed by %s, Manager will mark it as hard failure", "Task failed by %s, Manager will mark it as hard failure",
pluralizer.Pluralize("worker", numFailed, true), pluralizer.Pluralize("worker", numFailed, true),
) )
} else { if err := f.logStorage.WriteTimestamped(logger, task.Job.UUID, task.UUID, taskLog); err != nil {
newStatus = api.TaskStatusSoftFailed logger.Error().Err(err).Msg("error writing failure notice to task log")
}
localLog = "worker failed this task, soft-failing to give another worker a try" // Mark the task as failed.
logger.Info().Str("newTaskStatus", string(api.TaskStatusFailed)).
Msg("too many workers failed this task, hard-failing it")
return f.stateMachine.TaskStatusChange(ctx, task, api.TaskStatusFailed)
}
// softFailTask writes a "soft failure" notice to the task's log, including how
// many more failures remain before a hard failure, and then asks the state
// machine to change the task's status to api.TaskStatusSoftFailed.
//
// A failure to write the task log is only reported via logger.Error(); it does
// not prevent the status change. The returned error is whatever
// f.stateMachine.TaskStatusChange returns.
//
// NOTE(review): the threshold is re-read from f.config here rather than passed
// in; presumably the caller compared numFailed against the same setting —
// confirm the two reads cannot disagree, otherwise failsToThreshold could be
// zero or negative in the log message.
// NOTE(review): the worker parameter is not referenced in the body shown here.
func (f *Flamenco) softFailTask(
ctx context.Context,
logger zerolog.Logger,
worker *persistence.Worker,
task *persistence.Task,
numFailed int,
) error {
threshold := f.config.Get().TaskFailAfterSoftFailCount
failsToThreshold := threshold - numFailed failsToThreshold := threshold - numFailed
taskLog = fmt.Sprintf(
// Add the failure to the task log.
pluralizer := pluralize.NewClient()
taskLog := fmt.Sprintf(
"Task failed by %s, Manager will mark it as soft failure. %d more %s will cause hard failure.", "Task failed by %s, Manager will mark it as soft failure. %d more %s will cause hard failure.",
pluralizer.Pluralize("worker", numFailed, true), pluralizer.Pluralize("worker", numFailed, true),
failsToThreshold, failsToThreshold,
pluralizer.Pluralize("failure", failsToThreshold, false), pluralizer.Pluralize("failure", failsToThreshold, false),
) )
}
if err := f.logStorage.WriteTimestamped(logger, task.Job.UUID, task.UUID, taskLog); err != nil { if err := f.logStorage.WriteTimestamped(logger, task.Job.UUID, task.UUID, taskLog); err != nil {
logger.Error().Err(err).Msg("error writing failure notice to task log") logger.Error().Err(err).Msg("error writing failure notice to task log")
} }
logger.Info().Str("newTaskStatus", string(newStatus)).Msg(localLog) // Mark the task as soft-failed.
return f.stateMachine.TaskStatusChange(ctx, task, newStatus) logger.Info().Str("newTaskStatus", string(api.TaskStatusSoftFailed)).
Msg("worker failed this task, soft-failing to give another worker a try")
return f.stateMachine.TaskStatusChange(ctx, task, api.TaskStatusSoftFailed)
} }