Worker: Wait for subprocess even when it failed

The Worker now always waits for subprocesses. When faced with multiple
errors (like an I/O error reading from stdout and an error status
returned by the process), it will return the most important one (in this
case the exit status of the process).

Subprocesses need to be waited for, even when they crashed; otherwise
they will linger around as "defunct" processes. This caused
out-of-memory errors, because several defunct Blenders were eating up
the memory.
This commit is contained in:
Sybren A. Stüvel 2022-07-28 14:36:01 +02:00
parent c79fe55068
commit 8c86d4c1a9

View File

@ -30,6 +30,8 @@ func (cli *CLIRunner) CommandContext(ctx context.Context, name string, arg ...st
// RunWithTextOutput runs a command and sends its output line-by-line to the // RunWithTextOutput runs a command and sends its output line-by-line to the
// lineChannel. Stdout and stderr are combined. // lineChannel. Stdout and stderr are combined.
// Before returning, RunWithTextOutput() waits for the subprocess, to ensure it
// doesn't become defunct.
func (cli *CLIRunner) RunWithTextOutput( func (cli *CLIRunner) RunWithTextOutput(
ctx context.Context, ctx context.Context,
logger zerolog.Logger, logger zerolog.Logger,
@ -53,14 +55,22 @@ func (cli *CLIRunner) RunWithTextOutput(
reader := bufio.NewReaderSize(outPipe, StdoutBufferSize) reader := bufio.NewReaderSize(outPipe, StdoutBufferSize)
// returnErr determines which error is returned to the caller. More important
// errors overwrite less important ones. This is done via a variable instead
// of simply returning, because the function must be run to completion in
// order to wait for processes (and not create defunct ones).
var returnErr error = nil
readloop:
for { for {
lineBytes, isPrefix, readErr := reader.ReadLine() lineBytes, isPrefix, readErr := reader.ReadLine()
if readErr == io.EOF {
break switch {
} case readErr == io.EOF:
if readErr != nil { break readloop
case readErr != nil:
logger.Error().Err(err).Msg("error reading stdout/err") logger.Error().Err(err).Msg("error reading stdout/err")
return err returnErr = readErr
break readloop
} }
line := string(lineBytes) line := string(lineBytes)
@ -77,27 +87,31 @@ func (cli *CLIRunner) RunWithTextOutput(
} }
if err := logChunker.Append(ctx, fmt.Sprintf("pid=%d > %s", blenderPID, line)); err != nil { if err := logChunker.Append(ctx, fmt.Sprintf("pid=%d > %s", blenderPID, line)); err != nil {
return fmt.Errorf("appending log entry to log chunker: %w", err) returnErr = fmt.Errorf("appending log entry to log chunker: %w", err)
break readloop
} }
} }
if err := logChunker.Flush(ctx); err != nil { if err := logChunker.Flush(ctx); err != nil {
return fmt.Errorf("flushing log chunker: %w", err) // any readErr is less important, as these are likely caused by other
// issues, which will surface on the Wait() and Success() calls.
returnErr = fmt.Errorf("flushing log chunker: %w", err)
} }
if err := execCmd.Wait(); err != nil { if err := execCmd.Wait(); err != nil {
logger.Error().Err(err).Msg("error in CLI execution")
return err
}
if execCmd.ProcessState.Success() {
logger.Info().Msg("command exited succesfully")
} else {
logger.Error(). logger.Error().
Int("exitCode", execCmd.ProcessState.ExitCode()). Int("exitCode", execCmd.ProcessState.ExitCode()).
Msg("command exited abnormally") Msg("command exited abnormally")
return fmt.Errorf("command exited abnormally with code %d", execCmd.ProcessState.ExitCode()) returnErr = fmt.Errorf("command exited abnormally with code %d", execCmd.ProcessState.ExitCode())
} }
if returnErr != nil {
logger.Error().Err(err).
Int("exitCode", execCmd.ProcessState.ExitCode()).
Msg("command exited abnormally")
return returnErr
}
logger.Info().Msg("command exited succesfully")
return nil return nil
} }