From 8c86d4c1a9fac52a93d389f2c14e055a843816a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sybren=20A=2E=20St=C3=BCvel?= Date: Thu, 28 Jul 2022 14:36:01 +0200 Subject: [PATCH] Worker: Wait for subprocess even when it failed The Worker now always waits for subprocesses. When faced with multiple errors (like I/O reading from stdout and a returned error status from the process) will return the most important one (in this case the exit status of the process). Subprocesses need to be waited for, even when they crashed, otherwise they will linger around as "defunct" processes. This caused out-of-memory errors, because several defunct Blenders were eating up the memory. --- internal/worker/cli_runner/cli_runner.go | 44 ++++++++++++++++-------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/internal/worker/cli_runner/cli_runner.go b/internal/worker/cli_runner/cli_runner.go index 245bad5e..22df760c 100644 --- a/internal/worker/cli_runner/cli_runner.go +++ b/internal/worker/cli_runner/cli_runner.go @@ -30,6 +30,8 @@ func (cli *CLIRunner) CommandContext(ctx context.Context, name string, arg ...st // RunWithTextOutput runs a command and sends its output line-by-line to the // lineChannel. Stdout and stderr are combined. +// Before returning. RunWithTextOutput() waits for the subprocess, to ensure it +// doesn't become defunct. func (cli *CLIRunner) RunWithTextOutput( ctx context.Context, logger zerolog.Logger, @@ -53,14 +55,22 @@ func (cli *CLIRunner) RunWithTextOutput( reader := bufio.NewReaderSize(outPipe, StdoutBufferSize) + // returnErr determines which error is returned to the caller. More important + // errors overwrite less important ones. This is done via a variable instead + // of simply returning, because the function must be run to completion in + // order to wait for processes (and not create defunct ones). + var returnErr error = nil +readloop: for { lineBytes, isPrefix, readErr := reader.ReadLine() - if readErr == io.EOF { - break - } - if readErr != nil { + + switch { + case readErr == io.EOF: + break readloop + case readErr != nil: logger.Error().Err(err).Msg("error reading stdout/err") - return err + returnErr = readErr + break readloop } line := string(lineBytes) @@ -77,27 +87,31 @@ func (cli *CLIRunner) RunWithTextOutput( } if err := logChunker.Append(ctx, fmt.Sprintf("pid=%d > %s", blenderPID, line)); err != nil { - return fmt.Errorf("appending log entry to log chunker: %w", err) + returnErr = fmt.Errorf("appending log entry to log chunker: %w", err) + break readloop } } if err := logChunker.Flush(ctx); err != nil { - return fmt.Errorf("flushing log chunker: %w", err) + // any readErr is less important, as these are likely caused by other + // issues, which will surface on the Wait() and Success() calls. + returnErr = fmt.Errorf("flushing log chunker: %w", err) } if err := execCmd.Wait(); err != nil { - logger.Error().Err(err).Msg("error in CLI execution") - return err - } - - if execCmd.ProcessState.Success() { - logger.Info().Msg("command exited succesfully") - } else { logger.Error(). Int("exitCode", execCmd.ProcessState.ExitCode()). Msg("command exited abnormally") - return fmt.Errorf("command exited abnormally with code %d", execCmd.ProcessState.ExitCode()) + returnErr = fmt.Errorf("command exited abnormally with code %d", execCmd.ProcessState.ExitCode()) } + if returnErr != nil { + logger.Error().Err(err). + Int("exitCode", execCmd.ProcessState.ExitCode()). + Msg("command exited abnormally") + return returnErr + } + + logger.Info().Msg("command exited succesfully") return nil }