OAPI: Better 'SQLITE_BUSY' error handling
SQLite can return `SQLITE_BUSY` errors when it's doing too many things at the same time. This is now improved a bit by setting a 5-second timeout, during which the SQLite driver will wait for the database to become available. If that doesn't happen, Flamenco Manager will return a `503 Service Unavailable` response so that the client knows to back off a little.
This commit is contained in:
parent
89e130c04f
commit
930d7497d7
@ -7,6 +7,9 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
|
||||||
"git.blender.org/flamenco/internal/manager/job_compilers"
|
"git.blender.org/flamenco/internal/manager/job_compilers"
|
||||||
"git.blender.org/flamenco/internal/manager/persistence"
|
"git.blender.org/flamenco/internal/manager/persistence"
|
||||||
@ -155,3 +158,23 @@ func sendAPIError(e echo.Context, code int, message string, args ...interface{})
|
|||||||
}
|
}
|
||||||
return e.JSON(code, petErr)
|
return e.JSON(code, petErr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sendAPIErrorDBBusy sends a HTTP 503 Service Unavailable, with a hopefully
|
||||||
|
// reasonable "retry after" header.
|
||||||
|
func sendAPIErrorDBBusy(e echo.Context, message string, args ...interface{}) error {
|
||||||
|
if len(args) > 0 {
|
||||||
|
// Only interpret 'message' as format string if there are actually format parameters.
|
||||||
|
message = fmt.Sprintf(message, args)
|
||||||
|
}
|
||||||
|
|
||||||
|
code := http.StatusServiceUnavailable
|
||||||
|
apiErr := api.Error{
|
||||||
|
Code: int32(code),
|
||||||
|
Message: message,
|
||||||
|
}
|
||||||
|
|
||||||
|
retryAfter := 1 * time.Second
|
||||||
|
seconds := int64(retryAfter.Seconds())
|
||||||
|
e.Response().Header().Set("Retry-After", strconv.FormatInt(seconds, 10))
|
||||||
|
return e.JSON(code, apiErr)
|
||||||
|
}
|
||||||
|
@ -49,6 +49,9 @@ func (f *Flamenco) RegisterWorker(e echo.Context) error {
|
|||||||
}
|
}
|
||||||
if err := f.persist.CreateWorker(e.Request().Context(), &dbWorker); err != nil {
|
if err := f.persist.CreateWorker(e.Request().Context(), &dbWorker); err != nil {
|
||||||
logger.Warn().Err(err).Msg("error creating new worker in DB")
|
logger.Warn().Err(err).Msg("error creating new worker in DB")
|
||||||
|
if persistence.ErrIsDBBusy(err) {
|
||||||
|
return sendAPIErrorDBBusy(e, "too busy to register worker, try again later")
|
||||||
|
}
|
||||||
return sendAPIError(e, http.StatusBadRequest, "error registering worker")
|
return sendAPIError(e, http.StatusBadRequest, "error registering worker")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -264,6 +267,10 @@ func (f *Flamenco) ScheduleTask(e echo.Context) error {
|
|||||||
// Get a task to execute:
|
// Get a task to execute:
|
||||||
dbTask, err := f.persist.ScheduleTask(e.Request().Context(), worker)
|
dbTask, err := f.persist.ScheduleTask(e.Request().Context(), worker)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if persistence.ErrIsDBBusy(err) {
|
||||||
|
logger.Warn().Msg("database busy scheduling task for worker")
|
||||||
|
return sendAPIErrorDBBusy(e, "too busy to find a task for you, try again later")
|
||||||
|
}
|
||||||
logger.Warn().Err(err).Msg("error scheduling task for worker")
|
logger.Warn().Err(err).Msg("error scheduling task for worker")
|
||||||
return sendAPIError(e, http.StatusInternalServerError, "internal error finding a task for you: %v", err)
|
return sendAPIError(e, http.StatusInternalServerError, "internal error finding a task for you: %v", err)
|
||||||
}
|
}
|
||||||
|
@ -29,6 +29,10 @@ func OpenDB(ctx context.Context, dsn string) (*DB, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if err := setBusyTimeout(db.gormDB, 5*time.Second); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
if err := db.migrate(); err != nil {
|
if err := db.migrate(); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
39
internal/manager/persistence/sqlite_busy.go
Normal file
39
internal/manager/persistence/sqlite_busy.go
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
package persistence
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/glebarez/go-sqlite"
|
||||||
|
"gorm.io/gorm"
|
||||||
|
sqlite3 "modernc.org/sqlite/lib"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
// errDatabaseBusy is returned by this package when the operation could not be
|
||||||
|
// performed due to SQLite being busy.
|
||||||
|
errDatabaseBusy = errors.New("database busy")
|
||||||
|
)
|
||||||
|
|
||||||
|
// ErrIsDBBusy returns true when the error is a "database busy" error.
|
||||||
|
func ErrIsDBBusy(err error) bool {
|
||||||
|
return errors.Is(err, errDatabaseBusy) || isDatabaseBusyError(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// isDatabaseBusyError returns true when the error returned by GORM is a
|
||||||
|
// SQLITE_BUSY error.
|
||||||
|
func isDatabaseBusyError(err error) bool {
|
||||||
|
sqlErr, ok := err.(*sqlite.Error)
|
||||||
|
return ok && sqlErr.Code() == sqlite3.SQLITE_BUSY
|
||||||
|
}
|
||||||
|
|
||||||
|
// setBusyTimeout sets the SQLite busy_timeout busy handler.
|
||||||
|
// See https://sqlite.org/pragma.html#pragma_busy_timeout
|
||||||
|
func setBusyTimeout(gormDB *gorm.DB, busyTimeout time.Duration) error {
|
||||||
|
if tx := gormDB.Exec(fmt.Sprintf("PRAGMA busy_timeout = %d", busyTimeout.Milliseconds())); tx.Error != nil {
|
||||||
|
return fmt.Errorf("setting busy_timeout: %w", tx.Error)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
@ -33,6 +33,10 @@ func (db *DB) ScheduleTask(ctx context.Context, w *Worker) (*Task, error) {
|
|||||||
var err error
|
var err error
|
||||||
task, err = findTaskForWorker(tx, w)
|
task, err = findTaskForWorker(tx, w)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if isDatabaseBusyError(err) {
|
||||||
|
logger.Trace().Err(err).Msg("database busy while finding task for worker")
|
||||||
|
return errDatabaseBusy
|
||||||
|
}
|
||||||
logger.Error().Err(err).Msg("finding task for worker")
|
logger.Error().Err(err).Msg("finding task for worker")
|
||||||
return fmt.Errorf("finding task for worker: %w", err)
|
return fmt.Errorf("finding task for worker: %w", err)
|
||||||
}
|
}
|
||||||
@ -43,6 +47,11 @@ func (db *DB) ScheduleTask(ctx context.Context, w *Worker) (*Task, error) {
|
|||||||
|
|
||||||
// Found a task, now assign it to the requesting worker.
|
// Found a task, now assign it to the requesting worker.
|
||||||
if err := assignTaskToWorker(tx, w, task); err != nil {
|
if err := assignTaskToWorker(tx, w, task); err != nil {
|
||||||
|
if isDatabaseBusyError(err) {
|
||||||
|
logger.Trace().Err(err).Msg("database busy while assigning task to worker")
|
||||||
|
return errDatabaseBusy
|
||||||
|
}
|
||||||
|
|
||||||
logger.Warn().
|
logger.Warn().
|
||||||
Str("taskID", task.UUID).
|
Str("taskID", task.UUID).
|
||||||
Err(err).
|
Err(err).
|
||||||
|
@ -766,10 +766,22 @@ components:
|
|||||||
required: [jobs]
|
required: [jobs]
|
||||||
|
|
||||||
Error:
|
Error:
|
||||||
|
description: Generic error response.
|
||||||
type: object
|
type: object
|
||||||
required: [code, message]
|
required: [code, message]
|
||||||
properties:
|
properties:
|
||||||
code: {type: integer, format: int32}
|
code:
|
||||||
|
type: integer
|
||||||
|
format: int32
|
||||||
|
description: >
|
||||||
|
HTTP status code of this response. Is included in the payload so
|
||||||
|
that a single object represents all error information.
|
||||||
|
|
||||||
|
Code 503 is used when the database is busy. The HTTP response will
|
||||||
|
contain a 'Retry-After' HTTP header that indicates after which time
|
||||||
|
the request can be retried. Following the header is not mandatory,
|
||||||
|
and it's up to the client to do something reasonable like
|
||||||
|
exponential backoff.
|
||||||
message: {type: string}
|
message: {type: string}
|
||||||
|
|
||||||
SecurityError:
|
SecurityError:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user