mirror of
https://github.com/opencloud-eu/opencloud.git
synced 2026-04-28 06:49:49 -05:00
Merge pull request #6840 from owncloud/graceful-timeout
storage-users: Add config option for graceful shutdown timeout
This commit is contained in:
@@ -13,7 +13,7 @@ require (
|
||||
github.com/coreos/go-oidc v2.2.1+incompatible
|
||||
github.com/coreos/go-oidc/v3 v3.6.0
|
||||
github.com/cs3org/go-cs3apis v0.0.0-20230516150832-730ac860c71d
|
||||
github.com/cs3org/reva/v2 v2.15.0
|
||||
github.com/cs3org/reva/v2 v2.15.1-0.20230718140539-0af2a07c7fd9
|
||||
github.com/disintegration/imaging v1.6.2
|
||||
github.com/dutchcoders/go-clamd v0.0.0-20170520113014-b970184f4d9e
|
||||
github.com/egirna/icap-client v0.1.1
|
||||
|
||||
@@ -625,8 +625,8 @@ github.com/crewjam/httperr v0.2.0 h1:b2BfXR8U3AlIHwNeFFvZ+BV1LFvKLlzMjzaTnZMybNo
|
||||
github.com/crewjam/httperr v0.2.0/go.mod h1:Jlz+Sg/XqBQhyMjdDiC+GNNRzZTD7x39Gu3pglZ5oH4=
|
||||
github.com/crewjam/saml v0.4.13 h1:TYHggH/hwP7eArqiXSJUvtOPNzQDyQ7vwmwEqlFWhMc=
|
||||
github.com/crewjam/saml v0.4.13/go.mod h1:igEejV+fihTIlHXYP8zOec3V5A8y3lws5bQBFsTm4gA=
|
||||
github.com/cs3org/reva/v2 v2.15.0 h1:saU2Heig/HswkNdDHh2Jttmhsn0nfTkvaYbXUspcNOM=
|
||||
github.com/cs3org/reva/v2 v2.15.0/go.mod h1:4z5EQghS2LhSWZWocH51Dw9VAs16No1zSFvFgQtgS7w=
|
||||
github.com/cs3org/reva/v2 v2.15.1-0.20230718140539-0af2a07c7fd9 h1:ycV7H1siLmMiRmc9kaS0WonysHYUT7irvH4FDDAghqQ=
|
||||
github.com/cs3org/reva/v2 v2.15.1-0.20230718140539-0af2a07c7fd9/go.mod h1:4z5EQghS2LhSWZWocH51Dw9VAs16No1zSFvFgQtgS7w=
|
||||
github.com/cubewise-code/go-mime v0.0.0-20200519001935-8c5762b177d8 h1:Z9lwXumT5ACSmJ7WGnFl+OMLLjpz5uR2fyz7dC255FI=
|
||||
github.com/cubewise-code/go-mime v0.0.0-20200519001935-8c5762b177d8/go.mod h1:4abs/jPXcmJzYoYGF91JF9Uq9s/KL5n1jvFDix8KcqY=
|
||||
github.com/cyberdelia/templates v0.0.0-20141128023046-ca7fffd4298c/go.mod h1:GyV+0YP4qX0UQ7r2MoYZ+AvYDp12OF5yg4q8rGnyNh4=
|
||||
|
||||
@@ -6,6 +6,18 @@ Purpose and description to be added
|
||||
|
||||
Starting with ocis version 3.0.0, the default backend for metadata switched to messagepack. If the setting `STORAGE_USERS_OCIS_METADATA_BACKEND` has not been defined manually, the backend will be migrated to `messagepack` automatically. Though still possible to manually configure `xattrs`, this setting should not be used anymore as it will be removed in a later version.
|
||||
|
||||
## Graceful Shutdown
|
||||
|
||||
Starting with Infinite Scale version 3.1, you can define a graceful shutdown period for the `storage-users` service.
|
||||
|
||||
IMPORTANT: The graceful shutdown period is only applicable if the `storage-users` service runs as a standalone service. It does not apply if the `storage-users` service runs as part of the single binary or as a single Docker environment. To build an environment where the `storage-users` service runs as a standalone service, you must start two instances, one _without_ the `storage-users` service and one _only with_ the `storage-users` service. Note that both instances must be able to communicate on the same network.
|
||||
|
||||
When hard-stopping Infinite Scale, for example with the `kill <pid>` command (SIGKILL), it is possible and likely that not all data from the decomposedfs (metadata) has been written to the storage, which may result in an inconsistent decomposedfs. When gracefully shutting down Infinite Scale, using a signal like SIGTERM, the process will no longer accept any write requests from _other_ services and will try to write the internal open requests, which can take an undefined duration based on many factors. To mitigate that situation, the following things have been implemented:
|
||||
|
||||
* With the value of the environment variable `STORAGE_USERS_GRACEFUL_SHUTDOWN_TIMEOUT`, the `storage-users` service will delay its shutdown, giving it time to finalize writing necessary data. This delay can be necessary if there is a lot of data to be saved and/or if storage access/throughput is slow. In such a case you would receive an error log entry informing you that not all data could be saved in time. To prevent such occurrences, you must increase the default value.
|
||||
|
||||
* If a shutdown error has been logged, the command-line maintenance tool [Inspect and Manipulate Node Metadata](https://doc.owncloud.com/ocis/next/maintenance/commands/commands.html#inspect-and-manipulate-node-metadata) can help to fix the issue. Please contact support for details.
|
||||
|
||||
## CLI Commands
|
||||
|
||||
### Manage Unfinished Uploads
|
||||
|
||||
@@ -21,7 +21,8 @@ type Config struct {
|
||||
TokenManager *TokenManager `yaml:"token_manager"`
|
||||
Reva *shared.Reva `yaml:"reva"`
|
||||
|
||||
SkipUserGroupsInToken bool `yaml:"skip_user_groups_in_token" env:"STORAGE_USERS_SKIP_USER_GROUPS_IN_TOKEN" desc:"Disables the loading of user's group memberships from the reva access token."`
|
||||
SkipUserGroupsInToken bool `yaml:"skip_user_groups_in_token" env:"STORAGE_USERS_SKIP_USER_GROUPS_IN_TOKEN" desc:"Disables the loading of user's group memberships from the reva access token."`
|
||||
GracefulShutdownTimeout int `yaml:"graceful_shutdown_timeout" env:"STORAGE_USERS_GRACEFUL_SHUTDOWN_TIMEOUT" desc:"The number of seconds to wait for the 'storage-users' service to shutdown cleanly before exiting with an error that gets logged. Note: This setting is only applicable when running the 'storage-users' service as a standalone service. See the text description for more details."`
|
||||
|
||||
Driver string `yaml:"driver" env:"STORAGE_USERS_DRIVER" desc:"The storage driver which should be used by the service. Defaults to 'ocis', Supported values are: 'ocis', 's3ng' and 'owncloudsql'. The 'ocis' driver stores all data (blob and meta data) in an POSIX compliant volume. The 's3ng' driver stores metadata in a POSIX compliant volume and uploads blobs to the s3 bucket."`
|
||||
Drivers Drivers `yaml:"drivers"`
|
||||
|
||||
@@ -41,12 +41,13 @@ func DefaultConfig() *config.Config {
|
||||
Service: config.Service{
|
||||
Name: "storage-users",
|
||||
},
|
||||
Reva: shared.DefaultRevaConfig(),
|
||||
DataServerURL: "http://localhost:9158/data",
|
||||
DataGatewayURL: "https://localhost:9200/data",
|
||||
TransferExpires: 86400,
|
||||
UploadExpiration: 24 * 60 * 60,
|
||||
Driver: "ocis",
|
||||
Reva: shared.DefaultRevaConfig(),
|
||||
DataServerURL: "http://localhost:9158/data",
|
||||
DataGatewayURL: "https://localhost:9200/data",
|
||||
TransferExpires: 86400,
|
||||
UploadExpiration: 24 * 60 * 60,
|
||||
GracefulShutdownTimeout: 30,
|
||||
Driver: "ocis",
|
||||
Drivers: config.Drivers{
|
||||
OwnCloudSQL: config.OwnCloudSQLDriver{
|
||||
Root: filepath.Join(defaults.BaseDataPath(), "storage", "owncloud"),
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
// Package revaconfig contains the config for the reva service
|
||||
package revaconfig
|
||||
|
||||
import (
|
||||
@@ -10,11 +11,12 @@ import (
|
||||
func StorageUsersConfigFromStruct(cfg *config.Config) map[string]interface{} {
|
||||
rcfg := map[string]interface{}{
|
||||
"core": map[string]interface{}{
|
||||
"tracing_enabled": cfg.Tracing.Enabled,
|
||||
"tracing_exporter": cfg.Tracing.Type,
|
||||
"tracing_endpoint": cfg.Tracing.Endpoint,
|
||||
"tracing_collector": cfg.Tracing.Collector,
|
||||
"tracing_service_name": cfg.Service.Name,
|
||||
"tracing_enabled": cfg.Tracing.Enabled,
|
||||
"tracing_exporter": cfg.Tracing.Type,
|
||||
"tracing_endpoint": cfg.Tracing.Endpoint,
|
||||
"tracing_collector": cfg.Tracing.Collector,
|
||||
"tracing_service_name": cfg.Service.Name,
|
||||
"graceful_shutdown_timeout": cfg.GracefulShutdownTimeout,
|
||||
},
|
||||
"shared": map[string]interface{}{
|
||||
"jwt_secret": cfg.TokenManager.JWTSecret,
|
||||
|
||||
+71
-46
@@ -36,13 +36,14 @@ import (
|
||||
// Watcher watches a process for a graceful restart
|
||||
// preserving open network sockets to avoid packet loss.
|
||||
type Watcher struct {
|
||||
log zerolog.Logger
|
||||
graceful bool
|
||||
ppid int
|
||||
lns map[string]net.Listener
|
||||
ss map[string]Server
|
||||
pidFile string
|
||||
childPIDs []int
|
||||
log zerolog.Logger
|
||||
graceful bool
|
||||
ppid int
|
||||
lns map[string]net.Listener
|
||||
ss map[string]Server
|
||||
pidFile string
|
||||
childPIDs []int
|
||||
gracefulShutdownTimeout int
|
||||
}
|
||||
|
||||
// Option represent an option.
|
||||
@@ -62,6 +63,12 @@ func WithPIDFile(fn string) Option {
|
||||
}
|
||||
}
|
||||
|
||||
func WithGracefuleShutdownTimeout(seconds int) Option {
|
||||
return func(w *Watcher) {
|
||||
w.gracefulShutdownTimeout = seconds
|
||||
}
|
||||
}
|
||||
|
||||
// NewWatcher creates a Watcher.
|
||||
func NewWatcher(opts ...Option) *Watcher {
|
||||
w := &Watcher{
|
||||
@@ -279,51 +286,69 @@ func (w *Watcher) TrapSignals() {
|
||||
}
|
||||
|
||||
case syscall.SIGQUIT:
|
||||
w.log.Info().Msg("preparing for a graceful shutdown with deadline of 10 seconds")
|
||||
go func() {
|
||||
count := 10
|
||||
ticker := time.NewTicker(time.Second)
|
||||
for ; true; <-ticker.C {
|
||||
w.log.Info().Msgf("shutting down in %d seconds", count-1)
|
||||
count--
|
||||
if count <= 0 {
|
||||
w.log.Info().Msg("deadline reached before draining active conns, hard stopping ...")
|
||||
for _, s := range w.ss {
|
||||
err := s.Stop()
|
||||
if err != nil {
|
||||
w.log.Error().Err(err).Msg("error stopping server")
|
||||
}
|
||||
w.log.Info().Msgf("fd to %s:%s abruptly closed", s.Network(), s.Address())
|
||||
}
|
||||
w.Exit(1)
|
||||
}
|
||||
}
|
||||
}()
|
||||
for _, s := range w.ss {
|
||||
w.log.Info().Msgf("fd to %s:%s gracefully closed ", s.Network(), s.Address())
|
||||
err := s.GracefulStop()
|
||||
if err != nil {
|
||||
w.log.Error().Err(err).Msg("error stopping server")
|
||||
w.log.Info().Msg("exit with error code 1")
|
||||
w.Exit(1)
|
||||
}
|
||||
}
|
||||
w.log.Info().Msg("exit with error code 0")
|
||||
w.Exit(0)
|
||||
gracefulShutdown(w)
|
||||
case syscall.SIGINT, syscall.SIGTERM:
|
||||
w.log.Info().Msg("preparing for hard shutdown, aborting all conns")
|
||||
for _, s := range w.ss {
|
||||
w.log.Info().Msgf("fd to %s:%s abruptly closed", s.Network(), s.Address())
|
||||
err := s.Stop()
|
||||
if err != nil {
|
||||
w.log.Error().Err(err).Msg("error stopping server")
|
||||
}
|
||||
if w.gracefulShutdownTimeout == 0 {
|
||||
hardShutdown(w)
|
||||
}
|
||||
w.Exit(0)
|
||||
gracefulShutdown(w)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Ideally this would call exit() but properly return an error. The
|
||||
// exit() is problematic (i.e. racey) especiaily when orchestrating multiple
|
||||
// reva services from some external runtime (like in the "ocis server" case
|
||||
func gracefulShutdown(w *Watcher) {
|
||||
w.log.Info().Int("Timeout", w.gracefulShutdownTimeout).Msg("preparing for a graceful shutdown with deadline")
|
||||
go func() {
|
||||
count := w.gracefulShutdownTimeout
|
||||
ticker := time.NewTicker(time.Second)
|
||||
for ; true; <-ticker.C {
|
||||
w.log.Info().Msgf("shutting down in %d seconds", count-1)
|
||||
count--
|
||||
if count <= 0 {
|
||||
w.log.Info().Msg("deadline reached before draining active conns, hard stopping ...")
|
||||
for _, s := range w.ss {
|
||||
err := s.Stop()
|
||||
if err != nil {
|
||||
w.log.Error().Err(err).Msg("error stopping server")
|
||||
}
|
||||
w.log.Info().Msgf("fd to %s:%s abruptly closed", s.Network(), s.Address())
|
||||
}
|
||||
w.Exit(1)
|
||||
}
|
||||
}
|
||||
}()
|
||||
for _, s := range w.ss {
|
||||
w.log.Info().Msgf("fd to %s:%s gracefully closed ", s.Network(), s.Address())
|
||||
err := s.GracefulStop()
|
||||
if err != nil {
|
||||
w.log.Error().Err(err).Msg("error stopping server")
|
||||
w.log.Info().Msg("exit with error code 1")
|
||||
|
||||
w.Exit(1)
|
||||
}
|
||||
}
|
||||
w.log.Info().Msg("exit with error code 0")
|
||||
w.Exit(0)
|
||||
}
|
||||
|
||||
// TODO: Ideally this would call exit() but properly return an error. The
|
||||
// exit() is problematic (i.e. racey) especiaily when orchestrating multiple
|
||||
// reva services from some external runtime (like in the "ocis server" case
|
||||
func hardShutdown(w *Watcher) {
|
||||
w.log.Info().Msg("preparing for hard shutdown, aborting all conns")
|
||||
for _, s := range w.ss {
|
||||
w.log.Info().Msgf("fd to %s:%s abruptly closed", s.Network(), s.Address())
|
||||
err := s.Stop()
|
||||
if err != nil {
|
||||
w.log.Error().Err(err).Msg("error stopping server")
|
||||
}
|
||||
}
|
||||
w.Exit(0)
|
||||
}
|
||||
|
||||
func getListenerFile(ln net.Listener) (*os.File, error) {
|
||||
switch t := ln.(type) {
|
||||
case *net.TCPListener:
|
||||
|
||||
+10
-8
@@ -72,6 +72,8 @@ type coreConf struct {
|
||||
|
||||
// TracingService specifies the service. i.e OpenCensus, OpenTelemetry, OpenTracing...
|
||||
TracingService string `mapstructure:"tracing_service"`
|
||||
|
||||
GracefulShutdownTimeout int `mapstructure:"graceful_shutdown_timeout"`
|
||||
}
|
||||
|
||||
func run(
|
||||
@@ -92,7 +94,7 @@ func run(
|
||||
initCPUCount(coreConf, logger)
|
||||
|
||||
servers := initServers(mainConf, logger, tp)
|
||||
watcher, err := initWatcher(logger, filename)
|
||||
watcher, err := initWatcher(logger, filename, coreConf.GracefulShutdownTimeout)
|
||||
if err != nil {
|
||||
log.Panic(err)
|
||||
}
|
||||
@@ -110,8 +112,8 @@ func initListeners(watcher *grace.Watcher, servers map[string]grace.Server, log
|
||||
return listeners
|
||||
}
|
||||
|
||||
func initWatcher(log *zerolog.Logger, filename string) (*grace.Watcher, error) {
|
||||
watcher, err := handlePIDFlag(log, filename)
|
||||
func initWatcher(log *zerolog.Logger, filename string, gracefulShutdownTimeout int) (*grace.Watcher, error) {
|
||||
watcher, err := handlePIDFlag(log, filename, gracefulShutdownTimeout)
|
||||
// TODO(labkode): maybe pidfile can be created later on? like once a server is going to be created?
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msg("error creating grace watcher")
|
||||
@@ -187,11 +189,11 @@ func initLogger(conf *logConf) *zerolog.Logger {
|
||||
return log
|
||||
}
|
||||
|
||||
func handlePIDFlag(l *zerolog.Logger, pidFile string) (*grace.Watcher, error) {
|
||||
var opts []grace.Option
|
||||
opts = append(opts, grace.WithPIDFile(pidFile))
|
||||
opts = append(opts, grace.WithLogger(l.With().Str("pkg", "grace").Logger()))
|
||||
w := grace.NewWatcher(opts...)
|
||||
func handlePIDFlag(l *zerolog.Logger, pidFile string, gracefulShutdownTimeout int) (*grace.Watcher, error) {
|
||||
w := grace.NewWatcher(grace.WithPIDFile(pidFile),
|
||||
grace.WithLogger(l.With().Str("pkg", "grace").Logger()),
|
||||
grace.WithGracefuleShutdownTimeout(gracefulShutdownTimeout),
|
||||
)
|
||||
err := w.WritePID()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
+10
@@ -49,6 +49,16 @@ func (md Attributes) SetInt64(key string, val int64) {
|
||||
md[key] = []byte(strconv.FormatInt(val, 10))
|
||||
}
|
||||
|
||||
// UInt64 reads an uint64 value
|
||||
func (md Attributes) UInt64(key string) (uint64, error) {
|
||||
return strconv.ParseUint(string(md[key]), 10, 64)
|
||||
}
|
||||
|
||||
// SetInt64 sets an uint64 value
|
||||
func (md Attributes) SetUInt64(key string, val uint64) {
|
||||
md[key] = []byte(strconv.FormatUint(val, 10))
|
||||
}
|
||||
|
||||
// SetXattrs sets multiple extended attributes on the write-through cache/node
|
||||
func (n *Node) SetXattrsWithContext(ctx context.Context, attribs map[string][]byte, acquireLock bool) (err error) {
|
||||
if n.xattrsCache != nil {
|
||||
|
||||
+3
@@ -122,6 +122,9 @@ func (fs *Decomposedfs) CreateStorageSpace(ctx context.Context, req *provider.Cr
|
||||
metadata.SetString(prefixes.NameAttr, req.Name)
|
||||
metadata.SetString(prefixes.SpaceNameAttr, req.Name)
|
||||
|
||||
// This space is empty so set initial treesize to 0
|
||||
metadata.SetUInt64(prefixes.TreesizeAttr, 0)
|
||||
|
||||
if req.Type != "" {
|
||||
metadata.SetString(prefixes.SpaceTypeAttr, req.Type)
|
||||
}
|
||||
|
||||
Vendored
+1
-1
@@ -352,7 +352,7 @@ github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1
|
||||
github.com/cs3org/go-cs3apis/cs3/storage/registry/v1beta1
|
||||
github.com/cs3org/go-cs3apis/cs3/tx/v1beta1
|
||||
github.com/cs3org/go-cs3apis/cs3/types/v1beta1
|
||||
# github.com/cs3org/reva/v2 v2.15.0
|
||||
# github.com/cs3org/reva/v2 v2.15.1-0.20230718140539-0af2a07c7fd9
|
||||
## explicit; go 1.20
|
||||
github.com/cs3org/reva/v2/cmd/revad/internal/grace
|
||||
github.com/cs3org/reva/v2/cmd/revad/runtime
|
||||
|
||||
Reference in New Issue
Block a user