Merge pull request #6840 from owncloud/graceful-timeout

storage-users: Add config option for graceful shutdown timeout
This commit is contained in:
Martin
2023-07-19 16:19:25 +02:00
committed by GitHub
11 changed files with 126 additions and 70 deletions
+1 -1
View File
@@ -13,7 +13,7 @@ require (
github.com/coreos/go-oidc v2.2.1+incompatible
github.com/coreos/go-oidc/v3 v3.6.0
github.com/cs3org/go-cs3apis v0.0.0-20230516150832-730ac860c71d
github.com/cs3org/reva/v2 v2.15.0
github.com/cs3org/reva/v2 v2.15.1-0.20230718140539-0af2a07c7fd9
github.com/disintegration/imaging v1.6.2
github.com/dutchcoders/go-clamd v0.0.0-20170520113014-b970184f4d9e
github.com/egirna/icap-client v0.1.1
+2 -2
View File
@@ -625,8 +625,8 @@ github.com/crewjam/httperr v0.2.0 h1:b2BfXR8U3AlIHwNeFFvZ+BV1LFvKLlzMjzaTnZMybNo
github.com/crewjam/httperr v0.2.0/go.mod h1:Jlz+Sg/XqBQhyMjdDiC+GNNRzZTD7x39Gu3pglZ5oH4=
github.com/crewjam/saml v0.4.13 h1:TYHggH/hwP7eArqiXSJUvtOPNzQDyQ7vwmwEqlFWhMc=
github.com/crewjam/saml v0.4.13/go.mod h1:igEejV+fihTIlHXYP8zOec3V5A8y3lws5bQBFsTm4gA=
github.com/cs3org/reva/v2 v2.15.0 h1:saU2Heig/HswkNdDHh2Jttmhsn0nfTkvaYbXUspcNOM=
github.com/cs3org/reva/v2 v2.15.0/go.mod h1:4z5EQghS2LhSWZWocH51Dw9VAs16No1zSFvFgQtgS7w=
github.com/cs3org/reva/v2 v2.15.1-0.20230718140539-0af2a07c7fd9 h1:ycV7H1siLmMiRmc9kaS0WonysHYUT7irvH4FDDAghqQ=
github.com/cs3org/reva/v2 v2.15.1-0.20230718140539-0af2a07c7fd9/go.mod h1:4z5EQghS2LhSWZWocH51Dw9VAs16No1zSFvFgQtgS7w=
github.com/cubewise-code/go-mime v0.0.0-20200519001935-8c5762b177d8 h1:Z9lwXumT5ACSmJ7WGnFl+OMLLjpz5uR2fyz7dC255FI=
github.com/cubewise-code/go-mime v0.0.0-20200519001935-8c5762b177d8/go.mod h1:4abs/jPXcmJzYoYGF91JF9Uq9s/KL5n1jvFDix8KcqY=
github.com/cyberdelia/templates v0.0.0-20141128023046-ca7fffd4298c/go.mod h1:GyV+0YP4qX0UQ7r2MoYZ+AvYDp12OF5yg4q8rGnyNh4=
+12
View File
@@ -6,6 +6,18 @@ Purpose and description to be added
Starting with ocis version 3.0.0, the default backend for metadata switched to messagepack. If the setting `STORAGE_USERS_OCIS_METADATA_BACKEND` has not been defined manually, the backend will be migrated to `messagepack` automatically. Though still possible to manually configure `xattrs`, this setting should not be used anymore as it will be removed in a later version.
## Graceful Shutdown
Starting with Infinite Scale version 3.1, you can define a graceful shutdown period for the `storage-users` service.
IMPORTANT: The graceful shutdown period is only applicable if the `storage-users` service runs as standalone service. It does not apply if the `storage-users` service runs as part of the single binary or as single Docker environment. To build an environment where the `storage-users` service runs as a standalone service, you must start two instances, one _without_ the `storage-users` service and one _only with_ the `storage-users` service. Note that both instances must be able to communicate on the same network.
When hard-stopping Infinite Scale, for example with the `kill <pid>` command (SIGKILL), it is possible and likely that not all data from the decomposedfs (metadata) has been written to the storage which may result in an inconsistent decomposedfs. When gracefully shutting down Infinite Scale, using a command like SIGTERM, the process will no longer accept any write requests from _other_ services and will try to write the internal open requests which can take an undefined duration based on many factors. To mitigate that situation, the following things have been implemented:
* With the value of the environment variable `STORAGE_USERS_GRACEFUL_SHUTDOWN_TIMEOUT`, the `storage-users` service will delay its shutdown giving it time to finalize writing necessary data. This delay can be necessary if there is a lot of data to be saved and/or if storage access/throughput is slow. In such a case you would receive an error log entry informing you that not all data could be saved in time. To prevent such occurrences, you must increase the default value.
* If a shutdown error has been logged, the command-line maintenance tool [Inspect and Manipulate Node Metadata](https://doc.owncloud.com/ocis/next/maintenance/commands/commands.html#inspect-and-manipulate-node-metadata) can help to fix the issue. Please contact support for details.
## CLI Commands
### Manage Unfinished Uploads
+2 -1
View File
@@ -21,7 +21,8 @@ type Config struct {
TokenManager *TokenManager `yaml:"token_manager"`
Reva *shared.Reva `yaml:"reva"`
SkipUserGroupsInToken bool `yaml:"skip_user_groups_in_token" env:"STORAGE_USERS_SKIP_USER_GROUPS_IN_TOKEN" desc:"Disables the loading of user's group memberships from the reva access token."`
SkipUserGroupsInToken bool `yaml:"skip_user_groups_in_token" env:"STORAGE_USERS_SKIP_USER_GROUPS_IN_TOKEN" desc:"Disables the loading of user's group memberships from the reva access token."`
GracefulShutdownTimeout int `yaml:"graceful_shutdown_timeout" env:"STORAGE_USERS_GRACEFUL_SHUTDOWN_TIMEOUT" desc:"The number of seconds to wait for the 'storage-users' service to shutdown cleanly before exiting with an error that gets logged. Note: This setting is only applicable when running the 'storage-users' service as a standalone service. See the text description for more details."`
Driver string `yaml:"driver" env:"STORAGE_USERS_DRIVER" desc:"The storage driver which should be used by the service. Defaults to 'ocis', Supported values are: 'ocis', 's3ng' and 'owncloudsql'. The 'ocis' driver stores all data (blob and meta data) in an POSIX compliant volume. The 's3ng' driver stores metadata in a POSIX compliant volume and uploads blobs to the s3 bucket."`
Drivers Drivers `yaml:"drivers"`
@@ -41,12 +41,13 @@ func DefaultConfig() *config.Config {
Service: config.Service{
Name: "storage-users",
},
Reva: shared.DefaultRevaConfig(),
DataServerURL: "http://localhost:9158/data",
DataGatewayURL: "https://localhost:9200/data",
TransferExpires: 86400,
UploadExpiration: 24 * 60 * 60,
Driver: "ocis",
Reva: shared.DefaultRevaConfig(),
DataServerURL: "http://localhost:9158/data",
DataGatewayURL: "https://localhost:9200/data",
TransferExpires: 86400,
UploadExpiration: 24 * 60 * 60,
GracefulShutdownTimeout: 30,
Driver: "ocis",
Drivers: config.Drivers{
OwnCloudSQL: config.OwnCloudSQLDriver{
Root: filepath.Join(defaults.BaseDataPath(), "storage", "owncloud"),
@@ -1,3 +1,4 @@
// Package revaconfig contains the config for the reva service
package revaconfig
import (
@@ -10,11 +11,12 @@ import (
func StorageUsersConfigFromStruct(cfg *config.Config) map[string]interface{} {
rcfg := map[string]interface{}{
"core": map[string]interface{}{
"tracing_enabled": cfg.Tracing.Enabled,
"tracing_exporter": cfg.Tracing.Type,
"tracing_endpoint": cfg.Tracing.Endpoint,
"tracing_collector": cfg.Tracing.Collector,
"tracing_service_name": cfg.Service.Name,
"tracing_enabled": cfg.Tracing.Enabled,
"tracing_exporter": cfg.Tracing.Type,
"tracing_endpoint": cfg.Tracing.Endpoint,
"tracing_collector": cfg.Tracing.Collector,
"tracing_service_name": cfg.Service.Name,
"graceful_shutdown_timeout": cfg.GracefulShutdownTimeout,
},
"shared": map[string]interface{}{
"jwt_secret": cfg.TokenManager.JWTSecret,
+71 -46
View File
@@ -36,13 +36,14 @@ import (
// Watcher watches a process for a graceful restart
// preserving open network sockets to avoid packet loss.
type Watcher struct {
log zerolog.Logger
graceful bool
ppid int
lns map[string]net.Listener
ss map[string]Server
pidFile string
childPIDs []int
log zerolog.Logger
graceful bool
ppid int
lns map[string]net.Listener
ss map[string]Server
pidFile string
childPIDs []int
gracefulShutdownTimeout int
}
// Option represent an option.
@@ -62,6 +63,12 @@ func WithPIDFile(fn string) Option {
}
}
// WithGracefulShutdownTimeout sets the number of seconds the watcher
// waits for servers to drain active connections during a graceful
// shutdown before hard-stopping them.
func WithGracefulShutdownTimeout(seconds int) Option {
	return func(w *Watcher) {
		w.gracefulShutdownTimeout = seconds
	}
}

// WithGracefuleShutdownTimeout is kept for backward compatibility with
// callers of the original (misspelled) name.
//
// Deprecated: use WithGracefulShutdownTimeout instead.
func WithGracefuleShutdownTimeout(seconds int) Option {
	return WithGracefulShutdownTimeout(seconds)
}
// NewWatcher creates a Watcher.
func NewWatcher(opts ...Option) *Watcher {
w := &Watcher{
@@ -279,51 +286,69 @@ func (w *Watcher) TrapSignals() {
}
case syscall.SIGQUIT:
w.log.Info().Msg("preparing for a graceful shutdown with deadline of 10 seconds")
go func() {
count := 10
ticker := time.NewTicker(time.Second)
for ; true; <-ticker.C {
w.log.Info().Msgf("shutting down in %d seconds", count-1)
count--
if count <= 0 {
w.log.Info().Msg("deadline reached before draining active conns, hard stopping ...")
for _, s := range w.ss {
err := s.Stop()
if err != nil {
w.log.Error().Err(err).Msg("error stopping server")
}
w.log.Info().Msgf("fd to %s:%s abruptly closed", s.Network(), s.Address())
}
w.Exit(1)
}
}
}()
for _, s := range w.ss {
w.log.Info().Msgf("fd to %s:%s gracefully closed ", s.Network(), s.Address())
err := s.GracefulStop()
if err != nil {
w.log.Error().Err(err).Msg("error stopping server")
w.log.Info().Msg("exit with error code 1")
w.Exit(1)
}
}
w.log.Info().Msg("exit with error code 0")
w.Exit(0)
gracefulShutdown(w)
case syscall.SIGINT, syscall.SIGTERM:
w.log.Info().Msg("preparing for hard shutdown, aborting all conns")
for _, s := range w.ss {
w.log.Info().Msgf("fd to %s:%s abruptly closed", s.Network(), s.Address())
err := s.Stop()
if err != nil {
w.log.Error().Err(err).Msg("error stopping server")
}
if w.gracefulShutdownTimeout == 0 {
hardShutdown(w)
}
w.Exit(0)
gracefulShutdown(w)
}
}
}
// TODO: Ideally this would call exit() but properly return an error. The
// exit() is problematic (i.e. racey) especially when orchestrating multiple
// reva services from some external runtime (like in the "ocis server" case).

// gracefulShutdown drains every registered server via GracefulStop,
// bounded by a watchdog of w.gracefulShutdownTimeout seconds. On a clean
// drain the process exits with code 0; a GracefulStop error or an expired
// deadline abruptly stops all servers and exits with code 1.
func gracefulShutdown(w *Watcher) {
	w.log.Info().Int("Timeout", w.gracefulShutdownTimeout).Msg("preparing for a graceful shutdown with deadline")
	// Watchdog goroutine: counts down one second per tick. If the drain
	// loop below has not exited the process before the countdown reaches
	// zero, all servers are force-stopped and we exit with code 1.
	go func() {
		count := w.gracefulShutdownTimeout
		ticker := time.NewTicker(time.Second)
		// NOTE(review): the ticker is never stopped; tolerable only because
		// every path out of this function ends in w.Exit, which terminates
		// the whole process.
		for ; true; <-ticker.C {
			w.log.Info().Msgf("shutting down in %d seconds", count-1)
			count--
			if count <= 0 {
				w.log.Info().Msg("deadline reached before draining active conns, hard stopping ...")
				for _, s := range w.ss {
					err := s.Stop()
					if err != nil {
						w.log.Error().Err(err).Msg("error stopping server")
					}
					w.log.Info().Msgf("fd to %s:%s abruptly closed", s.Network(), s.Address())
				}
				w.Exit(1)
			}
		}
	}()
	// Drain servers sequentially; the first GracefulStop error aborts the
	// whole shutdown with exit code 1.
	for _, s := range w.ss {
		w.log.Info().Msgf("fd to %s:%s gracefully closed ", s.Network(), s.Address())
		err := s.GracefulStop()
		if err != nil {
			w.log.Error().Err(err).Msg("error stopping server")
			w.log.Info().Msg("exit with error code 1")
			w.Exit(1)
		}
	}
	w.log.Info().Msg("exit with error code 0")
	w.Exit(0)
}
// TODO: Ideally this would call exit() but properly return an error. The
// exit() is problematic (i.e. racey) especially when orchestrating multiple
// reva services from some external runtime (like in the "ocis server" case).

// hardShutdown immediately stops every registered server without draining
// active connections. Stop errors are logged but do not change the exit
// code; the process always terminates with code 0.
func hardShutdown(w *Watcher) {
	w.log.Info().Msg("preparing for hard shutdown, aborting all conns")
	for _, srv := range w.ss {
		w.log.Info().Msgf("fd to %s:%s abruptly closed", srv.Network(), srv.Address())
		if err := srv.Stop(); err != nil {
			w.log.Error().Err(err).Msg("error stopping server")
		}
	}
	w.Exit(0)
}
func getListenerFile(ln net.Listener) (*os.File, error) {
switch t := ln.(type) {
case *net.TCPListener:
+10 -8
View File
@@ -72,6 +72,8 @@ type coreConf struct {
// TracingService specifies the service. i.e OpenCensus, OpenTelemetry, OpenTracing...
TracingService string `mapstructure:"tracing_service"`
GracefulShutdownTimeout int `mapstructure:"graceful_shutdown_timeout"`
}
func run(
@@ -92,7 +94,7 @@ func run(
initCPUCount(coreConf, logger)
servers := initServers(mainConf, logger, tp)
watcher, err := initWatcher(logger, filename)
watcher, err := initWatcher(logger, filename, coreConf.GracefulShutdownTimeout)
if err != nil {
log.Panic(err)
}
@@ -110,8 +112,8 @@ func initListeners(watcher *grace.Watcher, servers map[string]grace.Server, log
return listeners
}
func initWatcher(log *zerolog.Logger, filename string) (*grace.Watcher, error) {
watcher, err := handlePIDFlag(log, filename)
func initWatcher(log *zerolog.Logger, filename string, gracefulShutdownTimeout int) (*grace.Watcher, error) {
watcher, err := handlePIDFlag(log, filename, gracefulShutdownTimeout)
// TODO(labkode): maybe pidfile can be created later on? like once a server is going to be created?
if err != nil {
log.Error().Err(err).Msg("error creating grace watcher")
@@ -187,11 +189,11 @@ func initLogger(conf *logConf) *zerolog.Logger {
return log
}
func handlePIDFlag(l *zerolog.Logger, pidFile string) (*grace.Watcher, error) {
var opts []grace.Option
opts = append(opts, grace.WithPIDFile(pidFile))
opts = append(opts, grace.WithLogger(l.With().Str("pkg", "grace").Logger()))
w := grace.NewWatcher(opts...)
func handlePIDFlag(l *zerolog.Logger, pidFile string, gracefulShutdownTimeout int) (*grace.Watcher, error) {
w := grace.NewWatcher(grace.WithPIDFile(pidFile),
grace.WithLogger(l.With().Str("pkg", "grace").Logger()),
grace.WithGracefuleShutdownTimeout(gracefulShutdownTimeout),
)
err := w.WritePID()
if err != nil {
return nil, err
@@ -49,6 +49,16 @@ func (md Attributes) SetInt64(key string, val int64) {
md[key] = []byte(strconv.FormatInt(val, 10))
}
// UInt64 reads a uint64 value stored under key. The raw bytes are parsed
// as base-10 ASCII; a missing key yields the empty string and therefore a
// parse error from strconv.ParseUint.
func (md Attributes) UInt64(key string) (uint64, error) {
	return strconv.ParseUint(string(md[key]), 10, 64)
}
// SetUInt64 stores val under key as its base-10 ASCII representation.
func (md Attributes) SetUInt64(key string, val uint64) {
	md[key] = []byte(strconv.FormatUint(val, 10))
}
// SetXattrs sets multiple extended attributes on the write-through cache/node
func (n *Node) SetXattrsWithContext(ctx context.Context, attribs map[string][]byte, acquireLock bool) (err error) {
if n.xattrsCache != nil {
@@ -122,6 +122,9 @@ func (fs *Decomposedfs) CreateStorageSpace(ctx context.Context, req *provider.Cr
metadata.SetString(prefixes.NameAttr, req.Name)
metadata.SetString(prefixes.SpaceNameAttr, req.Name)
// This space is empty so set initial treesize to 0
metadata.SetUInt64(prefixes.TreesizeAttr, 0)
if req.Type != "" {
metadata.SetString(prefixes.SpaceTypeAttr, req.Type)
}
+1 -1
View File
@@ -352,7 +352,7 @@ github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1
github.com/cs3org/go-cs3apis/cs3/storage/registry/v1beta1
github.com/cs3org/go-cs3apis/cs3/tx/v1beta1
github.com/cs3org/go-cs3apis/cs3/types/v1beta1
# github.com/cs3org/reva/v2 v2.15.0
# github.com/cs3org/reva/v2 v2.15.1-0.20230718140539-0af2a07c7fd9
## explicit; go 1.20
github.com/cs3org/reva/v2/cmd/revad/internal/grace
github.com/cs3org/reva/v2/cmd/revad/runtime