From 11c11b06dc686bb0e8f0cd7de9aa53dbcc6498b0 Mon Sep 17 00:00:00 2001 From: "A.Unger" Date: Fri, 23 Apr 2021 14:13:58 +0200 Subject: [PATCH 1/3] stop supervisor if a service fails to start --- ocis/pkg/runtime/service/service.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ocis/pkg/runtime/service/service.go b/ocis/pkg/runtime/service/service.go index cfab669058..464df996cf 100644 --- a/ocis/pkg/runtime/service/service.go +++ b/ocis/pkg/runtime/service/service.go @@ -123,6 +123,10 @@ func Start(o ...Option) error { return err } + // halt listens for interrupt signals and blocks. + halt := make(chan os.Signal, 1) + signal.Notify(halt, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGHUP) + // notify goroutines that they are running on supervised mode s.cfg.Mode = ociscfg.SUPERVISED @@ -131,6 +135,9 @@ func Start(o ...Option) error { // Start creates its own supervisor. Running services under `ocis server` will create its own supervision tree. s.Supervisor = suture.New("ocis", suture.Spec{ EventHook: func(e suture.Event) { + if e.Type() == suture.EventTypeServiceTerminate { + halt <- os.Interrupt + } s.Log.Info().Str("event", e.String()).Msg(fmt.Sprintf("supervisor: %v", e.Map()["supervisor_name"])) }, }) @@ -148,10 +155,6 @@ func Start(o ...Option) error { } rpc.HandleHTTP() - // halt listens for interrupt signals and blocks. 
- halt := make(chan os.Signal, 1) - signal.Notify(halt, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGHUP) - l, err := net.Listen("tcp", net.JoinHostPort(s.cfg.Runtime.Host, s.cfg.Runtime.Port)) if err != nil { s.Log.Fatal().Err(err) From 8da06d32b95a717a34cf5fddbbec25cde49ab40d Mon Sep 17 00:00:00 2001 From: "A.Unger" Date: Fri, 23 Apr 2021 14:15:30 +0200 Subject: [PATCH 2/3] add changelog --- changelog/unreleased/stop-supervisor-if-failure.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 changelog/unreleased/stop-supervisor-if-failure.md diff --git a/changelog/unreleased/stop-supervisor-if-failure.md b/changelog/unreleased/stop-supervisor-if-failure.md new file mode 100644 index 0000000000..8bccf7a53d --- /dev/null +++ b/changelog/unreleased/stop-supervisor-if-failure.md @@ -0,0 +1,7 @@ +Bugfix: Stop the supervisor if a service fails to start + +Steps to reproduce a service failing to start (the supervisor should then stop): + +`PROXY_HTTP_ADDR=0.0.0.0:9144 bin/ocis server` + +https://github.com/owncloud/ocis/pull/1963 From 420447009bac7e6e491d6f5d40c2deb3550d11fa Mon Sep 17 00:00:00 2001 From: "A.Unger" Date: Fri, 23 Apr 2021 15:19:03 +0200 Subject: [PATCH 3/3] tweak backoff tolerance --- ocis/pkg/runtime/service/service.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ocis/pkg/runtime/service/service.go b/ocis/pkg/runtime/service/service.go index 464df996cf..268d7473fd 100644 --- a/ocis/pkg/runtime/service/service.go +++ b/ocis/pkg/runtime/service/service.go @@ -132,14 +132,23 @@ func Start(o ...Option) error { setMicroLogger() + // tolerance controls backoff cycles from the supervisor. + tolerance := 5 + totalBackoff := 0 + // Start creates its own supervisor. Running services under `ocis server` will create its own supervision tree. 
s.Supervisor = suture.New("ocis", suture.Spec{ EventHook: func(e suture.Event) { - if e.Type() == suture.EventTypeServiceTerminate { - halt <- os.Interrupt + if e.Type() == suture.EventTypeBackoff { + totalBackoff++ + if totalBackoff == tolerance { + halt <- os.Interrupt + } } s.Log.Info().Str("event", e.String()).Msg(fmt.Sprintf("supervisor: %v", e.Map()["supervisor_name"])) }, + FailureThreshold: 5, + FailureBackoff: 3 * time.Second, }) // reva storages have their own logging. For consistency sake the top level logging will cascade to reva.