Files
munki/code/client/supervisor
Greg Neagle b9f9fffccc Revert "munki: rename "/usr/local/munki/python" symlink to "munki-python" (#997)"
This change is still a good future goal, but is causing problems that are too difficult to work around right now and is delaying the vital release of Munki 5.1 for Big Sur compatibility.

This reverts commit 3bb91cabca.
2020-09-15 09:04:47 -07:00

378 lines
12 KiB
Python
Executable File

#!/usr/local/munki/python
# encoding: utf-8
#
# Copyright 2011-2013 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tool to supervise launch other binaries."""
from __future__ import absolute_import, print_function
import errno
import getopt
import logging
import logging.handlers
import os
import random
import signal
import subprocess
import sys
import tempfile
import time
class Error(Exception):
    """Base class for the exceptions defined in this module."""
class ExecuteError(Error):
    """Raised when the supervised executable cannot be launched."""
class OptionError(Error):
    """Raised when the command-line options cannot be parsed."""
class TimeoutError(Error):
    """Raised when the supervised process outlives the configured timeout.

    NOTE: within this module the name shadows Python 3's built-in
    TimeoutError.
    """
# Exit statuses that trigger the error-exec command when no
# 'error-exec-exit-codes' option is supplied.
DEFAULT_ERROR_EXEC_EXIT_CODES = [1]
# Exit status recorded by Supervisor.execute() when the child is killed for
# exceeding its timeout.
EXIT_STATUS_TIMEOUT = -99
# Seconds Supervisor.kill_pid() sleeps after each signal it sends before
# checking whether the child has exited.
KILL_WAIT_SECS = 1
class Supervisor(object):
    """Class to handle our supervisory functions.

    Typical use: construct, call set_options(), call execute() with the
    command line to run, then call cleanup(). get_exit_status() reports the
    exit status recorded for the supervised process.
    """

    def __init__(self, delayrandom_abort=False):
        """Init.

        Args:
          delayrandom_abort: bool, default False. If True, sending
            a SIGUSR1 to the process will stop any initial delayrandom
            from continuing to countdown, and will immediately end the
            delay. Note that setting this on multiple Supervisor instances
            in one process might not work too well depending on the
            timing of the execute() calls, see below.
        """
        self.options = {
            'error-exec': None,
            'error-exec-exit-codes': None,
            'timeout': None,
            'delayrandom': None,
            'stdout': None,
            'stderr': None,
            'debug': None,
        }
        # Cleared by signal_handler() to cut the random delay short.
        self.continue_sleeping = True
        self.error_exec_codes = []
        self.exit_status = None
        self.delayrandom_abort = delayrandom_abort
        # Temp files capturing the child's output; only used with error-exec.
        self.stderr = None
        self.stdout = None

    def set_options(self, **kwargs):
        """Set keyword options.

        Args:
          **kwargs: option name/value pairs merged into self.options.
        """
        self.options.update(kwargs)

    def signal_handler(self, signum, _frame):
        """Handle USR1 signal by asking the random delay to stop."""
        if signum == signal.SIGUSR1:
            self.continue_sleeping = False

    def _interruptible_sleep(self, seconds):
        """Sleep up to `seconds`, stopping once continue_sleeping is cleared.

        The sleep is taken in slices of at most one second. Since Python 3.5
        a single time.sleep() resumes after a signal handler returns, so one
        long sleep would never notice the SIGUSR1 abort request.
        """
        remaining = seconds
        while remaining > 0 and self.continue_sleeping:
            chunk = min(1, remaining)
            time.sleep(chunk)
            remaining -= chunk

    def _close_capture_files(self):
        """Close whichever stdout/stderr capture files exist and forget them."""
        if self.stdout is not None:
            self.stdout.close()
            self.stdout = None
        if self.stderr is not None:
            self.stderr.close()
            self.stderr = None

    def execute(self, args):
        """Exec.

        Applies the optional random delay, launches the executable in its own
        process group and polls it once a second until it exits or the
        'timeout' option elapses. The exit status is stored on the instance.

        Args:
          args: list, arguments to execute, args[0] is binary name
        Raises:
          ExecuteError: the executable could not be launched
            (exit status is recorded as 127).
          TimeoutError: the 'timeout' option elapsed; the process group has
            been signalled and exit status is EXIT_STATUS_TIMEOUT.
        """
        logging.debug('execute(%s)', str(args))

        # A second Supervisor will not take over SIGUSR1 from a previous
        # Supervisor who is holding this signal now. Remember whether this
        # instance installed the handler so that only the owner resets it.
        installed_handler = False
        if self.delayrandom_abort:
            if signal.getsignal(signal.SIGUSR1) == signal.SIG_DFL:
                signal.signal(signal.SIGUSR1, self.signal_handler)
                installed_handler = True

        self.continue_sleeping = True

        try:
            max_secs = self.options.get('delayrandom')
            if max_secs:
                random_secs = random.randrange(0, max_secs)
                logging.debug('Applying random delay up to %s seconds: %s',
                              max_secs, random_secs)
                self._interruptible_sleep(random_secs)
            if self.delayrandom_abort and not self.continue_sleeping:
                logging.debug('Awoken from random delay by signal')
        finally:
            if installed_handler:
                signal.signal(signal.SIGUSR1, signal.SIG_DFL)

        if self.options['error-exec']:
            # The error-exec command is handed the paths of these files, so
            # the child's output is captured in named temp files.
            self.stdout = tempfile.NamedTemporaryFile()
            stdout_pipe = self.stdout
            self.stderr = tempfile.NamedTemporaryFile()
            stderr_pipe = self.stderr
            # Parse error-exec-exit-codes, or set default if not provided.
            exit_codes = self.options['error-exec-exit-codes']
            if exit_codes:
                self.error_exec_codes = [
                    int(i) for i in exit_codes.split(',')]
            else:
                # Copy so the module-level default list is never shared.
                self.error_exec_codes = list(DEFAULT_ERROR_EXEC_EXIT_CODES)
        else:
            stdout_pipe = None
            stderr_pipe = None

        try:
            proc = subprocess.Popen(
                args,
                # Make the child its own process group leader so that
                # kill_pid() can signal the whole group.
                preexec_fn=lambda: os.setpgid(os.getpid(), os.getpid()),
                stdout=stdout_pipe,
                stderr=stderr_pipe,
            )
        except OSError as err:
            self.exit_status = 127
            raise ExecuteError(str(err))

        self.exit_status = None
        self.continue_sleeping = True
        timeout = self.options.get('timeout')
        start_time = time.time()
        try:
            # this loop is constructed this way, rather than using alarm or
            # something, to facilitate future features, e.g. pipe
            # stderr/stdout to syslog.
            while True:
                exit_status = proc.poll()
                if exit_status is not None:
                    self.exit_status = exit_status
                    break
                if timeout and (time.time() - start_time) > timeout:
                    raise TimeoutError
                time.sleep(1)
        except TimeoutError:
            logging.critical('Timeout error executing %s', ' '.join(args))
            self.kill_pid(proc.pid)
            self.exit_status = EXIT_STATUS_TIMEOUT
            raise

    def kill_pid(self, pid):
        """Kill a pid, aggressively if necessary.

        Sends SIGTERM to the process group led by `pid`, waits
        KILL_WAIT_SECS, and escalates to SIGKILL if the child has not been
        reaped by then. The SIGCHLD handler in place on entry is restored
        before returning.

        Args:
          pid: int, pid of the child, which is also its process group id
        """
        # pylint: disable=no-self-use
        exited = {}

        def _sigchld_handler(signum, _frame):
            """Reap the child and record that it has exited."""
            if signum != signal.SIGCHLD:
                return
            try:
                reaped_pid, _status = os.waitpid(pid, os.WNOHANG)
            except OSError:
                # Nothing left to wait for: the child was already reaped.
                reaped_pid = pid
            # waitpid() returns pid 0 while the child is still running.
            if reaped_pid == pid:
                exited[pid] = True

        previous_handler = signal.signal(signal.SIGCHLD, _sigchld_handler)
        try:
            logging.warning('Sending SIGTERM to %d', pid)
            os.kill(-1 * pid, signal.SIGTERM)  # *-1 = entire process group
            time.sleep(KILL_WAIT_SECS)
            if pid in exited:
                return
            logging.warning('Sending SIGKILL to %d', pid)
            os.kill(-1 * pid, signal.SIGKILL)
            time.sleep(KILL_WAIT_SECS)
        except OSError as err:
            if err.args[0] == errno.ESRCH:
                logging.warning('pid %d died on its own', pid)
                return
            logging.critical('killPid: %s', str(err))
        finally:
            # Leaving the handler installed would make a later, unrelated
            # child exit call waitpid() on this pid and raise.
            # signal.signal() returns None for a handler that was not
            # installed from Python, which cannot be passed back in.
            if previous_handler is None:
                previous_handler = signal.SIG_DFL
            signal.signal(signal.SIGCHLD, previous_handler)

        if pid in exited:
            return
        logging.debug('pid %d will not die', pid)

    def get_exit_status(self):
        """Return supervised process exit status"""
        return self.exit_status

    def cleanup(self):
        """Handle errors and call error-exec specified bin.

        Does nothing unless the 'error-exec' option is set. When the recorded
        exit status is one of the error-exec exit codes, the error-exec
        string is run through /bin/sh with the {EXIT}, {TIMEOUT}, {STDOUT}
        and {STDERR} tokens substituted. The capture files are always closed
        afterwards, even if the error-exec command fails.

        Raises:
          ExecuteError, TimeoutError: propagated from running error-exec.
        """
        if not self.options['error-exec']:
            return
        if self.stdout is None or self.stderr is None:
            # Nothing was captured: execute() never got as far as creating
            # the files, or cleanup() has already run.
            self._close_capture_files()
            return
        try:
            if self.exit_status in self.error_exec_codes:
                did_timeout = int(self.exit_status == EXIT_STATUS_TIMEOUT)
                substitutions = (
                    ('{EXIT}', str(self.exit_status)),
                    ('{TIMEOUT}', str(did_timeout)),
                    ('{STDOUT}', self.stdout.name),
                    ('{STDERR}', self.stderr.name),
                )
                arg_str = self.options['error-exec']
                for token, value in substitutions:
                    arg_str = arg_str.replace(token, value)
                args = ('/bin/sh', '-c', arg_str)
                error_supv = Supervisor()
                # NOTE(review): 5 * 3600 seconds is five hours, while the
                # usage text says the bin is terminated after 5 minutes.
                # Confirm which is intended.
                error_supv.set_options(timeout=5 * 3600)
                error_supv.execute(args)
        finally:
            self._close_capture_files()
def parse_opts(argv):
    """Parse argv and return options and arguments.

    Option names are stored without their leading dashes. The values of
    --timeout and --delayrandom are converted to non-negative integers;
    every other value is kept as the string getopt returned.

    Args:
      argv: list, all argv parameters
    Returns:
      (dict of options, list extra args besides options)
    Raises:
      OptionError: an option is unknown or malformed, or --timeout /
        --delayrandom is not a non-negative integer.
    """
    long_options = [
        'timeout=', 'delayrandom=', 'debug', 'help',
        'error-exec=', 'error-exec-exit-codes=',
    ]
    try:
        argopts, args = getopt.gnu_getopt(argv, '', long_options)
    except getopt.GetoptError as err:
        raise OptionError(str(err))

    options = {}
    for flag, val in argopts:
        name = flag[2:]
        if name in ('timeout', 'delayrandom'):
            # Report bad numbers as OptionError so the caller's option-error
            # handling applies instead of an uncaught ValueError.
            try:
                number = int(val)
            except ValueError:
                raise OptionError(
                    'option --%s requires an integer, got %r' % (name, val))
            if number < 0:
                raise OptionError(
                    'option --%s must not be negative, got %r' % (name, val))
            options[name] = number
        else:
            options[name] = val
    return options, args
def usage():
    """Print the command-line help text to standard output."""
    # The files handed to the error-exec bin are the captured stdout and
    # stderr of the supervised executable, so the text names those two.
    print("""supervisor [options] [--] [path to executable] [arguments]
options:
--timeout n
after n seconds, terminate the executable
--delayrandom n
delay the execution of executable by random seconds up to n
--error-exec "path and options string"
exec path when executable returns non zero exit status.
in this mode the stdout and stderr from the supervised
executable are recorded to temp files.
the path and options string can include tokens which will be
replaced with values. note the braces {} should be included.
{EXIT} = exit status
{TIMEOUT} = 1 or 0, timeout did or did not occur
{STDOUT} = path to stdout file
{STDERR} = path to stderr file
the error-exec bin may use the stdout, stderr files while it is
executing, but it should assume they will disappear when
the error-exec bin returns with any exit status.
the bin should not run more than 5 minutes or it will be
terminated.
--error-exec-exit-codes "1,100,203"
comma-delimited list of integer exit status codes. If the
supervised script exits with one of these codes, the error-exec
executable will be run. Default: "1"
--debug
enable debugging output, all logs to stderr and not syslog.
--help
this text
--
use the -- to separate supervisor options from arguments to the
executable which will appear as options.
""")
def process_opts(options, args):
    """Check parsed options and apply the ones that take effect immediately.

    Prints the usage text when help was requested or when there is nothing
    to execute. Raises the root logger to DEBUG when the debug option is
    present.

    Args:
      options: dict, options
      args: list, extra args
    Returns:
      True if supervisor startup should occur, False if not.
    """
    help_requested = options.get('help') is not None
    if help_requested or not args:
        usage()
        return False

    debug_requested = options.get('debug') is not None
    if debug_requested:
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.DEBUG)
    return True
def setup_syslog():
    """Attach a DEBUG-level syslog handler to the root logger.

    The handler is created for the '/var/run/syslog' address and prefixes
    each record with the source file name, the process id and the level.
    """
    handler = logging.handlers.SysLogHandler('/var/run/syslog')
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter(
        '%(filename)s[%(process)d]: %(levelname)s %(message)s'))
    logging.getLogger().addHandler(handler)
def main(argv):
    """Parse the command line, run the executable under supervision.

    Args:
      argv: list, full argument vector; argv[0] is skipped
    Returns:
      int exit code: 0 when only the usage text was printed, 1 on an option
      error, a timeout or any other Error, otherwise the exit status of the
      supervised process.
    """
    try:
        options, args = parse_opts(argv[1:])
        should_start = process_opts(options, args)
    except OptionError as err:
        logging.error(str(err))
        return 1
    if not should_start:
        return 0

    # Syslog is only attached when the debug option was not given.
    if options.get('debug') is None:
        setup_syslog()

    try:
        supervisor = Supervisor(delayrandom_abort=True)
        supervisor.set_options(**options)
    except Error as err:
        logging.exception('%s %s', err.__class__.__name__, str(err))
        return 1

    try:
        supervisor.execute(args)
    except TimeoutError:
        exit_code = 1
    except Error as err:
        logging.exception('%s %s', err.__class__.__name__, str(err))
        exit_code = 1
    else:
        exit_code = supervisor.get_exit_status()

    # Runs the error-exec command, if configured, and removes temp files.
    supervisor.cleanup()
    return exit_code
# Script entry point: pass the full argv to main() and use its return value
# as the process exit code.
if __name__ == '__main__':
    sys.exit(main(sys.argv))