Implement a simplified xargs in python

This commit is contained in:
Anthony Sottile
2016-03-21 19:30:47 -07:00
parent a5b56bd9e3
commit b7d395410b
13 changed files with 130 additions and 62 deletions

View File

@@ -86,7 +86,7 @@ def _run_single_hook(hook, repo, args, write, skips=frozenset()):
sys.stdout.flush()
diff_before = cmd_output('git', 'diff', retcode=None, encoding=None)
retcode, stdout, stderr = repo.run_hook(hook, filenames)
retcode, stdout, stderr = repo.run_hook(hook, tuple(filenames))
diff_after = cmd_output('git', 'diff', retcode=None, encoding=None)
file_modifications = diff_before != diff_after

View File

@@ -12,18 +12,3 @@ def environment_dir(ENVIRONMENT_DIR, language_version):
return None
else:
return '{0}-{1}'.format(ENVIRONMENT_DIR, language_version)
def file_args_to_stdin(file_args):
return '\0'.join(list(file_args) + [''])
def run_hook(cmd_args, file_args):
return cmd_output(
# Use -s 4000 (slightly less than posix mandated minimum)
# This is to prevent "xargs: ... Bad file number" on windows
'xargs', '-0', '-s4000', *cmd_args,
stdin=file_args_to_stdin(file_args),
retcode=None,
encoding=None
)

View File

@@ -8,6 +8,7 @@ from pre_commit.envcontext import envcontext
from pre_commit.envcontext import Var
from pre_commit.languages import helpers
from pre_commit.util import clean_path_on_failure
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = 'node_env'
@@ -63,6 +64,4 @@ def install_environment(
def run_hook(repo_cmd_runner, hook, file_args):
with in_env(repo_cmd_runner, hook['language_version']):
return helpers.run_hook(
(hook['entry'],) + tuple(hook['args']), file_args,
)
return xargs((hook['entry'],) + tuple(hook['args']), file_args)

View File

@@ -2,8 +2,8 @@ from __future__ import unicode_literals
from sys import platform
from pre_commit.languages import helpers
from pre_commit.util import shell_escape
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = None
@@ -24,7 +24,7 @@ def run_hook(repo_cmd_runner, hook, file_args):
)
# For PCRE the entry is the regular expression to match
return helpers.run_hook(
return xargs(
(
'sh', '-c',
# Grep usually returns 0 for matches, and nonzero for non-matches

View File

@@ -10,6 +10,7 @@ from pre_commit.envcontext import UNSET
from pre_commit.envcontext import Var
from pre_commit.languages import helpers
from pre_commit.util import clean_path_on_failure
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = 'py_env'
@@ -80,6 +81,4 @@ def install_environment(
def run_hook(repo_cmd_runner, hook, file_args):
with in_env(repo_cmd_runner, hook['language_version']):
return helpers.run_hook(
(hook['entry'],) + tuple(hook['args']), file_args,
)
return xargs((hook['entry'],) + tuple(hook['args']), file_args)

View File

@@ -12,6 +12,7 @@ from pre_commit.util import CalledProcessError
from pre_commit.util import clean_path_on_failure
from pre_commit.util import resource_filename
from pre_commit.util import tarfile_open
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = 'rbenv'
@@ -125,6 +126,4 @@ def install_environment(
def run_hook(repo_cmd_runner, hook, file_args):
with in_env(repo_cmd_runner, hook['language_version']):
return helpers.run_hook(
(hook['entry'],) + tuple(hook['args']), file_args,
)
return xargs((hook['entry'],) + tuple(hook['args']), file_args)

View File

@@ -1,6 +1,6 @@
from __future__ import unicode_literals
from pre_commit.languages import helpers
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = None
@@ -16,7 +16,7 @@ def install_environment(
def run_hook(repo_cmd_runner, hook, file_args):
return helpers.run_hook(
return xargs(
(repo_cmd_runner.prefix_dir + hook['entry'],) + tuple(hook['args']),
file_args,
)

View File

@@ -2,7 +2,7 @@ from __future__ import unicode_literals
import shlex
from pre_commit.languages import helpers
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = None
@@ -18,6 +18,6 @@ def install_environment(
def run_hook(repo_cmd_runner, hook, file_args):
return helpers.run_hook(
return xargs(
tuple(shlex.split(hook['entry'])) + tuple(hook['args']), file_args,
)

View File

@@ -160,7 +160,6 @@ class CalledProcessError(RuntimeError):
def cmd_output(*cmd, **kwargs):
retcode = kwargs.pop('retcode', 0)
stdin = kwargs.pop('stdin', None)
encoding = kwargs.pop('encoding', 'UTF-8')
__popen = kwargs.pop('__popen', subprocess.Popen)
@@ -170,9 +169,6 @@ def cmd_output(*cmd, **kwargs):
'stderr': subprocess.PIPE,
}
if stdin is not None:
stdin = stdin.encode('UTF-8')
# py2/py3 on windows are more strict about the types here
cmd = [five.n(arg) for arg in cmd]
kwargs['env'] = dict(
@@ -182,7 +178,7 @@ def cmd_output(*cmd, **kwargs):
popen_kwargs.update(kwargs)
proc = __popen(cmd, **popen_kwargs)
stdout, stderr = proc.communicate(stdin)
stdout, stderr = proc.communicate()
if encoding is not None and stdout is not None:
stdout = stdout.decode(encoding)
if encoding is not None and stderr is not None:

59
pre_commit/xargs.py Normal file
View File

@@ -0,0 +1,59 @@
from __future__ import absolute_import
from __future__ import unicode_literals
from pre_commit.util import cmd_output
# Limit used previously to avoid "xargs ... Bad file number" on windows
# This is slightly less than the posix mandated minimum
MAX_LENGTH = 4000
class ArgumentTooLongError(RuntimeError):
pass
def partition(cmd, varargs, _max_length=MAX_LENGTH):
cmd = tuple(cmd)
ret = []
ret_cmd = []
total_len = len(' '.join(cmd))
# Reversed so arguments are in order
varargs = list(reversed(varargs))
while varargs:
arg = varargs.pop()
if total_len + 1 + len(arg) <= _max_length:
ret_cmd.append(arg)
total_len += len(arg)
elif not ret_cmd:
raise ArgumentTooLongError(arg)
else:
# We've exceeded the length, yield a command
ret.append(cmd + tuple(ret_cmd))
ret_cmd = []
total_len = len(' '.join(cmd))
varargs.append(arg)
ret.append(cmd + tuple(ret_cmd))
return tuple(ret)
def xargs(cmd, varargs):
"""A simplified implementation of xargs."""
retcode = 0
stdout = b''
stderr = b''
for run_cmd in partition(cmd, varargs):
proc_retcode, proc_out, proc_err = cmd_output(
*run_cmd, encoding=None, retcode=None
)
retcode |= proc_retcode
stdout += proc_out
stderr += proc_err
return retcode, stdout, stderr

View File

@@ -1,16 +0,0 @@
from __future__ import absolute_import
from __future__ import unicode_literals
from pre_commit.languages.helpers import file_args_to_stdin
def test_file_args_to_stdin_empty():
assert file_args_to_stdin([]) == ''
def test_file_args_to_stdin_some():
assert file_args_to_stdin(['foo', 'bar']) == 'foo\0bar\0'
def test_file_args_to_stdin_tuple():
assert file_args_to_stdin(('foo', 'bar')) == 'foo\0bar\0'

View File

@@ -234,13 +234,13 @@ def test_pcre_hook_matching(tempdir_factory, store):
_test_hook_repo(
tempdir_factory, store, 'pcre_hooks_repo',
'regex-with-quotes', ['herp', 'derp'], b"herp:2:herpfoo'bard\n",
expected_return_code=123,
expected_return_code=1,
)
_test_hook_repo(
tempdir_factory, store, 'pcre_hooks_repo',
'other-regex', ['herp', 'derp'], b'derp:1:[INFO] information yo\n',
expected_return_code=123,
expected_return_code=1,
)
@@ -255,7 +255,7 @@ def test_pcre_hook_case_insensitive_option(tempdir_factory, store):
_test_hook_repo(
tempdir_factory, store, 'pcre_hooks_repo',
'regex-with-grep-args', ['herp'], b'herp:1:FoOoOoObar\n',
expected_return_code=123,
expected_return_code=1,
)
@@ -264,7 +264,7 @@ def test_pcre_hook_case_insensitive_option(tempdir_factory, store):
def test_pcre_many_files(tempdir_factory, store):
# This is intended to simulate lots of passing files and one failing file
# to make sure it still fails. This is not the case when naively using
# a system hook with `grep -H -n '...'` and expected_return_code=123.
# a system hook with `grep -H -n '...'` and expected_return_code=1.
path = git_dir(tempdir_factory)
with cwd(path):
with io.open('herp', 'w') as herp:
@@ -275,7 +275,7 @@ def test_pcre_many_files(tempdir_factory, store):
'other-regex',
['/dev/null'] * 15000 + ['herp'],
b'herp:1:[INFO] info\n',
expected_return_code=123,
expected_return_code=1,
)
@@ -355,7 +355,7 @@ def test_additional_python_dependencies_installed(tempdir_factory, store):
config = make_config_from_repo(path)
config['hooks'][0]['additional_dependencies'] = ['mccabe']
repo = Repository.create(config, store)
repo.run_hook(repo.hooks[0][1], [])
repo.require_installed()
with python.in_env(repo.cmd_runner, 'default'):
output = cmd_output('pip', 'freeze', '-l')[1]
assert 'mccabe' in output
@@ -367,11 +367,11 @@ def test_additional_dependencies_roll_forward(tempdir_factory, store):
config = make_config_from_repo(path)
# Run the repo once without additional_dependencies
repo = Repository.create(config, store)
repo.run_hook(repo.hooks[0][1], [])
repo.require_installed()
# Now run it with additional_dependencies
config['hooks'][0]['additional_dependencies'] = ['mccabe']
repo = Repository.create(config, store)
repo.run_hook(repo.hooks[0][1], [])
repo.require_installed()
# We should see our additional dependency installed
with python.in_env(repo.cmd_runner, 'default'):
output = cmd_output('pip', 'freeze', '-l')[1]
@@ -388,7 +388,7 @@ def test_additional_ruby_dependencies_installed(
config = make_config_from_repo(path)
config['hooks'][0]['additional_dependencies'] = ['thread_safe']
repo = Repository.create(config, store)
repo.run_hook(repo.hooks[0][1], [])
repo.require_installed()
with ruby.in_env(repo.cmd_runner, 'default'):
output = cmd_output('gem', 'list', '--local')[1]
assert 'thread_safe' in output
@@ -405,7 +405,7 @@ def test_additional_node_dependencies_installed(
# Careful to choose a small package that's not depped by npm
config['hooks'][0]['additional_dependencies'] = ['lodash']
repo = Repository.create(config, store)
repo.run_hook(repo.hooks[0][1], [])
repo.require_installed()
with node.in_env(repo.cmd_runner, 'default'):
cmd_output('npm', 'config', 'set', 'global', 'true')
output = cmd_output('npm', 'ls')[1]

47
tests/xargs_test.py Normal file
View File

@@ -0,0 +1,47 @@
from __future__ import absolute_import
from __future__ import unicode_literals
import pytest
from pre_commit import xargs
def test_partition_trivial():
assert xargs.partition(('cmd',), ()) == (('cmd',),)
def test_partition_simple():
assert xargs.partition(('cmd',), ('foo',)) == (('cmd', 'foo'),)
def test_partition_limits():
ret = xargs.partition(
('ninechars',), (
# Just match the end (with spaces)
'.' * 5, '.' * 4,
# Just match the end (single arg)
'.' * 10,
# Goes over the end
'.' * 5,
'.' * 6,
),
_max_length=20,
)
assert ret == (
('ninechars', '.' * 5, '.' * 4),
('ninechars', '.' * 10),
('ninechars', '.' * 5),
('ninechars', '.' * 6),
)
def test_argument_too_long():
with pytest.raises(xargs.ArgumentTooLongError):
xargs.partition(('a' * 5,), ('a' * 5,), _max_length=10)
def test_xargs_smoke():
ret, out, err = xargs.xargs(('echo',), ('hello', 'world'))
assert ret == 0
assert out == b'hello world\n'
assert err == b''