diff --git a/pre_commit/commands/run.py b/pre_commit/commands/run.py index f45e7089..9c219008 100644 --- a/pre_commit/commands/run.py +++ b/pre_commit/commands/run.py @@ -86,7 +86,7 @@ def _run_single_hook(hook, repo, args, write, skips=frozenset()): sys.stdout.flush() diff_before = cmd_output('git', 'diff', retcode=None, encoding=None) - retcode, stdout, stderr = repo.run_hook(hook, filenames) + retcode, stdout, stderr = repo.run_hook(hook, tuple(filenames)) diff_after = cmd_output('git', 'diff', retcode=None, encoding=None) file_modifications = diff_before != diff_after diff --git a/pre_commit/languages/helpers.py b/pre_commit/languages/helpers.py index 5887d3e2..322c55f1 100644 --- a/pre_commit/languages/helpers.py +++ b/pre_commit/languages/helpers.py @@ -12,18 +12,3 @@ def environment_dir(ENVIRONMENT_DIR, language_version): return None else: return '{0}-{1}'.format(ENVIRONMENT_DIR, language_version) - - -def file_args_to_stdin(file_args): - return '\0'.join(list(file_args) + ['']) - - -def run_hook(cmd_args, file_args): - return cmd_output( - # Use -s 4000 (slightly less than posix mandated minimum) - # This is to prevent "xargs: ... Bad file number" on windows - 'xargs', '-0', '-s4000', *cmd_args, - stdin=file_args_to_stdin(file_args), - retcode=None, - encoding=None - ) diff --git a/pre_commit/languages/node.py b/pre_commit/languages/node.py index 1c1108ac..2a23fe10 100644 --- a/pre_commit/languages/node.py +++ b/pre_commit/languages/node.py @@ -8,6 +8,7 @@ from pre_commit.envcontext import envcontext from pre_commit.envcontext import Var from pre_commit.languages import helpers from pre_commit.util import clean_path_on_failure +from pre_commit.xargs import xargs ENVIRONMENT_DIR = 'node_env' @@ -63,6 +64,4 @@ def install_environment( def run_hook(repo_cmd_runner, hook, file_args): with in_env(repo_cmd_runner, hook['language_version']): - return helpers.run_hook( - (hook['entry'],) + tuple(hook['args']), file_args, - ) + return xargs((hook['entry'],) + tuple(hook['args']), file_args) diff --git a/pre_commit/languages/pcre.py b/pre_commit/languages/pcre.py index d8b7fd3e..0b2cfd18 100644 --- a/pre_commit/languages/pcre.py +++ b/pre_commit/languages/pcre.py @@ -2,8 +2,8 @@ from __future__ import unicode_literals from sys import platform -from pre_commit.languages import helpers from pre_commit.util import shell_escape +from pre_commit.xargs import xargs ENVIRONMENT_DIR = None @@ -24,7 +24,7 @@ def run_hook(repo_cmd_runner, hook, file_args): ) # For PCRE the entry is the regular expression to match - return helpers.run_hook( + return xargs( ( 'sh', '-c', # Grep usually returns 0 for matches, and nonzero for non-matches diff --git a/pre_commit/languages/python.py b/pre_commit/languages/python.py index 8bfc83b3..5c8a60bf 100644 --- a/pre_commit/languages/python.py +++ b/pre_commit/languages/python.py @@ -10,6 +10,7 @@ from pre_commit.envcontext import UNSET from pre_commit.envcontext import Var from pre_commit.languages import helpers from pre_commit.util import clean_path_on_failure +from pre_commit.xargs import xargs ENVIRONMENT_DIR = 'py_env' @@ -80,6 +81,4 @@ def install_environment( def run_hook(repo_cmd_runner, hook, file_args): with in_env(repo_cmd_runner, hook['language_version']): - return helpers.run_hook( - (hook['entry'],) + tuple(hook['args']), file_args, - ) + return xargs((hook['entry'],) + tuple(hook['args']), file_args) diff --git a/pre_commit/languages/ruby.py b/pre_commit/languages/ruby.py index ed494c24..dc320b3f 100644 --- a/pre_commit/languages/ruby.py +++ b/pre_commit/languages/ruby.py @@ -12,6 +12,7 @@ from pre_commit.util import CalledProcessError from pre_commit.util import clean_path_on_failure from pre_commit.util import resource_filename from pre_commit.util import tarfile_open +from pre_commit.xargs import xargs ENVIRONMENT_DIR = 'rbenv' @@ -125,6 +126,4 @@ def install_environment( def run_hook(repo_cmd_runner, hook, file_args): with in_env(repo_cmd_runner, hook['language_version']): - return helpers.run_hook( - (hook['entry'],) + tuple(hook['args']), file_args, - ) + return xargs((hook['entry'],) + tuple(hook['args']), file_args) diff --git a/pre_commit/languages/script.py b/pre_commit/languages/script.py index 7c413c36..5c652846 100644 --- a/pre_commit/languages/script.py +++ b/pre_commit/languages/script.py @@ -1,6 +1,6 @@ from __future__ import unicode_literals -from pre_commit.languages import helpers +from pre_commit.xargs import xargs ENVIRONMENT_DIR = None @@ -16,7 +16,7 @@ def install_environment( def run_hook(repo_cmd_runner, hook, file_args): - return helpers.run_hook( + return xargs( (repo_cmd_runner.prefix_dir + hook['entry'],) + tuple(hook['args']), file_args, ) diff --git a/pre_commit/languages/system.py b/pre_commit/languages/system.py index 340f4feb..8be45855 100644 --- a/pre_commit/languages/system.py +++ b/pre_commit/languages/system.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals import shlex -from pre_commit.languages import helpers +from pre_commit.xargs import xargs ENVIRONMENT_DIR = None @@ -18,6 +18,6 @@ def install_environment( def run_hook(repo_cmd_runner, hook, file_args): - return helpers.run_hook( + return xargs( tuple(shlex.split(hook['entry'])) + tuple(hook['args']), file_args, ) diff --git a/pre_commit/util.py b/pre_commit/util.py index 559ab703..853a95b1 100644 --- a/pre_commit/util.py +++ b/pre_commit/util.py @@ -160,7 +160,6 @@ class CalledProcessError(RuntimeError): def cmd_output(*cmd, **kwargs): retcode = kwargs.pop('retcode', 0) - stdin = kwargs.pop('stdin', None) encoding = kwargs.pop('encoding', 'UTF-8') __popen = kwargs.pop('__popen', subprocess.Popen) @@ -170,9 +169,6 @@ def cmd_output(*cmd, **kwargs): 'stderr': subprocess.PIPE, } - if stdin is not None: - stdin = stdin.encode('UTF-8') - # py2/py3 on windows are more strict about the types here cmd = [five.n(arg) for arg in cmd] kwargs['env'] = dict( @@ -182,7 +178,7 @@ def cmd_output(*cmd, **kwargs): popen_kwargs.update(kwargs) proc = __popen(cmd, **popen_kwargs) - stdout, stderr = proc.communicate(stdin) + stdout, stderr = proc.communicate() if encoding is not None and stdout is not None: stdout = stdout.decode(encoding) if encoding is not None and stderr is not None: diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py new file mode 100644 index 00000000..2b8aff56 --- /dev/null +++ b/pre_commit/xargs.py @@ -0,0 +1,59 @@ +from __future__ import absolute_import +from __future__ import unicode_literals + +from pre_commit.util import cmd_output + + +# Limit used previously to avoid "xargs ... Bad file number" on windows +# This is slightly less than the posix mandated minimum +MAX_LENGTH = 4000 + + +class ArgumentTooLongError(RuntimeError): + pass + + +def partition(cmd, varargs, _max_length=MAX_LENGTH): + cmd = tuple(cmd) + ret = [] + + ret_cmd = [] + total_len = len(' '.join(cmd)) + # Reversed so arguments are in order + varargs = list(reversed(varargs)) + + while varargs: + arg = varargs.pop() + + if total_len + 1 + len(arg) <= _max_length: + ret_cmd.append(arg) + total_len += len(arg) + elif not ret_cmd: + raise ArgumentTooLongError(arg) + else: + # We've exceeded the length, yield a command + ret.append(cmd + tuple(ret_cmd)) + ret_cmd = [] + total_len = len(' '.join(cmd)) + varargs.append(arg) + + ret.append(cmd + tuple(ret_cmd)) + + return tuple(ret) + + +def xargs(cmd, varargs): + """A simplified implementation of xargs.""" + retcode = 0 + stdout = b'' + stderr = b'' + + for run_cmd in partition(cmd, varargs): + proc_retcode, proc_out, proc_err = cmd_output( + *run_cmd, encoding=None, retcode=None + ) + retcode |= proc_retcode + stdout += proc_out + stderr += proc_err + + return retcode, stdout, stderr diff --git a/tests/languages/helpers_test.py b/tests/languages/helpers_test.py deleted file mode 100644 index 8497ceb0..00000000 --- a/tests/languages/helpers_test.py +++ /dev/null @@ -1,16 +0,0 @@ -from __future__ import absolute_import -from __future__ import unicode_literals - -from pre_commit.languages.helpers import file_args_to_stdin - - -def test_file_args_to_stdin_empty(): - assert file_args_to_stdin([]) == '' - - -def test_file_args_to_stdin_some(): - assert file_args_to_stdin(['foo', 'bar']) == 'foo\0bar\0' - - -def test_file_args_to_stdin_tuple(): - assert file_args_to_stdin(('foo', 'bar')) == 'foo\0bar\0' diff --git a/tests/repository_test.py b/tests/repository_test.py index 9642b4c5..978b42ce 100644 --- a/tests/repository_test.py +++ b/tests/repository_test.py @@ -234,13 +234,13 @@ def test_pcre_hook_matching(tempdir_factory, store): _test_hook_repo( tempdir_factory, store, 'pcre_hooks_repo', 'regex-with-quotes', ['herp', 'derp'], b"herp:2:herpfoo'bard\n", - expected_return_code=123, + expected_return_code=1, ) _test_hook_repo( tempdir_factory, store, 'pcre_hooks_repo', 'other-regex', ['herp', 'derp'], b'derp:1:[INFO] information yo\n', - expected_return_code=123, + expected_return_code=1, ) @@ -255,7 +255,7 @@ def test_pcre_hook_case_insensitive_option(tempdir_factory, store): _test_hook_repo( tempdir_factory, store, 'pcre_hooks_repo', 'regex-with-grep-args', ['herp'], b'herp:1:FoOoOoObar\n', - expected_return_code=123, + expected_return_code=1, ) @@ -264,7 +264,7 @@ def test_pcre_hook_case_insensitive_option(tempdir_factory, store): def test_pcre_many_files(tempdir_factory, store): # This is intended to simulate lots of passing files and one failing file # to make sure it still fails. This is not the case when naively using - # a system hook with `grep -H -n '...'` and expected_return_code=123. + # a system hook with `grep -H -n '...'` and expected_return_code=1. path = git_dir(tempdir_factory) with cwd(path): with io.open('herp', 'w') as herp: @@ -275,7 +275,7 @@ def test_pcre_many_files(tempdir_factory, store): 'other-regex', ['/dev/null'] * 15000 + ['herp'], b'herp:1:[INFO] info\n', - expected_return_code=123, + expected_return_code=1, ) @@ -355,7 +355,7 @@ def test_additional_python_dependencies_installed(tempdir_factory, store): config = make_config_from_repo(path) config['hooks'][0]['additional_dependencies'] = ['mccabe'] repo = Repository.create(config, store) - repo.run_hook(repo.hooks[0][1], []) + repo.require_installed() with python.in_env(repo.cmd_runner, 'default'): output = cmd_output('pip', 'freeze', '-l')[1] assert 'mccabe' in output @@ -367,11 +367,11 @@ def test_additional_dependencies_roll_forward(tempdir_factory, store): config = make_config_from_repo(path) # Run the repo once without additional_dependencies repo = Repository.create(config, store) - repo.run_hook(repo.hooks[0][1], []) + repo.require_installed() # Now run it with additional_dependencies config['hooks'][0]['additional_dependencies'] = ['mccabe'] repo = Repository.create(config, store) - repo.run_hook(repo.hooks[0][1], []) + repo.require_installed() # We should see our additional dependency installed with python.in_env(repo.cmd_runner, 'default'): output = cmd_output('pip', 'freeze', '-l')[1] @@ -388,7 +388,7 @@ def test_additional_ruby_dependencies_installed( config = make_config_from_repo(path) config['hooks'][0]['additional_dependencies'] = ['thread_safe'] repo = Repository.create(config, store) - repo.run_hook(repo.hooks[0][1], []) + repo.require_installed() with ruby.in_env(repo.cmd_runner, 'default'): output = cmd_output('gem', 'list', '--local')[1] assert 'thread_safe' in output @@ -405,7 +405,7 @@ def test_additional_node_dependencies_installed( # Careful to choose a small package that's not depped by npm config['hooks'][0]['additional_dependencies'] = ['lodash'] repo = Repository.create(config, store) - repo.run_hook(repo.hooks[0][1], []) + repo.require_installed() with node.in_env(repo.cmd_runner, 'default'): cmd_output('npm', 'config', 'set', 'global', 'true') output = cmd_output('npm', 'ls')[1] diff --git a/tests/xargs_test.py b/tests/xargs_test.py new file mode 100644 index 00000000..239f1a2d --- /dev/null +++ b/tests/xargs_test.py @@ -0,0 +1,47 @@ +from __future__ import absolute_import +from __future__ import unicode_literals + +import pytest + +from pre_commit import xargs + + +def test_partition_trivial(): + assert xargs.partition(('cmd',), ()) == (('cmd',),) + + +def test_partition_simple(): + assert xargs.partition(('cmd',), ('foo',)) == (('cmd', 'foo'),) + + +def test_partition_limits(): + ret = xargs.partition( + ('ninechars',), ( + # Just match the end (with spaces) + '.' * 5, '.' * 4, + # Just match the end (single arg) + '.' * 10, + # Goes over the end + '.' * 5, + '.' * 6, + ), + _max_length=20, + ) + assert ret == ( + ('ninechars', '.' * 5, '.' * 4), + ('ninechars', '.' * 10), + ('ninechars', '.' * 5), + ('ninechars', '.' * 6), + ) + + +def test_argument_too_long(): + with pytest.raises(xargs.ArgumentTooLongError): + xargs.partition(('a' * 5,), ('a' * 5,), _max_length=10) + + +def test_xargs_smoke(): + ret, out, err = xargs.xargs(('echo',), ('hello', 'world')) + assert ret == 0 + assert out == b'hello world\n' + assert err == b''