Implement a simplified xargs in python

2026-01-15 13:30:17 -06:00 · 2016-03-21 19:30:47 -07:00
parent a5b56bd9e3
commit b7d395410b
13 changed files with 130 additions and 62 deletions
--- a/pre_commit/commands/run.py
+++ b/pre_commit/commands/run.py
@@ -86,7 +86,7 @@ def _run_single_hook(hook, repo, args, write, skips=frozenset()):
    sys.stdout.flush()

    diff_before = cmd_output('git', 'diff', retcode=None, encoding=None)
-    retcode, stdout, stderr = repo.run_hook(hook, filenames)
+    retcode, stdout, stderr = repo.run_hook(hook, tuple(filenames))
    diff_after = cmd_output('git', 'diff', retcode=None, encoding=None)

    file_modifications = diff_before != diff_after
--- a/pre_commit/languages/helpers.py
+++ b/pre_commit/languages/helpers.py
@@ -12,18 +12,3 @@ def environment_dir(ENVIRONMENT_DIR, language_version):
        return None
    else:
        return '{0}-{1}'.format(ENVIRONMENT_DIR, language_version)
-
-
-def file_args_to_stdin(file_args):
-    return '\0'.join(list(file_args) + [''])
-
-
-def run_hook(cmd_args, file_args):
-    return cmd_output(
-        # Use -s 4000 (slightly less than posix mandated minimum)
-        # This is to prevent "xargs: ... Bad file number" on windows
-        'xargs', '-0', '-s4000', *cmd_args,
-        stdin=file_args_to_stdin(file_args),
-        retcode=None,
-        encoding=None
-    )
--- a/pre_commit/languages/node.py
+++ b/pre_commit/languages/node.py
@@ -8,6 +8,7 @@ from pre_commit.envcontext import envcontext
 from pre_commit.envcontext import Var
 from pre_commit.languages import helpers
 from pre_commit.util import clean_path_on_failure
+from pre_commit.xargs import xargs


 ENVIRONMENT_DIR = 'node_env'
@@ -63,6 +64,4 @@ def install_environment(

 def run_hook(repo_cmd_runner, hook, file_args):
    with in_env(repo_cmd_runner, hook['language_version']):
-        return helpers.run_hook(
-            (hook['entry'],) + tuple(hook['args']), file_args,
-        )
+        return xargs((hook['entry'],) + tuple(hook['args']), file_args)
--- a/pre_commit/languages/pcre.py
+++ b/pre_commit/languages/pcre.py
@@ -2,8 +2,8 @@ from __future__ import unicode_literals

 from sys import platform

-from pre_commit.languages import helpers
 from pre_commit.util import shell_escape
+from pre_commit.xargs import xargs


 ENVIRONMENT_DIR = None
@@ -24,7 +24,7 @@ def run_hook(repo_cmd_runner, hook, file_args):
    )

    # For PCRE the entry is the regular expression to match
-    return helpers.run_hook(
+    return xargs(
        (
            'sh', '-c',
            # Grep usually returns 0 for matches, and nonzero for non-matches
--- a/pre_commit/languages/python.py
+++ b/pre_commit/languages/python.py
@@ -10,6 +10,7 @@ from pre_commit.envcontext import UNSET
 from pre_commit.envcontext import Var
 from pre_commit.languages import helpers
 from pre_commit.util import clean_path_on_failure
+from pre_commit.xargs import xargs


 ENVIRONMENT_DIR = 'py_env'
@@ -80,6 +81,4 @@ def install_environment(

 def run_hook(repo_cmd_runner, hook, file_args):
    with in_env(repo_cmd_runner, hook['language_version']):
-        return helpers.run_hook(
-            (hook['entry'],) + tuple(hook['args']), file_args,
-        )
+        return xargs((hook['entry'],) + tuple(hook['args']), file_args)
--- a/pre_commit/languages/ruby.py
+++ b/pre_commit/languages/ruby.py
@@ -12,6 +12,7 @@ from pre_commit.util import CalledProcessError
 from pre_commit.util import clean_path_on_failure
 from pre_commit.util import resource_filename
 from pre_commit.util import tarfile_open
+from pre_commit.xargs import xargs


 ENVIRONMENT_DIR = 'rbenv'
@@ -125,6 +126,4 @@ def install_environment(

 def run_hook(repo_cmd_runner, hook, file_args):
    with in_env(repo_cmd_runner, hook['language_version']):
-        return helpers.run_hook(
-            (hook['entry'],) + tuple(hook['args']), file_args,
-        )
+        return xargs((hook['entry'],) + tuple(hook['args']), file_args)
--- a/pre_commit/languages/script.py
+++ b/pre_commit/languages/script.py
@@ -1,6 +1,6 @@
 from __future__ import unicode_literals

-from pre_commit.languages import helpers
+from pre_commit.xargs import xargs


 ENVIRONMENT_DIR = None
@@ -16,7 +16,7 @@ def install_environment(


 def run_hook(repo_cmd_runner, hook, file_args):
-    return helpers.run_hook(
+    return xargs(
        (repo_cmd_runner.prefix_dir + hook['entry'],) + tuple(hook['args']),
        file_args,
    )
--- a/pre_commit/languages/system.py
+++ b/pre_commit/languages/system.py
@@ -2,7 +2,7 @@ from __future__ import unicode_literals

 import shlex

-from pre_commit.languages import helpers
+from pre_commit.xargs import xargs


 ENVIRONMENT_DIR = None
@@ -18,6 +18,6 @@ def install_environment(


 def run_hook(repo_cmd_runner, hook, file_args):
-    return helpers.run_hook(
+    return xargs(
        tuple(shlex.split(hook['entry'])) + tuple(hook['args']), file_args,
    )
--- a/pre_commit/util.py
+++ b/pre_commit/util.py
@@ -160,7 +160,6 @@ class CalledProcessError(RuntimeError):

 def cmd_output(*cmd, **kwargs):
    retcode = kwargs.pop('retcode', 0)
-    stdin = kwargs.pop('stdin', None)
    encoding = kwargs.pop('encoding', 'UTF-8')
    __popen = kwargs.pop('__popen', subprocess.Popen)

@@ -170,9 +169,6 @@ def cmd_output(*cmd, **kwargs):
        'stderr': subprocess.PIPE,
    }

-    if stdin is not None:
-        stdin = stdin.encode('UTF-8')
-
    # py2/py3 on windows are more strict about the types here
    cmd = [five.n(arg) for arg in cmd]
    kwargs['env'] = dict(
@@ -182,7 +178,7 @@ def cmd_output(*cmd, **kwargs):

    popen_kwargs.update(kwargs)
    proc = __popen(cmd, **popen_kwargs)
-    stdout, stderr = proc.communicate(stdin)
+    stdout, stderr = proc.communicate()
    if encoding is not None and stdout is not None:
        stdout = stdout.decode(encoding)
    if encoding is not None and stderr is not None:
--- a/pre_commit/xargs.py
+++ b/pre_commit/xargs.py
@@ -0,0 +1,59 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+from pre_commit.util import cmd_output
+
+
+# Limit used previously to avoid "xargs ... Bad file number" on windows
+# This is slightly less than the posix mandated minimum
+MAX_LENGTH = 4000
+
+
+class ArgumentTooLongError(RuntimeError):
+    pass
+
+
+def partition(cmd, varargs, _max_length=MAX_LENGTH):
+    cmd = tuple(cmd)
+    ret = []
+
+    ret_cmd = []
+    total_len = len(' '.join(cmd))
+    # Reversed so arguments are in order
+    varargs = list(reversed(varargs))
+
+    while varargs:
+        arg = varargs.pop()
+
+        if total_len + 1 + len(arg) <= _max_length:
+            ret_cmd.append(arg)
+            total_len += len(arg)
+        elif not ret_cmd:
+            raise ArgumentTooLongError(arg)
+        else:
+            # We've exceeded the length, yield a command
+            ret.append(cmd + tuple(ret_cmd))
+            ret_cmd = []
+            total_len = len(' '.join(cmd))
+            varargs.append(arg)
+
+    ret.append(cmd + tuple(ret_cmd))
+
+    return tuple(ret)
+
+
+def xargs(cmd, varargs):
+    """A simplified implementation of xargs."""
+    retcode = 0
+    stdout = b''
+    stderr = b''
+
+    for run_cmd in partition(cmd, varargs):
+        proc_retcode, proc_out, proc_err = cmd_output(
+            *run_cmd, encoding=None, retcode=None
+        )
+        retcode |= proc_retcode
+        stdout += proc_out
+        stderr += proc_err
+
+    return retcode, stdout, stderr
--- a/tests/languages/helpers_test.py
+++ b/tests/languages/helpers_test.py
@@ -1,16 +0,0 @@
-from __future__ import absolute_import
-from __future__ import unicode_literals
-
-from pre_commit.languages.helpers import file_args_to_stdin
-
-
-def test_file_args_to_stdin_empty():
-    assert file_args_to_stdin([]) == ''
-
-
-def test_file_args_to_stdin_some():
-    assert file_args_to_stdin(['foo', 'bar']) == 'foo\0bar\0'
-
-
-def test_file_args_to_stdin_tuple():
-    assert file_args_to_stdin(('foo', 'bar')) == 'foo\0bar\0'
--- a/tests/repository_test.py
+++ b/tests/repository_test.py
@@ -234,13 +234,13 @@ def test_pcre_hook_matching(tempdir_factory, store):
        _test_hook_repo(
            tempdir_factory, store, 'pcre_hooks_repo',
            'regex-with-quotes', ['herp', 'derp'], b"herp:2:herpfoo'bard\n",
-            expected_return_code=123,
+            expected_return_code=1,
        )

        _test_hook_repo(
            tempdir_factory, store, 'pcre_hooks_repo',
            'other-regex', ['herp', 'derp'], b'derp:1:[INFO] information yo\n',
-            expected_return_code=123,
+            expected_return_code=1,
        )


@@ -255,7 +255,7 @@ def test_pcre_hook_case_insensitive_option(tempdir_factory, store):
        _test_hook_repo(
            tempdir_factory, store, 'pcre_hooks_repo',
            'regex-with-grep-args', ['herp'], b'herp:1:FoOoOoObar\n',
-            expected_return_code=123,
+            expected_return_code=1,
        )


@@ -264,7 +264,7 @@ def test_pcre_hook_case_insensitive_option(tempdir_factory, store):
 def test_pcre_many_files(tempdir_factory, store):
    # This is intended to simulate lots of passing files and one failing file
    # to make sure it still fails.  This is not the case when naively using
-    # a system hook with `grep -H -n '...'` and expected_return_code=123.
+    # a system hook with `grep -H -n '...'` and expected_return_code=1.
    path = git_dir(tempdir_factory)
    with cwd(path):
        with io.open('herp', 'w') as herp:
@@ -275,7 +275,7 @@ def test_pcre_many_files(tempdir_factory, store):
            'other-regex',
            ['/dev/null'] * 15000 + ['herp'],
            b'herp:1:[INFO] info\n',
-            expected_return_code=123,
+            expected_return_code=1,
        )


@@ -355,7 +355,7 @@ def test_additional_python_dependencies_installed(tempdir_factory, store):
    config = make_config_from_repo(path)
    config['hooks'][0]['additional_dependencies'] = ['mccabe']
    repo = Repository.create(config, store)
-    repo.run_hook(repo.hooks[0][1], [])
+    repo.require_installed()
    with python.in_env(repo.cmd_runner, 'default'):
        output = cmd_output('pip', 'freeze', '-l')[1]
        assert 'mccabe' in output
@@ -367,11 +367,11 @@ def test_additional_dependencies_roll_forward(tempdir_factory, store):
    config = make_config_from_repo(path)
    # Run the repo once without additional_dependencies
    repo = Repository.create(config, store)
-    repo.run_hook(repo.hooks[0][1], [])
+    repo.require_installed()
    # Now run it with additional_dependencies
    config['hooks'][0]['additional_dependencies'] = ['mccabe']
    repo = Repository.create(config, store)
-    repo.run_hook(repo.hooks[0][1], [])
+    repo.require_installed()
    # We should see our additional dependency installed
    with python.in_env(repo.cmd_runner, 'default'):
        output = cmd_output('pip', 'freeze', '-l')[1]
@@ -388,7 +388,7 @@ def test_additional_ruby_dependencies_installed(
    config = make_config_from_repo(path)
    config['hooks'][0]['additional_dependencies'] = ['thread_safe']
    repo = Repository.create(config, store)
-    repo.run_hook(repo.hooks[0][1], [])
+    repo.require_installed()
    with ruby.in_env(repo.cmd_runner, 'default'):
        output = cmd_output('gem', 'list', '--local')[1]
        assert 'thread_safe' in output
@@ -405,7 +405,7 @@ def test_additional_node_dependencies_installed(
    # Careful to choose a small package that's not depped by npm
    config['hooks'][0]['additional_dependencies'] = ['lodash']
    repo = Repository.create(config, store)
-    repo.run_hook(repo.hooks[0][1], [])
+    repo.require_installed()
    with node.in_env(repo.cmd_runner, 'default'):
        cmd_output('npm', 'config', 'set', 'global', 'true')
        output = cmd_output('npm', 'ls')[1]
--- a/tests/xargs_test.py
+++ b/tests/xargs_test.py
@@ -0,0 +1,47 @@
+from __future__ import absolute_import
+from __future__ import unicode_literals
+
+import pytest
+
+from pre_commit import xargs
+
+
+def test_partition_trivial():
+    assert xargs.partition(('cmd',), ()) == (('cmd',),)
+
+
+def test_partition_simple():
+    assert xargs.partition(('cmd',), ('foo',)) == (('cmd', 'foo'),)
+
+
+def test_partition_limits():
+    ret = xargs.partition(
+        ('ninechars',), (
+            # Just match the end (with spaces)
+            '.' * 5, '.' * 4,
+            # Just match the end (single arg)
+            '.' * 10,
+            # Goes over the end
+            '.' * 5,
+            '.' * 6,
+        ),
+        _max_length=20,
+    )
+    assert ret == (
+        ('ninechars', '.' * 5, '.' * 4),
+        ('ninechars', '.' * 10),
+        ('ninechars', '.' * 5),
+        ('ninechars', '.' * 6),
+    )
+
+
+def test_argument_too_long():
+    with pytest.raises(xargs.ArgumentTooLongError):
+        xargs.partition(('a' * 5,), ('a' * 5,), _max_length=10)
+
+
+def test_xargs_smoke():
+    ret, out, err = xargs.xargs(('echo',), ('hello', 'world'))
+    assert ret == 0
+    assert out == b'hello world\n'
+    assert err == b''