def _environ_size(_env=None):
    """Estimate the number of bytes the environment takes up in `envp`.

    :param _env: optional mapping to measure instead of the process
        environment (`os.environb` when it exists, otherwise `os.environ`).
    :returns: 8 bytes per entry (one `envp` pointer each) plus
        `len(key) + len(value) + 2` for every entry.
    """
    environ = _env if _env is not None else getattr(os, 'environb', os.environ)
    size = 8 * len(environ)  # number of pointers in `envp`
    # c strings in `envp`; the extra 2 per entry is presumably the `=`
    # separator and the trailing NUL
    size += sum(len(k) + len(v) + 2 for k, v in environ.items())
    return size


def _get_platform_max_length():  # pragma: no cover (platform specific)
    """Return the command-line length budget to use on this platform.

    - posix: `SC_ARG_MAX` minus 2048 (presumably headroom, as on Windows)
      minus the size of the current environment, clamped to the range
      `[2 ** 12, 2 ** 17]`.
    - nt: `2 ** 15 - 2048`.
    - anything else: `2 ** 12`.

    :returns: the maximum length as an int; always at least `2 ** 12`.
    """
    if os.name == 'posix':
        maximum = os.sysconf(str('SC_ARG_MAX')) - 2048 - _environ_size()
        # Cap at 2 ** 17 but never go below the posix minimum: a large
        # environment, a small `SC_ARG_MAX`, or an indeterminate `sysconf`
        # result of -1 would otherwise produce a tiny, zero or negative
        # limit.
        return max(min(maximum, 2 ** 17), 2 ** 12)
    elif os.name == 'nt':
        return 2 ** 15 - 2048  # UNICODE_STRING max - headroom
    else:
        # posix minimum
        return 2 ** 12
@pytest.mark.parametrize(
    ('env', 'expected'),
    [
        ({}, 0),
        ({b'x': b'1'}, 12),
        ({b'x': b'12'}, 13),
        ({b'x': b'1', b'y': b'2'}, 24),
    ],
)
def test_environ_size(env, expected):
    """`_environ_size` returns the expected size for an explicit `_env`."""
    # an explicit mapping is passed so the result does not depend on the
    # environment of the process running the tests
    measured = xargs._environ_size(_env=env)
    assert measured == expected
parse_shebang.normalize_cmd(('bash', '-c')) print_pid = ('sleep 0.5 && echo $$',) start = time.time() ret, stdout, _ = xargs.xargs( bash_cmd, print_pid * 5, target_concurrency=5, - _max_length=len(' '.join(bash_cmd + print_pid)), + _max_length=len(' '.join(bash_cmd + print_pid)) + 1, ) elapsed = time.time() - start assert ret == 0