diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index eea3acdb..1b237a38 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -1,41 +1,54 @@ from __future__ import absolute_import from __future__ import unicode_literals +import sys + from pre_commit import parse_shebang from pre_commit.util import cmd_output -# Limit used previously to avoid "xargs ... Bad file number" on windows -# This is slightly less than the posix mandated minimum -MAX_LENGTH = 4000 +# TODO: properly compute max_length value +def _get_platform_max_length(): + # posix minimum + return 4 * 1024 + + +def _get_command_length(command, arg): + parts = command + (arg,) + full_cmd = ' '.join(parts) + + # win32 uses the amount of characters, more details at: + # https://blogs.msdn.microsoft.com/oldnewthing/20031210-00/?p=41553/ + if sys.platform == 'win32': + return len(full_cmd) + + return len(full_cmd.encode(sys.getdefaultencoding())) class ArgumentTooLongError(RuntimeError): pass -def partition(cmd, varargs, _max_length=MAX_LENGTH): +def partition(cmd, varargs, _max_length=None): + _max_length = _max_length or _get_platform_max_length() cmd = tuple(cmd) ret = [] ret_cmd = [] - total_len = len(' '.join(cmd)) # Reversed so arguments are in order varargs = list(reversed(varargs)) while varargs: arg = varargs.pop() - if total_len + 1 + len(arg) <= _max_length: + if _get_command_length(cmd + tuple(ret_cmd), arg) <= _max_length: ret_cmd.append(arg) - total_len += len(arg) elif not ret_cmd: raise ArgumentTooLongError(arg) else: # We've exceeded the length, yield a command ret.append(cmd + tuple(ret_cmd)) ret_cmd = [] - total_len = len(' '.join(cmd)) varargs.append(arg) ret.append(cmd + tuple(ret_cmd)) diff --git a/tests/xargs_test.py b/tests/xargs_test.py index 529eb197..84d4899b 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -1,11 +1,29 @@ from __future__ import absolute_import from __future__ import unicode_literals +from unittest import mock + import pytest from pre_commit import xargs +@pytest.fixture +def sys_win32_mock(): + return mock.Mock( + platform='win32', + getdefaultencoding=mock.Mock(return_value='utf-8'), + ) + + +@pytest.fixture +def sys_linux_mock(): + return mock.Mock( + platform='linux', + getdefaultencoding=mock.Mock(return_value='utf-8'), + ) + + def test_partition_trivial(): assert xargs.partition(('cmd',), ()) == (('cmd',),) @@ -35,6 +53,32 @@ def test_partition_limits(): ) +def test_partition_limit_win32(sys_win32_mock): + cmd = ('ninechars',) + varargs = ('😑' * 10,) + with mock.patch('pre_commit.xargs.sys', sys_win32_mock): + ret = xargs.partition(cmd, varargs, _max_length=20) + + assert ret == (cmd + varargs,) + + +def test_partition_limit_linux(sys_linux_mock): + cmd = ('ninechars',) + varargs = ('😑' * 5,) + with mock.patch('pre_commit.xargs.sys', sys_linux_mock): + ret = xargs.partition(cmd, varargs, _max_length=30) + + assert ret == (cmd + varargs,) + + +def test_argument_too_long_with_large_unicode(sys_linux_mock): + cmd = ('ninechars',) + varargs = ('😑' * 10,) # 4 bytes * 10 + with mock.patch('pre_commit.xargs.sys', sys_linux_mock): + with pytest.raises(xargs.ArgumentTooLongError): + xargs.partition(cmd, varargs, _max_length=20) + + def test_argument_too_long(): with pytest.raises(xargs.ArgumentTooLongError): xargs.partition(('a' * 5,), ('a' * 5,), _max_length=10)