From ba5e27e4ec087f80e07c646c365578d63ee39ee9 Mon Sep 17 00:00:00 2001 From: Chris Kuehl Date: Sat, 20 Oct 2018 13:05:55 -0700 Subject: [PATCH 1/7] Implement concurrent execution of individual hooks --- pre_commit/clientlib.py | 1 + pre_commit/languages/docker.py | 6 +++++- pre_commit/languages/docker_image.py | 6 +++++- pre_commit/languages/golang.py | 6 +++++- pre_commit/languages/helpers.py | 9 +++++++++ pre_commit/languages/node.py | 6 +++++- pre_commit/languages/python.py | 6 +++++- pre_commit/languages/ruby.py | 6 +++++- pre_commit/languages/rust.py | 6 +++++- pre_commit/languages/script.py | 6 +++++- pre_commit/languages/swift.py | 6 +++++- pre_commit/languages/system.py | 6 +++++- pre_commit/xargs.py | 29 ++++++++++++++++++++++++---- tests/repository_test.py | 1 + tests/xargs_test.py | 18 +++++++++++++++++ 15 files changed, 104 insertions(+), 14 deletions(-) diff --git a/pre_commit/clientlib.py b/pre_commit/clientlib.py index 4570e107..2fa7b153 100644 --- a/pre_commit/clientlib.py +++ b/pre_commit/clientlib.py @@ -56,6 +56,7 @@ MANIFEST_HOOK_DICT = cfgv.Map( cfgv.Optional('language_version', cfgv.check_string, 'default'), cfgv.Optional('log_file', cfgv.check_string, ''), cfgv.Optional('minimum_pre_commit_version', cfgv.check_string, '0'), + cfgv.Optional('require_serial', cfgv.check_bool, False), cfgv.Optional('stages', cfgv.check_array(cfgv.check_one_of(C.STAGES)), []), cfgv.Optional('verbose', cfgv.check_bool, False), ) diff --git a/pre_commit/languages/docker.py b/pre_commit/languages/docker.py index f3c46a33..7f00fe60 100644 --- a/pre_commit/languages/docker.py +++ b/pre_commit/languages/docker.py @@ -97,4 +97,8 @@ def run_hook(prefix, hook, file_args): # pragma: windows no cover entry_tag = ('--entrypoint', entry_exe, docker_tag(prefix)) cmd = docker_cmd() + entry_tag + cmd_rest - return xargs(cmd, file_args) + return xargs( + cmd, + file_args, + target_concurrency=helpers.target_concurrency(hook), + ) diff --git a/pre_commit/languages/docker_image.py b/pre_commit/languages/docker_image.py index 6301970c..e990f18a 100644 --- a/pre_commit/languages/docker_image.py +++ b/pre_commit/languages/docker_image.py @@ -16,4 +16,8 @@ install_environment = helpers.no_install def run_hook(prefix, hook, file_args): # pragma: windows no cover assert_docker_available() cmd = docker_cmd() + helpers.to_cmd(hook) - return xargs(cmd, file_args) + return xargs( + cmd, + file_args, + target_concurrency=helpers.target_concurrency(hook), + ) diff --git a/pre_commit/languages/golang.py b/pre_commit/languages/golang.py index 14354e0c..7d273e75 100644 --- a/pre_commit/languages/golang.py +++ b/pre_commit/languages/golang.py @@ -81,4 +81,8 @@ def install_environment(prefix, version, additional_dependencies): def run_hook(prefix, hook, file_args): with in_env(prefix): - return xargs(helpers.to_cmd(hook), file_args) + return xargs( + helpers.to_cmd(hook), + file_args, + target_concurrency=helpers.target_concurrency(hook), + ) diff --git a/pre_commit/languages/helpers.py b/pre_commit/languages/helpers.py index ddbe2e80..b6a3fc2d 100644 --- a/pre_commit/languages/helpers.py +++ b/pre_commit/languages/helpers.py @@ -1,5 +1,6 @@ from __future__ import unicode_literals +import multiprocessing import shlex from pre_commit.util import cmd_output @@ -45,3 +46,11 @@ def basic_healthy(prefix, language_version): def no_install(prefix, version, additional_dependencies): raise AssertionError('This type is not installable') + + +def target_concurrency(hook): + if hook['require_serial']: + return 1 + else: + # TODO: something smart! + return multiprocessing.cpu_count() diff --git a/pre_commit/languages/node.py b/pre_commit/languages/node.py index 7b464930..494ca878 100644 --- a/pre_commit/languages/node.py +++ b/pre_commit/languages/node.py @@ -71,4 +71,8 @@ def install_environment(prefix, version, additional_dependencies): def run_hook(prefix, hook, file_args): with in_env(prefix, hook['language_version']): - return xargs(helpers.to_cmd(hook), file_args) + return xargs( + helpers.to_cmd(hook), + file_args, + target_concurrency=helpers.target_concurrency(hook), + ) diff --git a/pre_commit/languages/python.py b/pre_commit/languages/python.py index ee7b2a4f..bb8a81a6 100644 --- a/pre_commit/languages/python.py +++ b/pre_commit/languages/python.py @@ -127,7 +127,11 @@ def py_interface(_dir, _make_venv): def run_hook(prefix, hook, file_args): with in_env(prefix, hook['language_version']): - return xargs(helpers.to_cmd(hook), file_args) + return xargs( + helpers.to_cmd(hook), + file_args, + target_concurrency=helpers.target_concurrency(hook), + ) def install_environment(prefix, version, additional_dependencies): additional_dependencies = tuple(additional_dependencies) diff --git a/pre_commit/languages/ruby.py b/pre_commit/languages/ruby.py index bef3fe38..3c5745df 100644 --- a/pre_commit/languages/ruby.py +++ b/pre_commit/languages/ruby.py @@ -126,4 +126,8 @@ def install_environment( def run_hook(prefix, hook, file_args): # pragma: windows no cover with in_env(prefix, hook['language_version']): - return xargs(helpers.to_cmd(hook), file_args) + return xargs( + helpers.to_cmd(hook), + file_args, + target_concurrency=helpers.target_concurrency(hook), + ) diff --git a/pre_commit/languages/rust.py b/pre_commit/languages/rust.py index 41053f88..e602adcc 100644 --- a/pre_commit/languages/rust.py +++ b/pre_commit/languages/rust.py @@ -91,4 +91,8 @@ def install_environment(prefix, version, additional_dependencies): def run_hook(prefix, hook, file_args): with in_env(prefix): - return xargs(helpers.to_cmd(hook), file_args) + return xargs( + helpers.to_cmd(hook), + file_args, + target_concurrency=helpers.target_concurrency(hook), + ) diff --git a/pre_commit/languages/script.py b/pre_commit/languages/script.py index 551b4d80..d242694f 100644 --- a/pre_commit/languages/script.py +++ b/pre_commit/languages/script.py @@ -13,4 +13,8 @@ install_environment = helpers.no_install def run_hook(prefix, hook, file_args): cmd = helpers.to_cmd(hook) cmd = (prefix.path(cmd[0]),) + cmd[1:] - return xargs(cmd, file_args) + return xargs( + cmd, + file_args, + target_concurrency=helpers.target_concurrency(hook), + ) diff --git a/pre_commit/languages/swift.py b/pre_commit/languages/swift.py index 2863fbee..eff4f9b0 100644 --- a/pre_commit/languages/swift.py +++ b/pre_commit/languages/swift.py @@ -53,4 +53,8 @@ def install_environment( def run_hook(prefix, hook, file_args): # pragma: windows no cover with in_env(prefix): - return xargs(helpers.to_cmd(hook), file_args) + return xargs( + helpers.to_cmd(hook), + file_args, + target_concurrency=helpers.target_concurrency(hook), + ) diff --git a/pre_commit/languages/system.py b/pre_commit/languages/system.py index 84cd1fe4..70a42ddc 100644 --- a/pre_commit/languages/system.py +++ b/pre_commit/languages/system.py @@ -11,4 +11,8 @@ install_environment = helpers.no_install def run_hook(prefix, hook, file_args): - return xargs(helpers.to_cmd(hook), file_args) + return xargs( + helpers.to_cmd(hook), + file_args, + target_concurrency=helpers.target_concurrency(hook), + ) diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index 2fe8a454..aa4f27e0 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -1,8 +1,11 @@ from __future__ import absolute_import from __future__ import unicode_literals +import contextlib +import multiprocessing.pool import sys +import concurrent.futures import six from pre_commit import parse_shebang @@ -65,12 +68,23 @@ def partition(cmd, varargs, _max_length=None): return tuple(ret) +@contextlib.contextmanager +def _threadpool(size): + pool = multiprocessing.pool.ThreadPool(size) + try: + yield pool + finally: + pool.terminate() + + def xargs(cmd, varargs, **kwargs): """A simplified implementation of xargs. negate: Make nonzero successful and zero a failure + target_concurrency: Target number of partitions to run concurrently """ negate = kwargs.pop('negate', False) + target_concurrency = kwargs.pop('target_concurrency', 1) retcode = 0 stdout = b'' stderr = b'' @@ -80,10 +94,17 @@ def xargs(cmd, varargs, **kwargs): except parse_shebang.ExecutableNotFoundError as e: return e.to_output() - for run_cmd in partition(cmd, varargs, **kwargs): - proc_retcode, proc_out, proc_err = cmd_output( - *run_cmd, encoding=None, retcode=None - ) + # TODO: teach partition to intelligently target our desired concurrency + # while still respecting max_length. + partitions = partition(cmd, varargs, **kwargs) + + def run_cmd_partition(run_cmd): + return cmd_output(*run_cmd, encoding=None, retcode=None) + + with _threadpool(min(len(partitions), target_concurrency)) as pool: + results = pool.map(run_cmd_partition, partitions) + + for proc_retcode, proc_out, proc_err in results: # This is *slightly* too clever so I'll explain it. # First the xor boolean table: # T | F | diff --git a/tests/repository_test.py b/tests/repository_test.py index 8d578f39..f1b0f6e0 100644 --- a/tests/repository_test.py +++ b/tests/repository_test.py @@ -837,6 +837,7 @@ def test_manifest_hooks(tempdir_factory, store): 'minimum_pre_commit_version': '0', 'name': 'Bash hook', 'pass_filenames': True, + 'require_serial': False, 'stages': [], 'types': ['file'], 'exclude_types': [], diff --git a/tests/xargs_test.py b/tests/xargs_test.py index bf685e16..b60a37d6 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -3,6 +3,7 @@ from __future__ import absolute_import from __future__ import unicode_literals import sys +import time import mock import pytest @@ -132,3 +133,20 @@ def test_xargs_retcode_normal(): ret, _, _ = xargs.xargs(exit_cmd, ('0', '1'), _max_length=max_length) assert ret == 1 + + +def test_xargs_concurrency(): + bash_cmd = ('bash', '-c') + print_pid = ('sleep 0.5 && echo $$',) + + start = time.time() + ret, stdout, _ = xargs.xargs( + bash_cmd, print_pid * 5, + target_concurrency=5, + _max_length=len(' '.join(bash_cmd + print_pid)), + ) + elapsed = time.time() - start + assert ret == 0 + pids = stdout.splitlines() + assert len(pids) == 5 + assert elapsed < 1 From ec0ed8aef5a904becf5facde6d90045a6f90e6cd Mon Sep 17 00:00:00 2001 From: Chris Kuehl Date: Sat, 20 Oct 2018 17:13:57 -0700 Subject: [PATCH 2/7] Handle CPU detection errors and running on Travis --- pre_commit/languages/helpers.py | 11 +++++++++-- tests/languages/helpers_test.py | 25 +++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/pre_commit/languages/helpers.py b/pre_commit/languages/helpers.py index b6a3fc2d..abd28fa0 100644 --- a/pre_commit/languages/helpers.py +++ b/pre_commit/languages/helpers.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals import multiprocessing +import os import shlex from pre_commit.util import cmd_output @@ -52,5 +53,11 @@ def target_concurrency(hook): if hook['require_serial']: return 1 else: - # TODO: something smart! - return multiprocessing.cpu_count() + # Travis appears to have a bunch of CPUs, but we can't use them all. + if 'TRAVIS' in os.environ: + return 2 + else: + try: + return multiprocessing.cpu_count() + except NotImplementedError: + return 1 diff --git a/tests/languages/helpers_test.py b/tests/languages/helpers_test.py index ada2095b..f1c1497f 100644 --- a/tests/languages/helpers_test.py +++ b/tests/languages/helpers_test.py @@ -1,8 +1,11 @@ from __future__ import absolute_import from __future__ import unicode_literals +import multiprocessing +import os import sys +import mock import pytest from pre_commit.languages import helpers @@ -28,3 +31,25 @@ def test_failed_setup_command_does_not_unicode_error(): # an assertion that this does not raise `UnicodeError` with pytest.raises(CalledProcessError): helpers.run_setup_cmd(Prefix('.'), (sys.executable, '-c', script)) + + +def test_target_concurrency_normal(): + with mock.patch.object(multiprocessing, 'cpu_count', return_value=123): + with mock.patch.dict(os.environ, {}, clear=True): + assert helpers.target_concurrency({'require_serial': False}) == 123 + + +def test_target_concurrency_cpu_count_require_serial_true(): + assert helpers.target_concurrency({'require_serial': True}) == 1 + + +def test_target_concurrency_on_travis(): + with mock.patch.dict(os.environ, {'TRAVIS': '1'}, clear=True): + assert helpers.target_concurrency({'require_serial': False}) == 2 + + +def test_target_concurrency_cpu_count_not_implemented(): + with mock.patch.object( + multiprocessing, 'cpu_count', side_effect=NotImplementedError, + ): + assert helpers.target_concurrency({'require_serial': False}) == 1 From b6926e8e2ef50d709945f75252e7c6b9cacda290 Mon Sep 17 00:00:00 2001 From: Chris Kuehl Date: Sat, 20 Oct 2018 17:14:50 -0700 Subject: [PATCH 3/7] Attempt to partition files to use all possible cores --- pre_commit/xargs.py | 18 +++++++++++++----- tests/xargs_test.py | 42 +++++++++++++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index aa4f27e0..9c4bc78a 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -1,7 +1,9 @@ from __future__ import absolute_import +from __future__ import division from __future__ import unicode_literals import contextlib +import math import multiprocessing.pool import sys @@ -37,8 +39,13 @@ class ArgumentTooLongError(RuntimeError): pass -def partition(cmd, varargs, _max_length=None): +def partition(cmd, varargs, target_concurrency, _max_length=None): _max_length = _max_length or _get_platform_max_length() + + # Generally, we try to partition evenly into at least `target_concurrency` + # partitions, but we don't want a bunch of tiny partitions. + max_args = max(4, math.ceil(len(varargs) / target_concurrency)) + cmd = tuple(cmd) ret = [] @@ -51,7 +58,10 @@ def partition(cmd, varargs, _max_length=None): arg = varargs.pop() arg_length = _command_length(arg) + 1 - if total_length + arg_length <= _max_length: + if ( + total_length + arg_length <= _max_length + and len(ret_cmd) < max_args + ): ret_cmd.append(arg) total_length += arg_length elif not ret_cmd: @@ -94,9 +104,7 @@ def xargs(cmd, varargs, **kwargs): except parse_shebang.ExecutableNotFoundError as e: return e.to_output() - # TODO: teach partition to intelligently target our desired concurrency - # while still respecting max_length. - partitions = partition(cmd, varargs, **kwargs) + partitions = partition(cmd, varargs, target_concurrency, **kwargs) def run_cmd_partition(run_cmd): return cmd_output(*run_cmd, encoding=None, retcode=None) diff --git a/tests/xargs_test.py b/tests/xargs_test.py index b60a37d6..3dcb6e8a 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -36,11 +36,11 @@ def linux_mock(): def test_partition_trivial(): - assert xargs.partition(('cmd',), ()) == (('cmd',),) + assert xargs.partition(('cmd',), (), 1) == (('cmd',),) def test_partition_simple(): - assert xargs.partition(('cmd',), ('foo',)) == (('cmd', 'foo'),) + assert xargs.partition(('cmd',), ('foo',), 1) == (('cmd', 'foo'),) def test_partition_limits(): @@ -54,6 +54,7 @@ def test_partition_limits(): '.' * 5, '.' * 6, ), + 1, _max_length=20, ) assert ret == ( @@ -68,21 +69,21 @@ def test_partition_limit_win32_py3(win32_py3_mock): cmd = ('ninechars',) # counted as half because of utf-16 encode varargs = ('😑' * 5,) - ret = xargs.partition(cmd, varargs, _max_length=20) + ret = xargs.partition(cmd, varargs, 1, _max_length=20) assert ret == (cmd + varargs,) def test_partition_limit_win32_py2(win32_py2_mock): cmd = ('ninechars',) varargs = ('😑' * 5,) # 4 bytes * 5 - ret = xargs.partition(cmd, varargs, _max_length=30) + ret = xargs.partition(cmd, varargs, 1, _max_length=30) assert ret == (cmd + varargs,) def test_partition_limit_linux(linux_mock): cmd = ('ninechars',) varargs = ('😑' * 5,) - ret = xargs.partition(cmd, varargs, _max_length=30) + ret = xargs.partition(cmd, varargs, 1, _max_length=30) assert ret == (cmd + varargs,) @@ -90,12 +91,39 @@ def test_argument_too_long_with_large_unicode(linux_mock): cmd = ('ninechars',) varargs = ('😑' * 10,) # 4 bytes * 10 with pytest.raises(xargs.ArgumentTooLongError): - xargs.partition(cmd, varargs, _max_length=20) + xargs.partition(cmd, varargs, 1, _max_length=20) + + +def test_partition_target_concurrency(): + ret = xargs.partition( + ('foo',), ('A',) * 22, + 4, + _max_length=50, + ) + assert ret == ( + ('foo',) + ('A',) * 6, + ('foo',) + ('A',) * 6, + ('foo',) + ('A',) * 6, + ('foo',) + ('A',) * 4, + ) + + +def test_partition_target_concurrency_wont_make_tiny_partitions(): + ret = xargs.partition( + ('foo',), ('A',) * 10, + 4, + _max_length=50, + ) + assert ret == ( + ('foo',) + ('A',) * 4, + ('foo',) + ('A',) * 4, + ('foo',) + ('A',) * 2, + ) def test_argument_too_long(): with pytest.raises(xargs.ArgumentTooLongError): - xargs.partition(('a' * 5,), ('a' * 5,), _max_length=10) + xargs.partition(('a' * 5,), ('a' * 5,), 1, _max_length=10) def test_xargs_smoke(): From 231f6013bbadbf4c0e77f980ce359a4cd01063b2 Mon Sep 17 00:00:00 2001 From: Chris Kuehl Date: Mon, 22 Oct 2018 09:21:37 -0700 Subject: [PATCH 4/7] Allow more time on the concurrency test Spawning processes is apparently really slow on Windows, and the test is occasionally taking slightly more than a second on AppVeyor. I think we can allow up to the full 2.5 seconds without losing the valuable bits of the test. --- tests/xargs_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/xargs_test.py b/tests/xargs_test.py index 3dcb6e8a..da3cc74d 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -177,4 +177,6 @@ def test_xargs_concurrency(): assert ret == 0 pids = stdout.splitlines() assert len(pids) == 5 - assert elapsed < 1 + # It would take 0.5*5=2.5 seconds ot run all of these in serial, so if it + # takes less, they must have run concurrently. + assert elapsed < 2.5 From aa50a8cde0919f0cf98b66b415403f04e54c7f05 Mon Sep 17 00:00:00 2001 From: Chris Kuehl Date: Mon, 22 Oct 2018 09:50:46 -0700 Subject: [PATCH 5/7] Switch to using concurrent.futures --- pre_commit/xargs.py | 47 +++++++++++++++++++++++---------------------- setup.py | 5 ++++- tests/xargs_test.py | 13 +++++++++++++ 3 files changed, 41 insertions(+), 24 deletions(-) diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index 9c4bc78a..5222d553 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals import contextlib import math -import multiprocessing.pool import sys import concurrent.futures @@ -79,12 +78,12 @@ def partition(cmd, varargs, target_concurrency, _max_length=None): @contextlib.contextmanager -def _threadpool(size): - pool = multiprocessing.pool.ThreadPool(size) - try: - yield pool - finally: - pool.terminate() +def _thread_mapper(maxsize): + if maxsize == 1: + yield map + else: + with concurrent.futures.ThreadPoolExecutor(maxsize) as ex: + yield ex.map def xargs(cmd, varargs, **kwargs): @@ -109,22 +108,24 @@ def xargs(cmd, varargs, **kwargs): def run_cmd_partition(run_cmd): return cmd_output(*run_cmd, encoding=None, retcode=None) - with _threadpool(min(len(partitions), target_concurrency)) as pool: - results = pool.map(run_cmd_partition, partitions) + with _thread_mapper( + min(len(partitions), target_concurrency), + ) as thread_map: + results = thread_map(run_cmd_partition, partitions) - for proc_retcode, proc_out, proc_err in results: - # This is *slightly* too clever so I'll explain it. - # First the xor boolean table: - # T | F | - # +-------+ - # T | F | T | - # --+-------+ - # F | T | F | - # --+-------+ - # When negate is True, it has the effect of flipping the return code - # Otherwise, the retuncode is unchanged - retcode |= bool(proc_retcode) ^ negate - stdout += proc_out - stderr += proc_err + for proc_retcode, proc_out, proc_err in results: + # This is *slightly* too clever so I'll explain it. + # First the xor boolean table: + # T | F | + # +-------+ + # T | F | T | + # --+-------+ + # F | T | F | + # --+-------+ + # When negate is True, it has the effect of flipping the return + # code. Otherwise, the returncode is unchanged. + retcode |= bool(proc_retcode) ^ negate + stdout += proc_out + stderr += proc_err return retcode, stdout, stderr diff --git a/setup.py b/setup.py index 7c0a958f..dd3eb425 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,10 @@ setup( 'toml', 'virtualenv', ], - extras_require={':python_version<"3.7"': ['importlib-resources']}, + extras_require={ + ':python_version<"3.2"': ['futures'], + ':python_version<"3.7"': ['importlib-resources'], + }, entry_points={ 'console_scripts': [ 'pre-commit = pre_commit.main:main', diff --git a/tests/xargs_test.py b/tests/xargs_test.py index da3cc74d..ed65ed46 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -5,6 +5,7 @@ from __future__ import unicode_literals import sys import time +import concurrent.futures import mock import pytest import six @@ -180,3 +181,15 @@ def test_xargs_concurrency(): # It would take 0.5*5=2.5 seconds ot run all of these in serial, so if it # takes less, they must have run concurrently. assert elapsed < 2.5 + + +def test_thread_mapper_concurrency_uses_threadpoolexecutor_map(): + with xargs._thread_mapper(10) as thread_map: + assert isinstance( + thread_map.__self__, concurrent.futures.ThreadPoolExecutor, + ) is True + + +def test_thread_mapper_concurrency_uses_regular_map(): + with xargs._thread_mapper(1) as thread_map: + assert thread_map is map From 9125439c3a6b7549bcf6d82c36fc2b89d1283cb2 Mon Sep 17 00:00:00 2001 From: Chris Kuehl Date: Mon, 22 Oct 2018 09:51:14 -0700 Subject: [PATCH 6/7] Force serial hook runs during tests --- pre_commit/languages/helpers.py | 2 +- tests/languages/helpers_test.py | 13 +++++++++++-- tox.ini | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/pre_commit/languages/helpers.py b/pre_commit/languages/helpers.py index abd28fa0..8b3e590d 100644 --- a/pre_commit/languages/helpers.py +++ b/pre_commit/languages/helpers.py @@ -50,7 +50,7 @@ def no_install(prefix, version, additional_dependencies): def target_concurrency(hook): - if hook['require_serial']: + if hook['require_serial'] or 'PRE_COMMIT_NO_CONCURRENCY' in os.environ: return 1 else: # Travis appears to have a bunch of CPUs, but we can't use them all. diff --git a/tests/languages/helpers_test.py b/tests/languages/helpers_test.py index f1c1497f..e7bd4702 100644 --- a/tests/languages/helpers_test.py +++ b/tests/languages/helpers_test.py @@ -40,7 +40,15 @@ def test_target_concurrency_normal(): def test_target_concurrency_cpu_count_require_serial_true(): - assert helpers.target_concurrency({'require_serial': True}) == 1 + with mock.patch.dict(os.environ, {}, clear=True): + assert helpers.target_concurrency({'require_serial': True}) == 1 + + +def test_target_concurrency_testing_env_var(): + with mock.patch.dict( + os.environ, {'PRE_COMMIT_NO_CONCURRENCY': '1'}, clear=True, + ): + assert helpers.target_concurrency({'require_serial': False}) == 1 def test_target_concurrency_on_travis(): @@ -52,4 +60,5 @@ def test_target_concurrency_cpu_count_not_implemented(): with mock.patch.object( multiprocessing, 'cpu_count', side_effect=NotImplementedError, ): - assert helpers.target_concurrency({'require_serial': False}) == 1 + with mock.patch.dict(os.environ, {}, clear=True): + assert helpers.target_concurrency({'require_serial': False}) == 1 diff --git a/tox.ini b/tox.ini index d4b590bf..52f3d3ee 100644 --- a/tox.ini +++ b/tox.ini @@ -27,3 +27,4 @@ env = GIT_AUTHOR_EMAIL=test@example.com GIT_COMMITTER_EMAIL=test@example.com VIRTUALENV_NO_DOWNLOAD=1 + PRE_COMMIT_NO_CONCURRENCY=1 From 6bac405d40b25409cbfb36cfedf4d6113ad19014 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Thu, 1 Nov 2018 18:05:36 -0700 Subject: [PATCH 7/7] Minor cleanups --- pre_commit/languages/docker.py | 7 +------ pre_commit/languages/docker_image.py | 7 +------ pre_commit/languages/golang.py | 7 +------ pre_commit/languages/helpers.py | 5 +++++ pre_commit/languages/node.py | 7 +------ pre_commit/languages/python.py | 7 +------ pre_commit/languages/ruby.py | 7 +------ pre_commit/languages/rust.py | 7 +------ pre_commit/languages/script.py | 7 +------ pre_commit/languages/swift.py | 7 +------ pre_commit/languages/system.py | 7 +------ pre_commit/xargs.py | 5 ++--- 12 files changed, 17 insertions(+), 63 deletions(-) diff --git a/pre_commit/languages/docker.py b/pre_commit/languages/docker.py index 7f00fe60..bfdd3585 100644 --- a/pre_commit/languages/docker.py +++ b/pre_commit/languages/docker.py @@ -9,7 +9,6 @@ from pre_commit.languages import helpers from pre_commit.util import CalledProcessError from pre_commit.util import clean_path_on_failure from pre_commit.util import cmd_output -from pre_commit.xargs import xargs ENVIRONMENT_DIR = 'docker' @@ -97,8 +96,4 @@ def run_hook(prefix, hook, file_args): # pragma: windows no cover entry_tag = ('--entrypoint', entry_exe, docker_tag(prefix)) cmd = docker_cmd() + entry_tag + cmd_rest - return xargs( - cmd, - file_args, - target_concurrency=helpers.target_concurrency(hook), - ) + return helpers.run_xargs(hook, cmd, file_args) diff --git a/pre_commit/languages/docker_image.py b/pre_commit/languages/docker_image.py index e990f18a..e7ebad7f 100644 --- a/pre_commit/languages/docker_image.py +++ b/pre_commit/languages/docker_image.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals from pre_commit.languages import helpers from pre_commit.languages.docker import assert_docker_available from pre_commit.languages.docker import docker_cmd -from pre_commit.xargs import xargs ENVIRONMENT_DIR = None @@ -16,8 +15,4 @@ install_environment = helpers.no_install def run_hook(prefix, hook, file_args): # pragma: windows no cover assert_docker_available() cmd = docker_cmd() + helpers.to_cmd(hook) - return xargs( - cmd, - file_args, - target_concurrency=helpers.target_concurrency(hook), - ) + return helpers.run_xargs(hook, cmd, file_args) diff --git a/pre_commit/languages/golang.py b/pre_commit/languages/golang.py index 7d273e75..09e3476c 100644 --- a/pre_commit/languages/golang.py +++ b/pre_commit/languages/golang.py @@ -11,7 +11,6 @@ from pre_commit.languages import helpers from pre_commit.util import clean_path_on_failure from pre_commit.util import cmd_output from pre_commit.util import rmtree -from pre_commit.xargs import xargs ENVIRONMENT_DIR = 'golangenv' @@ -81,8 +80,4 @@ def install_environment(prefix, version, additional_dependencies): def run_hook(prefix, hook, file_args): with in_env(prefix): - return xargs( - helpers.to_cmd(hook), - file_args, - target_concurrency=helpers.target_concurrency(hook), - ) + return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args) diff --git a/pre_commit/languages/helpers.py b/pre_commit/languages/helpers.py index 8b3e590d..aa5a5d13 100644 --- a/pre_commit/languages/helpers.py +++ b/pre_commit/languages/helpers.py @@ -5,6 +5,7 @@ import os import shlex from pre_commit.util import cmd_output +from pre_commit.xargs import xargs def run_setup_cmd(prefix, cmd): @@ -61,3 +62,7 @@ def target_concurrency(hook): return multiprocessing.cpu_count() except NotImplementedError: return 1 + + +def run_xargs(hook, cmd, file_args): + return xargs(cmd, file_args, target_concurrency=target_concurrency(hook)) diff --git a/pre_commit/languages/node.py b/pre_commit/languages/node.py index 494ca878..8e5dc7e5 100644 --- a/pre_commit/languages/node.py +++ b/pre_commit/languages/node.py @@ -10,7 +10,6 @@ from pre_commit.languages import helpers from pre_commit.languages.python import bin_dir from pre_commit.util import clean_path_on_failure from pre_commit.util import cmd_output -from pre_commit.xargs import xargs ENVIRONMENT_DIR = 'node_env' @@ -71,8 +70,4 @@ def install_environment(prefix, version, additional_dependencies): def run_hook(prefix, hook, file_args): with in_env(prefix, hook['language_version']): - return xargs( - helpers.to_cmd(hook), - file_args, - target_concurrency=helpers.target_concurrency(hook), - ) + return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args) diff --git a/pre_commit/languages/python.py b/pre_commit/languages/python.py index bb8a81a6..4b7580a4 100644 --- a/pre_commit/languages/python.py +++ b/pre_commit/languages/python.py @@ -12,7 +12,6 @@ from pre_commit.parse_shebang import find_executable from pre_commit.util import CalledProcessError from pre_commit.util import clean_path_on_failure from pre_commit.util import cmd_output -from pre_commit.xargs import xargs ENVIRONMENT_DIR = 'py_env' @@ -127,11 +126,7 @@ def py_interface(_dir, _make_venv): def run_hook(prefix, hook, file_args): with in_env(prefix, hook['language_version']): - return xargs( - helpers.to_cmd(hook), - file_args, - target_concurrency=helpers.target_concurrency(hook), - ) + return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args) def install_environment(prefix, version, additional_dependencies): additional_dependencies = tuple(additional_dependencies) diff --git a/pre_commit/languages/ruby.py b/pre_commit/languages/ruby.py index 3c5745df..0330ae8d 100644 --- a/pre_commit/languages/ruby.py +++ b/pre_commit/languages/ruby.py @@ -12,7 +12,6 @@ from pre_commit.languages import helpers from pre_commit.util import CalledProcessError from pre_commit.util import clean_path_on_failure from pre_commit.util import resource_bytesio -from pre_commit.xargs import xargs ENVIRONMENT_DIR = 'rbenv' @@ -126,8 +125,4 @@ def install_environment( def run_hook(prefix, hook, file_args): # pragma: windows no cover with in_env(prefix, hook['language_version']): - return xargs( - helpers.to_cmd(hook), - file_args, - target_concurrency=helpers.target_concurrency(hook), - ) + return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args) diff --git a/pre_commit/languages/rust.py b/pre_commit/languages/rust.py index e602adcc..8a5a0704 100644 --- a/pre_commit/languages/rust.py +++ b/pre_commit/languages/rust.py @@ -10,7 +10,6 @@ from pre_commit.envcontext import Var from pre_commit.languages import helpers from pre_commit.util import clean_path_on_failure from pre_commit.util import cmd_output -from pre_commit.xargs import xargs ENVIRONMENT_DIR = 'rustenv' @@ -91,8 +90,4 @@ def install_environment(prefix, version, additional_dependencies): def run_hook(prefix, hook, file_args): with in_env(prefix): - return xargs( - helpers.to_cmd(hook), - file_args, - target_concurrency=helpers.target_concurrency(hook), - ) + return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args) diff --git a/pre_commit/languages/script.py b/pre_commit/languages/script.py index d242694f..809efb85 100644 --- a/pre_commit/languages/script.py +++ b/pre_commit/languages/script.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals from pre_commit.languages import helpers -from pre_commit.xargs import xargs ENVIRONMENT_DIR = None @@ -13,8 +12,4 @@ install_environment = helpers.no_install def run_hook(prefix, hook, file_args): cmd = helpers.to_cmd(hook) cmd = (prefix.path(cmd[0]),) + cmd[1:] - return xargs( - cmd, - file_args, - target_concurrency=helpers.target_concurrency(hook), - ) + return helpers.run_xargs(hook, cmd, file_args) diff --git a/pre_commit/languages/swift.py b/pre_commit/languages/swift.py index eff4f9b0..c282de5d 100644 --- a/pre_commit/languages/swift.py +++ b/pre_commit/languages/swift.py @@ -8,7 +8,6 @@ from pre_commit.envcontext import Var from pre_commit.languages import helpers from pre_commit.util import clean_path_on_failure from pre_commit.util import cmd_output -from pre_commit.xargs import xargs ENVIRONMENT_DIR = 'swift_env' get_default_version = helpers.basic_get_default_version @@ -53,8 +52,4 @@ def install_environment( def run_hook(prefix, hook, file_args): # pragma: windows no cover with in_env(prefix): - return xargs( - helpers.to_cmd(hook), - file_args, - target_concurrency=helpers.target_concurrency(hook), - ) + return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args) diff --git a/pre_commit/languages/system.py b/pre_commit/languages/system.py index 70a42ddc..e590d486 100644 --- a/pre_commit/languages/system.py +++ b/pre_commit/languages/system.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals from pre_commit.languages import helpers -from pre_commit.xargs import xargs ENVIRONMENT_DIR = None @@ -11,8 +10,4 @@ install_environment = helpers.no_install def run_hook(prefix, hook, file_args): - return xargs( - helpers.to_cmd(hook), - file_args, - target_concurrency=helpers.target_concurrency(hook), - ) + return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args) diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index 5222d553..3b4a25f9 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -108,9 +108,8 @@ def xargs(cmd, varargs, **kwargs): def run_cmd_partition(run_cmd): return cmd_output(*run_cmd, encoding=None, retcode=None) - with _thread_mapper( - min(len(partitions), target_concurrency), - ) as thread_map: + threads = min(len(partitions), target_concurrency) + with _thread_mapper(threads) as thread_map: results = thread_map(run_cmd_partition, partitions) for proc_retcode, proc_out, proc_err in results: