diff --git a/pre_commit/commands/run.py b/pre_commit/commands/run.py index 97d56b8d..651c7f3f 100644 --- a/pre_commit/commands/run.py +++ b/pre_commit/commands/run.py @@ -17,14 +17,47 @@ from pre_commit.repository import all_hooks from pre_commit.repository import install_hook_envs from pre_commit.staged_files_only import staged_files_only from pre_commit.util import cmd_output -from pre_commit.util import memoize_by_cwd from pre_commit.util import noop_context logger = logging.getLogger('pre_commit') -tags_from_path = memoize_by_cwd(tags_from_path) +def filter_by_include_exclude(names, include, exclude): + include_re, exclude_re = re.compile(include), re.compile(exclude) + return [ + filename for filename in names + if include_re.search(filename) + if not exclude_re.search(filename) + ] + + +class Classifier(object): + def __init__(self, filenames): + self.filenames = [f for f in filenames if os.path.lexists(f)] + self._types_cache = {} + + def _types_for_file(self, filename): + try: + return self._types_cache[filename] + except KeyError: + ret = self._types_cache[filename] = tags_from_path(filename) + return ret + + def by_types(self, names, types, exclude_types): + types, exclude_types = frozenset(types), frozenset(exclude_types) + ret = [] + for filename in names: + tags = self._types_for_file(filename) + if tags >= types and not tags & exclude_types: + ret.append(filename) + return ret + + def filenames_for_hook(self, hook): + names = self.filenames + names = filter_by_include_exclude(names, hook.files, hook.exclude) + names = self.by_types(names, hook.types, hook.exclude_types) + return names def _get_skips(environ): @@ -36,37 +69,12 @@ def _hook_msg_start(hook, verbose): return '{}{}'.format('[{}] '.format(hook.id) if verbose else '', hook.name) -def _filter_by_include_exclude(filenames, include, exclude): - include_re, exclude_re = re.compile(include), re.compile(exclude) - return [ - filename for filename in filenames - if ( - include_re.search(filename) and - not exclude_re.search(filename) and - os.path.lexists(filename) - ) - ] - - -def _filter_by_types(filenames, types, exclude_types): - types, exclude_types = frozenset(types), frozenset(exclude_types) - ret = [] - for filename in filenames: - tags = tags_from_path(filename) - if tags >= types and not tags & exclude_types: - ret.append(filename) - return tuple(ret) - - SKIPPED = 'Skipped' NO_FILES = '(no files to check)' -def _run_single_hook(filenames, hook, args, skips, cols): - include, exclude = hook.files, hook.exclude - filenames = _filter_by_include_exclude(filenames, include, exclude) - types, exclude_types = hook.types, hook.exclude_types - filenames = _filter_by_types(filenames, types, exclude_types) +def _run_single_hook(classifier, hook, args, skips, cols): + filenames = classifier.filenames_for_hook(hook) if hook.language == 'pcre': logger.warning( @@ -193,10 +201,11 @@ def _run_hooks(config, hooks, args, environ): skips = _get_skips(environ) cols = _compute_cols(hooks, args.verbose) filenames = _all_filenames(args) - filenames = _filter_by_include_exclude(filenames, '', config['exclude']) + filenames = filter_by_include_exclude(filenames, '', config['exclude']) + classifier = Classifier(filenames) retval = 0 for hook in hooks: - retval |= _run_single_hook(filenames, hook, args, skips, cols) + retval |= _run_single_hook(classifier, hook, args, skips, cols) if retval and config['fail_fast']: break if retval and args.show_diff_on_failure and git.has_diff(): diff --git a/pre_commit/meta_hooks/check_hooks_apply.py b/pre_commit/meta_hooks/check_hooks_apply.py index b17a9d6f..b1ccdac3 100644 --- a/pre_commit/meta_hooks/check_hooks_apply.py +++ b/pre_commit/meta_hooks/check_hooks_apply.py @@ -3,24 +3,19 @@ import argparse import pre_commit.constants as C from pre_commit import git from pre_commit.clientlib import load_config -from pre_commit.commands.run import _filter_by_include_exclude -from pre_commit.commands.run import _filter_by_types +from pre_commit.commands.run import Classifier from pre_commit.repository import all_hooks from pre_commit.store import Store def check_all_hooks_match_files(config_file): - files = git.get_all_files() + classifier = Classifier(git.get_all_files()) retv = 0 for hook in all_hooks(load_config(config_file), Store()): if hook.always_run or hook.language == 'fail': continue - include, exclude = hook.files, hook.exclude - filtered = _filter_by_include_exclude(files, include, exclude) - types, exclude_types = hook.types, hook.exclude_types - filtered = _filter_by_types(filtered, types, exclude_types) - if not filtered: + elif not classifier.filenames_for_hook(hook): print('{} does not apply to this repository'.format(hook.id)) retv = 1 diff --git a/pre_commit/meta_hooks/check_useless_excludes.py b/pre_commit/meta_hooks/check_useless_excludes.py index 18b9f163..c4860db3 100644 --- a/pre_commit/meta_hooks/check_useless_excludes.py +++ b/pre_commit/meta_hooks/check_useless_excludes.py @@ -9,7 +9,7 @@ import pre_commit.constants as C from pre_commit import git from pre_commit.clientlib import load_config from pre_commit.clientlib import MANIFEST_HOOK_DICT -from pre_commit.commands.run import _filter_by_types +from pre_commit.commands.run import Classifier def exclude_matches_any(filenames, include, exclude): @@ -24,11 +24,11 @@ def exclude_matches_any(filenames, include, exclude): def check_useless_excludes(config_file): config = load_config(config_file) - files = git.get_all_files() + classifier = Classifier(git.get_all_files()) retv = 0 exclude = config['exclude'] - if not exclude_matches_any(files, '', exclude): + if not exclude_matches_any(classifier.filenames, '', exclude): print( 'The global exclude pattern {!r} does not match any files' .format(exclude), @@ -40,10 +40,11 @@ def check_useless_excludes(config_file): # Not actually a manifest dict, but this more accurately reflects # the defaults applied during runtime hook = apply_defaults(hook, MANIFEST_HOOK_DICT) + names = classifier.filenames types, exclude_types = hook['types'], hook['exclude_types'] - filtered_by_types = _filter_by_types(files, types, exclude_types) + names = classifier.by_types(names, types, exclude_types) include, exclude = hook['files'], hook['exclude'] - if not exclude_matches_any(filtered_by_types, include, exclude): + if not exclude_matches_any(names, include, exclude): print( 'The exclude pattern {!r} for {} does not match any files' .format(exclude, hook['id']), diff --git a/pre_commit/util.py b/pre_commit/util.py index c38af5a2..4c390289 100644 --- a/pre_commit/util.py +++ b/pre_commit/util.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals import contextlib import errno -import functools import os.path import shutil import stat @@ -31,23 +30,6 @@ def mkdirp(path): raise -def memoize_by_cwd(func): - """Memoize a function call based on os.getcwd().""" - @functools.wraps(func) - def wrapper(*args): - cwd = os.getcwd() - key = (cwd,) + args - try: - return wrapper._cache[key] - except KeyError: - ret = wrapper._cache[key] = func(*args) - return ret - - wrapper._cache = {} - - return wrapper - - @contextlib.contextmanager def clean_path_on_failure(path): """Cleans up the directory on an exceptional failure.""" diff --git a/tests/commands/run_test.py b/tests/commands/run_test.py index 2426068a..e37eca64 100644 --- a/tests/commands/run_test.py +++ b/tests/commands/run_test.py @@ -11,9 +11,10 @@ import pytest import pre_commit.constants as C from pre_commit.commands.install_uninstall import install from pre_commit.commands.run import _compute_cols -from pre_commit.commands.run import _filter_by_include_exclude from pre_commit.commands.run import _get_skips from pre_commit.commands.run import _has_unmerged_paths +from pre_commit.commands.run import Classifier +from pre_commit.commands.run import filter_by_include_exclude from pre_commit.commands.run import run from pre_commit.util import cmd_output from pre_commit.util import make_executable @@ -748,18 +749,22 @@ def test_fail_fast(cap_out, store, repo_with_failing_hook): assert printed.count(b'Failing hook') == 1 +def test_classifier_removes_dne(): + classifier = Classifier(('this_file_does_not_exist',)) + assert classifier.filenames == [] + + @pytest.fixture def some_filenames(): return ( '.pre-commit-hooks.yaml', - 'im_a_file_that_doesnt_exist.py', 'pre_commit/git.py', 'pre_commit/main.py', ) def test_include_exclude_base_case(some_filenames): - ret = _filter_by_include_exclude(some_filenames, '', '^$') + ret = filter_by_include_exclude(some_filenames, '', '^$') assert ret == [ '.pre-commit-hooks.yaml', 'pre_commit/git.py', @@ -771,22 +776,22 @@ def test_include_exclude_base_case(some_filenames): def test_matches_broken_symlink(tmpdir): with tmpdir.as_cwd(): os.symlink('does-not-exist', 'link') - ret = _filter_by_include_exclude({'link'}, '', '^$') + ret = filter_by_include_exclude({'link'}, '', '^$') assert ret == ['link'] def test_include_exclude_total_match(some_filenames): - ret = _filter_by_include_exclude(some_filenames, r'^.*\.py$', '^$') + ret = filter_by_include_exclude(some_filenames, r'^.*\.py$', '^$') assert ret == ['pre_commit/git.py', 'pre_commit/main.py'] def test_include_exclude_does_search_instead_of_match(some_filenames): - ret = _filter_by_include_exclude(some_filenames, r'\.yaml$', '^$') + ret = filter_by_include_exclude(some_filenames, r'\.yaml$', '^$') assert ret == ['.pre-commit-hooks.yaml'] def test_include_exclude_exclude_removes_files(some_filenames): - ret = _filter_by_include_exclude(some_filenames, '', r'\.py$') + ret = filter_by_include_exclude(some_filenames, '', r'\.py$') assert ret == ['.pre-commit-hooks.yaml'] diff --git a/tests/util_test.py b/tests/util_test.py index 56eb5aaa..8178bb4b 100644 --- a/tests/util_test.py +++ b/tests/util_test.py @@ -1,17 +1,14 @@ from __future__ import unicode_literals import os.path -import random import pytest from pre_commit.util import CalledProcessError from pre_commit.util import clean_path_on_failure from pre_commit.util import cmd_output -from pre_commit.util import memoize_by_cwd from pre_commit.util import parse_version from pre_commit.util import tmpdir -from testing.util import cwd def test_CalledProcessError_str(): @@ -42,37 +39,6 @@ def test_CalledProcessError_str_nooutput(): ) -@pytest.fixture -def memoized_by_cwd(): - @memoize_by_cwd - def func(arg): - return arg + str(random.getrandbits(64)) - - return func - - -def test_memoized_by_cwd_returns_same_twice_in_a_row(memoized_by_cwd): - ret = memoized_by_cwd('baz') - ret2 = memoized_by_cwd('baz') - assert ret is ret2 - - -def test_memoized_by_cwd_returns_different_for_different_args(memoized_by_cwd): - ret = memoized_by_cwd('baz') - ret2 = memoized_by_cwd('bar') - assert ret.startswith('baz') - assert ret2.startswith('bar') - assert ret != ret2 - - -def test_memoized_by_cwd_changes_with_different_cwd(memoized_by_cwd): - ret = memoized_by_cwd('baz') - with cwd('.git'): - ret2 = memoized_by_cwd('baz') - - assert ret != ret2 - - def test_clean_on_failure_noop(in_tmpdir): with clean_path_on_failure('foo'): pass