diff --git a/pre_commit/languages/all.py b/pre_commit/languages/all.py index 67b7ddea..514ba611 100644 --- a/pre_commit/languages/all.py +++ b/pre_commit/languages/all.py @@ -5,6 +5,7 @@ from pre_commit.languages import docker_image from pre_commit.languages import golang from pre_commit.languages import node from pre_commit.languages import pcre +from pre_commit.languages import pygrep from pre_commit.languages import python from pre_commit.languages import ruby from pre_commit.languages import script @@ -54,6 +55,7 @@ languages = { 'golang': golang, 'node': node, 'pcre': pcre, + 'pygrep': pygrep, 'python': python, 'ruby': ruby, 'script': script, diff --git a/pre_commit/languages/pygrep.py b/pre_commit/languages/pygrep.py new file mode 100644 index 00000000..4914fd66 --- /dev/null +++ b/pre_commit/languages/pygrep.py @@ -0,0 +1,59 @@ +from __future__ import absolute_import +from __future__ import unicode_literals + +import argparse +import re +import sys + +from pre_commit import output +from pre_commit.languages import helpers +from pre_commit.xargs import xargs + + +ENVIRONMENT_DIR = None +get_default_version = helpers.basic_get_default_version +healthy = helpers.basic_healthy +install_environment = helpers.no_install + + +def _process_filename_by_line(pattern, filename): + retv = 0 + with open(filename, 'rb') as f: + for line_no, line in enumerate(f, start=1): + if pattern.search(line): + retv = 1 + output.write('{}:{}:'.format(filename, line_no)) + output.write_line(line.rstrip(b'\r\n')) + return retv + + +def run_hook(repo_cmd_runner, hook, file_args): + exe = (sys.executable, '-m', __name__) + exe += tuple(hook['args']) + (hook['entry'],) + return xargs(exe, file_args) + + +def main(argv=None): + parser = argparse.ArgumentParser( + description=( + 'grep-like finder using python regexes. Unlike grep, this tool ' + 'returns nonzero when it finds a match and zero otherwise. The ' + 'idea here being that matches are "problems".' + ), + ) + parser.add_argument('-i', '--ignore-case', action='store_true') + parser.add_argument('pattern', help='python regex pattern.') + parser.add_argument('filenames', nargs='*') + args = parser.parse_args(argv) + + flags = re.IGNORECASE if args.ignore_case else 0 + pattern = re.compile(args.pattern.encode(), flags) + + retv = 0 + for filename in args.filenames: + retv |= _process_filename_by_line(pattern, filename) + return retv + + +if __name__ == '__main__': + exit(main()) diff --git a/pre_commit/repository.py b/pre_commit/repository.py index 675c4716..6955a73e 100644 --- a/pre_commit/repository.py +++ b/pre_commit/repository.py @@ -202,8 +202,8 @@ class LocalRepository(Repository): def _cmd_runner_from_deps(self, language_name, deps): """local repositories have a cmd runner per hook""" language = languages[language_name] - # pcre / script / system / docker_image do not have environments so - # they work out of the current directory + # pcre / pygrep / script / system / docker_image do not have + # environments so they work out of the current directory if language.ENVIRONMENT_DIR is None: return PrefixedCommandRunner(git.get_root()) else: diff --git a/testing/fixtures.py b/testing/fixtures.py index befc3f53..388b344b 100644 --- a/testing/fixtures.py +++ b/testing/fixtures.py @@ -73,7 +73,7 @@ def config_with_local_hooks(): ('id', 'do_not_commit'), ('name', 'Block if "DO NOT COMMIT" is found'), ('entry', 'DO NOT COMMIT'), - ('language', 'pcre'), + ('language', 'pygrep'), ('files', '^(.*)$'), ))], ), diff --git a/testing/resources/pcre_hooks_repo/.pre-commit-hooks.yaml b/testing/resources/pcre_hooks_repo/.pre-commit-hooks.yaml deleted file mode 100644 index 709d8df3..00000000 --- a/testing/resources/pcre_hooks_repo/.pre-commit-hooks.yaml +++ /dev/null @@ -1,16 +0,0 @@ -- id: regex-with-quotes - name: Regex with quotes - entry: "foo'bar" - language: pcre - files: '' -- id: other-regex - name: Other regex - entry: ^\[INFO\] - language: pcre - files: '' -- id: regex-with-grep-args - name: Regex with grep extra arguments - entry: foo.+bar - language: pcre - files: '' - args: [-i] diff --git a/tests/languages/pygrep_test.py b/tests/languages/pygrep_test.py new file mode 100644 index 00000000..048a5908 --- /dev/null +++ b/tests/languages/pygrep_test.py @@ -0,0 +1,40 @@ +from __future__ import absolute_import +from __future__ import unicode_literals + +import pytest + +from pre_commit.languages import pygrep + + +@pytest.fixture +def some_files(tmpdir): + tmpdir.join('f1').write_binary(b'foo\nbar\n') + tmpdir.join('f2').write_binary(b'[INFO] hi\n') + tmpdir.join('f3').write_binary(b"with'quotes\n") + with tmpdir.as_cwd(): + yield + + +@pytest.mark.usefixtures('some_files') +@pytest.mark.parametrize( + ('pattern', 'expected_retcode', 'expected_out'), + ( + ('baz', 0, ''), + ('foo', 1, 'f1:1:foo\n'), + ('bar', 1, 'f1:2:bar\n'), + (r'(?i)\[info\]', 1, 'f2:1:[INFO] hi\n'), + ("h'q", 1, "f3:1:with'quotes\n"), + ), +) +def test_main(some_files, cap_out, pattern, expected_retcode, expected_out): + ret = pygrep.main((pattern, 'f1', 'f2', 'f3')) + out = cap_out.get() + assert ret == expected_retcode + assert out == expected_out + + +def test_ignore_case(some_files, cap_out): + ret = pygrep.main(('--ignore-case', 'info', 'f1', 'f2', 'f3')) + out = cap_out.get() + assert ret == 1 + assert out == 'f2:1:[INFO] hi\n' diff --git a/tests/repository_test.py b/tests/repository_test.py index 8ff9db4c..37a609ba 100644 --- a/tests/repository_test.py +++ b/tests/repository_test.py @@ -1,6 +1,7 @@ from __future__ import absolute_import from __future__ import unicode_literals +import collections import io import os.path import re @@ -36,6 +37,10 @@ from testing.util import xfailif_windows_no_node from testing.util import xfailif_windows_no_ruby +def _norm_out(b): + return b.replace(b'\r\n', b'\n') + + def _test_hook_repo( tempdir_factory, store, @@ -54,7 +59,7 @@ def _test_hook_repo( ] ret = repo.run_hook(hook_dict, args) assert ret[0] == expected_return_code - assert ret[1].replace(b'\r\n', b'\n') == expected + assert _norm_out(ret[1]) == expected @pytest.mark.integration @@ -114,7 +119,7 @@ def test_switch_language_versions_doesnt_clobber(tempdir_factory, store): ] ret = repo.run_hook(hook_dict, []) assert ret[0] == 0 - assert ret[1].replace(b'\r\n', b'\n') == expected_output + assert _norm_out(ret[1]) == expected_output run_on_version('python3.4', b'3.4\n[]\nHello World\n') run_on_version('python3.5', b'3.5\n[]\nHello World\n') @@ -277,25 +282,6 @@ def test_missing_executable(tempdir_factory, store): ) -@pytest.mark.integration -def test_missing_pcre_support(tempdir_factory, store): - orig_find_executable = parse_shebang.find_executable - - def no_grep(exe, **kwargs): - if exe == pcre.GREP: - return None - else: - return orig_find_executable(exe, **kwargs) - - with mock.patch.object(parse_shebang, 'find_executable', no_grep): - _test_hook_repo( - tempdir_factory, store, 'pcre_hooks_repo', - 'regex-with-quotes', ['/dev/null'], - 'Executable `{}` not found'.format(pcre.GREP).encode('UTF-8'), - expected_return_code=1, - ) - - @pytest.mark.integration def test_run_a_script_hook(tempdir_factory, store): _test_hook_repo( @@ -330,85 +316,88 @@ def test_run_hook_with_curly_braced_arguments(tempdir_factory, store): ) -@xfailif_no_pcre_support -@pytest.mark.integration -def test_pcre_hook_no_match(tempdir_factory, store): - path = git_dir(tempdir_factory) - with cwd(path): - with io.open('herp', 'w') as herp: - herp.write('foo') +def _make_grep_repo(language, entry, store, args=()): + config = collections.OrderedDict(( + ('repo', 'local'), + ( + 'hooks', [ + collections.OrderedDict(( + ('id', 'grep-hook'), + ('name', 'grep-hook'), + ('language', language), + ('entry', entry), + ('args', args), + ('types', ['text']), + )), + ], + ), + )) + repo = Repository.create(config, store) + (_, hook), = repo.hooks + return repo, hook - with io.open('derp', 'w') as derp: - derp.write('bar') - _test_hook_repo( - tempdir_factory, store, 'pcre_hooks_repo', - 'regex-with-quotes', ['herp', 'derp'], b'', - ) +@pytest.fixture +def greppable_files(tmpdir): + with tmpdir.as_cwd(): + cmd_output('git', 'init', '.') + tmpdir.join('f1').write_binary(b"hello'hi\nworld\n") + tmpdir.join('f2').write_binary(b'foo\nbar\nbaz\n') + tmpdir.join('f3').write_binary(b'[WARN] hi\n') + yield tmpdir - _test_hook_repo( - tempdir_factory, store, 'pcre_hooks_repo', - 'other-regex', ['herp', 'derp'], b'', - ) + +class TestPygrep(object): + language = 'pygrep' + + def test_grep_hook_matching(self, greppable_files, store): + repo, hook = _make_grep_repo(self.language, 'ello', store) + ret, out, _ = repo.run_hook(hook, ('f1', 'f2', 'f3')) + assert ret == 1 + assert _norm_out(out) == b"f1:1:hello'hi\n" + + def test_grep_hook_case_insensitive(self, greppable_files, store): + repo, hook = _make_grep_repo(self.language, 'ELLO', store, args=['-i']) + ret, out, _ = repo.run_hook(hook, ('f1', 'f2', 'f3')) + assert ret == 1 + assert _norm_out(out) == b"f1:1:hello'hi\n" + + @pytest.mark.parametrize('regex', ('nope', "foo'bar", r'^\[INFO\]')) + def test_grep_hook_not_matching(self, regex, greppable_files, store): + repo, hook = _make_grep_repo(self.language, regex, store) + ret, out, _ = repo.run_hook(hook, ('f1', 'f2', 'f3')) + assert (ret, out) == (0, b'') @xfailif_no_pcre_support -@pytest.mark.integration -def test_pcre_hook_matching(tempdir_factory, store): - path = git_dir(tempdir_factory) - with cwd(path): - with io.open('herp', 'w') as herp: - herp.write("\nherpfoo'bard\n") +class TestPCRE(TestPygrep): + """organized as a class for xfailing pcre""" + language = 'pcre' - with io.open('derp', 'w') as derp: - derp.write('[INFO] information yo\n') + def test_pcre_hook_many_files(self, greppable_files, store): + # This is intended to simulate lots of passing files and one failing + # file to make sure it still fails. This is not the case when naively + # using a system hook with `grep -H -n '...'` + repo, hook = _make_grep_repo('pcre', 'ello', store) + ret, out, _ = repo.run_hook(hook, (os.devnull,) * 15000 + ('f1',)) + assert ret == 1 + assert _norm_out(out) == b"f1:1:hello'hi\n" - _test_hook_repo( - tempdir_factory, store, 'pcre_hooks_repo', - 'regex-with-quotes', ['herp', 'derp'], b"herp:2:herpfoo'bard\n", - expected_return_code=1, - ) + def test_missing_pcre_support(self, greppable_files, store): + orig_find_executable = parse_shebang.find_executable - _test_hook_repo( - tempdir_factory, store, 'pcre_hooks_repo', - 'other-regex', ['herp', 'derp'], b'derp:1:[INFO] information yo\n', - expected_return_code=1, - ) + def no_grep(exe, **kwargs): + if exe == pcre.GREP: + return None + else: + return orig_find_executable(exe, **kwargs) - -@xfailif_no_pcre_support -@pytest.mark.integration -def test_pcre_hook_case_insensitive_option(tempdir_factory, store): - path = git_dir(tempdir_factory) - with cwd(path): - with io.open('herp', 'w') as herp: - herp.write('FoOoOoObar\n') - - _test_hook_repo( - tempdir_factory, store, 'pcre_hooks_repo', - 'regex-with-grep-args', ['herp'], b'herp:1:FoOoOoObar\n', - expected_return_code=1, - ) - - -@xfailif_no_pcre_support -@pytest.mark.integration -def test_pcre_many_files(tempdir_factory, store): - # This is intended to simulate lots of passing files and one failing file - # to make sure it still fails. This is not the case when naively using - # a system hook with `grep -H -n '...'` and expected_return_code=1. - path = git_dir(tempdir_factory) - with cwd(path): - with io.open('herp', 'w') as herp: - herp.write('[INFO] info\n') - - _test_hook_repo( - tempdir_factory, store, 'pcre_hooks_repo', - 'other-regex', - ['/dev/null'] * 15000 + ['herp'], - b'herp:1:[INFO] info\n', - expected_return_code=1, - ) + with mock.patch.object(parse_shebang, 'find_executable', no_grep): + repo, hook = _make_grep_repo('pcre', 'ello', store) + ret, out, _ = repo.run_hook(hook, ('f1', 'f2', 'f3')) + assert ret == 1 + expected = 'Executable `{}` not found'.format(pcre.GREP).encode() + assert out == expected def _norm_pwd(path): @@ -703,7 +692,7 @@ def test_local_python_repo(store): (_, hook), = repo.hooks ret = repo.run_hook(hook, ('filename',)) assert ret[0] == 0 - assert ret[1].replace(b'\r\n', b'\n') == b"['filename']\nHello World\n" + assert _norm_out(ret[1]) == b"['filename']\nHello World\n" def test_hook_id_not_present(tempdir_factory, store, fake_log_handler): diff --git a/tests/runner_test.py b/tests/runner_test.py index cfca44f3..b5c0ce75 100644 --- a/tests/runner_test.py +++ b/tests/runner_test.py @@ -70,7 +70,7 @@ def test_local_hooks(tempdir_factory, mock_out_store_directory): ('id', 'do_not_commit'), ('name', 'Block if "DO NOT COMMIT" is found'), ('entry', 'DO NOT COMMIT'), - ('language', 'pcre'), + ('language', 'pygrep'), ('files', '^(.*)$'), )), ), @@ -105,7 +105,7 @@ def test_local_hooks_alt_config(tempdir_factory, mock_out_store_directory): ('id', 'do_not_commit'), ('name', 'Block if "DO NOT COMMIT" is found'), ('entry', 'DO NOT COMMIT'), - ('language', 'pcre'), + ('language', 'pygrep'), ('files', '^(.*)$'), )), ),