Merge pull request #630 from pre-commit/pygrep

Implement pygrep language as a replacement for pcre
This commit is contained in:
Anthony Sottile
2017-09-24 11:31:09 -07:00
committed by GitHub
8 changed files with 186 additions and 112 deletions

View File

@@ -5,6 +5,7 @@ from pre_commit.languages import docker_image
from pre_commit.languages import golang
from pre_commit.languages import node
from pre_commit.languages import pcre
from pre_commit.languages import pygrep
from pre_commit.languages import python
from pre_commit.languages import ruby
from pre_commit.languages import script
@@ -54,6 +55,7 @@ languages = {
'golang': golang,
'node': node,
'pcre': pcre,
'pygrep': pygrep,
'python': python,
'ruby': ruby,
'script': script,

View File

@@ -0,0 +1,59 @@
from __future__ import absolute_import
from __future__ import unicode_literals
import argparse
import re
import sys
from pre_commit import output
from pre_commit.languages import helpers
from pre_commit.xargs import xargs
# pygrep has no environment directory; the local-repository code checks
# `language.ENVIRONMENT_DIR is None` and then runs the hook out of the
# current git root instead of a per-hook environment.
ENVIRONMENT_DIR = None
# The remaining language hooks are taken unchanged from the shared helpers
# module (presumably the "nothing to install" defaults -- confirm in
# pre_commit.languages.helpers).
get_default_version = helpers.basic_get_default_version
healthy = helpers.basic_healthy
install_environment = helpers.no_install
def _process_filename_by_line(pattern, filename):
    """Scan one file line by line and report every line the pattern hits.

    Args:
        pattern: compiled regex; it is searched against the raw bytes of
            each line because the file is opened in binary mode.
        filename: path of the file to scan.

    Returns:
        1 if at least one line matched, otherwise 0.
    """
    found = False
    with open(filename, 'rb') as stream:
        # line numbers are 1-based, as in grep output
        for number, contents in enumerate(stream, 1):
            if not pattern.search(contents):
                continue
            found = True
            output.write('{}:{}:'.format(filename, number))
            # strip only the line terminator before echoing the line
            output.write_line(contents.rstrip(b'\r\n'))
    return int(found)
def run_hook(repo_cmd_runner, hook, file_args):
    """Run this module as ``python -m`` over the given files via xargs.

    Args:
        repo_cmd_runner: not used by this language.
        hook: hook mapping; ``hook['args']`` are passed through as options
            and ``hook['entry']`` is passed as the regex pattern.
        file_args: filenames handed to ``xargs`` to append to the command.

    Returns:
        Whatever ``xargs`` returns for the assembled command.
    """
    interpreter = (sys.executable, '-m', __name__)
    options = tuple(hook['args'])
    pattern = (hook['entry'],)
    return xargs(interpreter + options + pattern, file_args)
def main(argv=None):
    """Command-line entry point: report lines matching a python regex.

    Args:
        argv: argument sequence to parse; when ``None`` argparse reads the
            process arguments itself.

    Returns:
        The bitwise OR of the per-file results: nonzero when any file
        contained a match, 0 otherwise.
    """
    description = (
        'grep-like finder using python regexes. Unlike grep, this tool '
        'returns nonzero when it finds a match and zero otherwise. The '
        'idea here being that matches are "problems".'
    )
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-i', '--ignore-case', action='store_true')
    parser.add_argument('pattern', help='python regex pattern.')
    parser.add_argument('filenames', nargs='*')
    options = parser.parse_args(argv)

    if options.ignore_case:
        regex_flags = re.IGNORECASE
    else:
        regex_flags = 0
    # files are scanned as bytes, so the pattern is compiled as bytes too
    regex = re.compile(options.pattern.encode(), regex_flags)

    status = 0
    for path in options.filenames:
        status |= _process_filename_by_line(regex, path)
    return status
# Script entry point (this module is run with `python -m` by run_hook).
# Use sys.exit rather than the builtin `exit`: the latter is injected by the
# `site` module for interactive use and is absent under `python -S`.
if __name__ == '__main__':
    sys.exit(main())

View File

@@ -202,8 +202,8 @@ class LocalRepository(Repository):
def _cmd_runner_from_deps(self, language_name, deps):
"""local repositories have a cmd runner per hook"""
language = languages[language_name]
# pcre / script / system / docker_image do not have environments so
# they work out of the current directory
# pcre / pygrep / script / system / docker_image do not have
# environments so they work out of the current directory
if language.ENVIRONMENT_DIR is None:
return PrefixedCommandRunner(git.get_root())
else:

View File

@@ -73,7 +73,7 @@ def config_with_local_hooks():
('id', 'do_not_commit'),
('name', 'Block if "DO NOT COMMIT" is found'),
('entry', 'DO NOT COMMIT'),
('language', 'pcre'),
('language', 'pygrep'),
('files', '^(.*)$'),
))],
),

View File

@@ -1,16 +0,0 @@
- id: regex-with-quotes
name: Regex with quotes
entry: "foo'bar"
language: pcre
files: ''
- id: other-regex
name: Other regex
entry: ^\[INFO\]
language: pcre
files: ''
- id: regex-with-grep-args
name: Regex with grep extra arguments
entry: foo.+bar
language: pcre
files: ''
args: [-i]

View File

@@ -0,0 +1,40 @@
from __future__ import absolute_import
from __future__ import unicode_literals
import pytest
from pre_commit.languages import pygrep
@pytest.fixture
def some_files(tmpdir):
    """Create files f1, f2 and f3 in tmpdir and run the test from there."""
    contents = (
        ('f1', b'foo\nbar\n'),
        ('f2', b'[INFO] hi\n'),
        ('f3', b"with'quotes\n"),
    )
    for name, data in contents:
        tmpdir.join(name).write_binary(data)
    # the test body runs with tmpdir as the working directory
    with tmpdir.as_cwd():
        yield
@pytest.mark.usefixtures('some_files')
@pytest.mark.parametrize(
    ('pattern', 'expected_retcode', 'expected_out'),
    (
        ('baz', 0, ''),
        ('foo', 1, 'f1:1:foo\n'),
        ('bar', 1, 'f1:2:bar\n'),
        (r'(?i)\[info\]', 1, 'f2:1:[INFO] hi\n'),
        ("h'q", 1, "f3:1:with'quotes\n"),
    ),
)
def test_main(some_files, cap_out, pattern, expected_retcode, expected_out):
    """Check pygrep.main's return code and output for each pattern."""
    argv = (pattern, 'f1', 'f2', 'f3')
    retcode = pygrep.main(argv)
    captured = cap_out.get()
    assert retcode == expected_retcode
    assert captured == expected_out
def test_ignore_case(some_files, cap_out):
    """--ignore-case lets a lowercase pattern match the uppercase line."""
    argv = ('--ignore-case', 'info', 'f1', 'f2', 'f3')
    retcode = pygrep.main(argv)
    captured = cap_out.get()
    assert retcode == 1
    assert captured == 'f2:1:[INFO] hi\n'

View File

@@ -1,6 +1,7 @@
from __future__ import absolute_import
from __future__ import unicode_literals
import collections
import io
import os.path
import re
@@ -36,6 +37,10 @@ from testing.util import xfailif_windows_no_node
from testing.util import xfailif_windows_no_ruby
def _norm_out(b):
    """Return ``b`` with every CRLF line ending replaced by a bare LF."""
    crlf, lf = b'\r\n', b'\n'
    return b.replace(crlf, lf)
def _test_hook_repo(
tempdir_factory,
store,
@@ -54,7 +59,7 @@ def _test_hook_repo(
]
ret = repo.run_hook(hook_dict, args)
assert ret[0] == expected_return_code
assert ret[1].replace(b'\r\n', b'\n') == expected
assert _norm_out(ret[1]) == expected
@pytest.mark.integration
@@ -114,7 +119,7 @@ def test_switch_language_versions_doesnt_clobber(tempdir_factory, store):
]
ret = repo.run_hook(hook_dict, [])
assert ret[0] == 0
assert ret[1].replace(b'\r\n', b'\n') == expected_output
assert _norm_out(ret[1]) == expected_output
run_on_version('python3.4', b'3.4\n[]\nHello World\n')
run_on_version('python3.5', b'3.5\n[]\nHello World\n')
@@ -277,25 +282,6 @@ def test_missing_executable(tempdir_factory, store):
)
@pytest.mark.integration
def test_missing_pcre_support(tempdir_factory, store):
orig_find_executable = parse_shebang.find_executable
def no_grep(exe, **kwargs):
if exe == pcre.GREP:
return None
else:
return orig_find_executable(exe, **kwargs)
with mock.patch.object(parse_shebang, 'find_executable', no_grep):
_test_hook_repo(
tempdir_factory, store, 'pcre_hooks_repo',
'regex-with-quotes', ['/dev/null'],
'Executable `{}` not found'.format(pcre.GREP).encode('UTF-8'),
expected_return_code=1,
)
@pytest.mark.integration
def test_run_a_script_hook(tempdir_factory, store):
_test_hook_repo(
@@ -330,85 +316,88 @@ def test_run_hook_with_curly_braced_arguments(tempdir_factory, store):
)
@xfailif_no_pcre_support
@pytest.mark.integration
def test_pcre_hook_no_match(tempdir_factory, store):
path = git_dir(tempdir_factory)
with cwd(path):
with io.open('herp', 'w') as herp:
herp.write('foo')
def _make_grep_repo(language, entry, store, args=()):
    """Build a local repository holding a single grep-style hook.

    Args:
        language: value for the hook's ``language`` key.
        entry: value for the hook's ``entry`` key (the pattern to search).
        store: store passed through to ``Repository.create``.
        args: value for the hook's ``args`` key.

    Returns:
        A ``(repo, hook)`` pair: the created repository and its only hook.
    """
    hook_config = collections.OrderedDict((
        ('id', 'grep-hook'),
        ('name', 'grep-hook'),
        ('language', language),
        ('entry', entry),
        ('args', args),
        ('types', ['text']),
    ))
    repo_config = collections.OrderedDict((
        ('repo', 'local'),
        ('hooks', [hook_config]),
    ))
    repo = Repository.create(repo_config, store)
    # exactly one hook is expected; unpacking fails loudly otherwise
    (_, hook), = repo.hooks
    return repo, hook
with io.open('derp', 'w') as derp:
derp.write('bar')
_test_hook_repo(
tempdir_factory, store, 'pcre_hooks_repo',
'regex-with-quotes', ['herp', 'derp'], b'',
)
@pytest.fixture
def greppable_files(tmpdir):
    """Yield tmpdir as a git-initialised cwd containing f1, f2 and f3."""
    contents = (
        ('f1', b"hello'hi\nworld\n"),
        ('f2', b'foo\nbar\nbaz\n'),
        ('f3', b'[WARN] hi\n'),
    )
    with tmpdir.as_cwd():
        cmd_output('git', 'init', '.')
        for name, data in contents:
            tmpdir.join(name).write_binary(data)
        yield tmpdir
_test_hook_repo(
tempdir_factory, store, 'pcre_hooks_repo',
'other-regex', ['herp', 'derp'], b'',
)
class TestPygrep(object):
    """Grep-hook checks run through a local repository.

    ``language`` names the hook language under test; a subclass can
    override it to reuse the same tests for another grep-style language.
    """

    language = 'pygrep'

    def test_grep_hook_matching(self, greppable_files, store):
        """A pattern found in f1 fails the hook and prints the line."""
        repo, hook = _make_grep_repo(self.language, 'ello', store)
        retcode, stdout, _ = repo.run_hook(hook, ('f1', 'f2', 'f3'))
        assert retcode == 1
        assert _norm_out(stdout) == b"f1:1:hello'hi\n"

    def test_grep_hook_case_insensitive(self, greppable_files, store):
        """With ``-i`` an uppercase pattern still matches the f1 line."""
        repo, hook = _make_grep_repo(self.language, 'ELLO', store, args=['-i'])
        retcode, stdout, _ = repo.run_hook(hook, ('f1', 'f2', 'f3'))
        assert retcode == 1
        assert _norm_out(stdout) == b"f1:1:hello'hi\n"

    @pytest.mark.parametrize('regex', ('nope', "foo'bar", r'^\[INFO\]'))
    def test_grep_hook_not_matching(self, regex, greppable_files, store):
        """Patterns absent from every file pass with no output."""
        repo, hook = _make_grep_repo(self.language, regex, store)
        retcode, stdout, _ = repo.run_hook(hook, ('f1', 'f2', 'f3'))
        assert (retcode, stdout) == (0, b'')
@xfailif_no_pcre_support
@pytest.mark.integration
def test_pcre_hook_matching(tempdir_factory, store):
path = git_dir(tempdir_factory)
with cwd(path):
with io.open('herp', 'w') as herp:
herp.write("\nherpfoo'bard\n")
class TestPCRE(TestPygrep):
"""organized as a class for xfailing pcre"""
language = 'pcre'
with io.open('derp', 'w') as derp:
derp.write('[INFO] information yo\n')
def test_pcre_hook_many_files(self, greppable_files, store):
# This is intended to simulate lots of passing files and one failing
# file to make sure it still fails. This is not the case when naively
# using a system hook with `grep -H -n '...'`
repo, hook = _make_grep_repo('pcre', 'ello', store)
ret, out, _ = repo.run_hook(hook, (os.devnull,) * 15000 + ('f1',))
assert ret == 1
assert _norm_out(out) == b"f1:1:hello'hi\n"
_test_hook_repo(
tempdir_factory, store, 'pcre_hooks_repo',
'regex-with-quotes', ['herp', 'derp'], b"herp:2:herpfoo'bard\n",
expected_return_code=1,
)
def test_missing_pcre_support(self, greppable_files, store):
orig_find_executable = parse_shebang.find_executable
_test_hook_repo(
tempdir_factory, store, 'pcre_hooks_repo',
'other-regex', ['herp', 'derp'], b'derp:1:[INFO] information yo\n',
expected_return_code=1,
)
def no_grep(exe, **kwargs):
if exe == pcre.GREP:
return None
else:
return orig_find_executable(exe, **kwargs)
@xfailif_no_pcre_support
@pytest.mark.integration
def test_pcre_hook_case_insensitive_option(tempdir_factory, store):
path = git_dir(tempdir_factory)
with cwd(path):
with io.open('herp', 'w') as herp:
herp.write('FoOoOoObar\n')
_test_hook_repo(
tempdir_factory, store, 'pcre_hooks_repo',
'regex-with-grep-args', ['herp'], b'herp:1:FoOoOoObar\n',
expected_return_code=1,
)
@xfailif_no_pcre_support
@pytest.mark.integration
def test_pcre_many_files(tempdir_factory, store):
# This is intended to simulate lots of passing files and one failing file
# to make sure it still fails. This is not the case when naively using
# a system hook with `grep -H -n '...'` and expected_return_code=1.
path = git_dir(tempdir_factory)
with cwd(path):
with io.open('herp', 'w') as herp:
herp.write('[INFO] info\n')
_test_hook_repo(
tempdir_factory, store, 'pcre_hooks_repo',
'other-regex',
['/dev/null'] * 15000 + ['herp'],
b'herp:1:[INFO] info\n',
expected_return_code=1,
)
with mock.patch.object(parse_shebang, 'find_executable', no_grep):
repo, hook = _make_grep_repo('pcre', 'ello', store)
ret, out, _ = repo.run_hook(hook, ('f1', 'f2', 'f3'))
assert ret == 1
expected = 'Executable `{}` not found'.format(pcre.GREP).encode()
assert out == expected
def _norm_pwd(path):
@@ -703,7 +692,7 @@ def test_local_python_repo(store):
(_, hook), = repo.hooks
ret = repo.run_hook(hook, ('filename',))
assert ret[0] == 0
assert ret[1].replace(b'\r\n', b'\n') == b"['filename']\nHello World\n"
assert _norm_out(ret[1]) == b"['filename']\nHello World\n"
def test_hook_id_not_present(tempdir_factory, store, fake_log_handler):

View File

@@ -70,7 +70,7 @@ def test_local_hooks(tempdir_factory, mock_out_store_directory):
('id', 'do_not_commit'),
('name', 'Block if "DO NOT COMMIT" is found'),
('entry', 'DO NOT COMMIT'),
('language', 'pcre'),
('language', 'pygrep'),
('files', '^(.*)$'),
)),
),
@@ -105,7 +105,7 @@ def test_local_hooks_alt_config(tempdir_factory, mock_out_store_directory):
('id', 'do_not_commit'),
('name', 'Block if "DO NOT COMMIT" is found'),
('entry', 'DO NOT COMMIT'),
('language', 'pcre'),
('language', 'pygrep'),
('files', '^(.*)$'),
)),
),