diff --git a/pre_commit/commands/run.py b/pre_commit/commands/run.py index ac418a78..8b80bef0 100644 --- a/pre_commit/commands/run.py +++ b/pre_commit/commands/run.py @@ -36,13 +36,6 @@ def _hook_msg_start(hook, verbose): ) -def get_changed_files(new, old): - return cmd_output( - 'git', 'diff', '--no-ext-diff', '--name-only', - '{}...{}'.format(old, new), - )[1].splitlines() - - def filter_filenames_by_types(filenames, types, exclude_types): types, exclude_types = frozenset(types), frozenset(exclude_types) ret = [] @@ -56,7 +49,7 @@ def filter_filenames_by_types(filenames, types, exclude_types): def get_filenames(args, include_expr, exclude_expr): if args.origin and args.source: getter = git.get_files_matching( - lambda: get_changed_files(args.origin, args.source), + lambda: git.get_changed_files(args.origin, args.source), ) elif args.hook_stage == 'commit-msg': def getter(*_): diff --git a/pre_commit/git.py b/pre_commit/git.py index 4b519c86..cdf807b5 100644 --- a/pre_commit/git.py +++ b/pre_commit/git.py @@ -15,6 +15,14 @@ from pre_commit.util import memoize_by_cwd logger = logging.getLogger('pre_commit') +def zsplit(s): + s = s.strip('\0') + if s: + return s.split('\0') + else: + return [] + + def get_root(): try: return cmd_output('git', 'rev-parse', '--show-toplevel')[1].strip() @@ -67,25 +75,32 @@ def get_conflicted_files(): # If they resolved the merge conflict by choosing a mesh of both sides # this will also include the conflicted files tree_hash = cmd_output('git', 'write-tree')[1].strip() - merge_diff_filenames = cmd_output( - 'git', 'diff', '--no-ext-diff', - '-m', tree_hash, 'HEAD', 'MERGE_HEAD', '--name-only', - )[1].splitlines() + merge_diff_filenames = zsplit(cmd_output( + 'git', 'diff', '--name-only', '--no-ext-diff', '-z', + '-m', tree_hash, 'HEAD', 'MERGE_HEAD', + )[1]) return set(merge_conflict_filenames) | set(merge_diff_filenames) @memoize_by_cwd def get_staged_files(): - return cmd_output( - 'git', 'diff', '--staged', '--name-only', '--no-ext-diff', + return zsplit(cmd_output( + 'git', 'diff', '--staged', '--name-only', '--no-ext-diff', '-z', # Everything except for D '--diff-filter=ACMRTUXB', - )[1].splitlines() + )[1]) @memoize_by_cwd def get_all_files(): - return cmd_output('git', 'ls-files')[1].splitlines() + return zsplit(cmd_output('git', 'ls-files', '-z')[1]) + + +def get_changed_files(new, old): + return zsplit(cmd_output( + 'git', 'diff', '--name-only', '--no-ext-diff', '-z', + '{}...{}'.format(old, new), + )[1]) def get_files_matching(all_file_list_strategy): diff --git a/tests/commands/run_test.py b/tests/commands/run_test.py index 924d097f..5ed0ad8a 100644 --- a/tests/commands/run_test.py +++ b/tests/commands/run_test.py @@ -15,7 +15,6 @@ from pre_commit.commands.install_uninstall import install from pre_commit.commands.run import _compute_cols from pre_commit.commands.run import _get_skips from pre_commit.commands.run import _has_unmerged_paths -from pre_commit.commands.run import get_changed_files from pre_commit.commands.run import run from pre_commit.runner import Runner from pre_commit.util import cmd_output @@ -501,18 +500,6 @@ def test_hook_install_failure(mock_out_store_directory, tempdir_factory): assert '☃'.encode('UTF-8') + '²'.encode('latin1') in stdout -def test_get_changed_files(): - files = get_changed_files( - '78c682a1d13ba20e7cb735313b9314a74365cd3a', - '3387edbb1288a580b37fe25225aa0b856b18ad1a', - ) - assert files == ['CHANGELOG.md', 'setup.py'] - - # files changed in source but not in origin should not be returned - files = get_changed_files('HEAD~10', 'HEAD') - assert files == [] - - def test_lots_of_files(mock_out_store_directory, tempdir_factory): # windows xargs seems to have a bug, here's a regression test for # our workaround diff --git a/tests/git_test.py b/tests/git_test.py index 0500a42d..4f679119 100644 --- a/tests/git_test.py +++ b/tests/git_test.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from __future__ import absolute_import from __future__ import unicode_literals @@ -162,3 +163,63 @@ OTHER_MERGE_MSG = MERGE_MSG + b'\tother_conflict_file\n' def test_parse_merge_msg_for_conflicts(input, expected_output): ret = git.parse_merge_msg_for_conflicts(input) assert ret == expected_output + + +def test_get_changed_files(): + files = git.get_changed_files( + '78c682a1d13ba20e7cb735313b9314a74365cd3a', + '3387edbb1288a580b37fe25225aa0b856b18ad1a', + ) + assert files == ['CHANGELOG.md', 'setup.py'] + + # files changed in source but not in origin should not be returned + files = git.get_changed_files('HEAD~10', 'HEAD') + assert files == [] + + +@pytest.mark.parametrize( + ('s', 'expected'), + ( + ('foo\0bar\0', ['foo', 'bar']), + ('foo\0', ['foo']), + ('', []), + ('foo', ['foo']), + ), +) +def test_zsplit(s, expected): + assert git.zsplit(s) == expected + + +@pytest.fixture +def non_ascii_repo(tmpdir): + repo = tmpdir.join('repo').ensure_dir() + with repo.as_cwd(): + cmd_output('git', 'init', '.') + cmd_output('git', 'commit', '--allow-empty', '-m', 'initial commit') + repo.join('интервью').ensure() + cmd_output('git', 'add', '.') + cmd_output('git', 'commit', '--allow-empty', '-m', 'initial commit') + yield repo + + +def test_all_files_non_ascii(non_ascii_repo): + ret = git.get_all_files() + assert ret == ['интервью'] + + +def test_staged_files_non_ascii(non_ascii_repo): + non_ascii_repo.join('интервью').write('hi') + cmd_output('git', 'add', '.') + assert git.get_staged_files() == ['интервью'] + + +def test_changed_files_non_ascii(non_ascii_repo): + ret = git.get_changed_files('HEAD', 'HEAD^') + assert ret == ['интервью'] + + +def test_get_conflicted_files_non_ascii(in_merge_conflict): + open('интервью', 'a').close() + cmd_output('git', 'add', '.') + ret = git.get_conflicted_files() + assert ret == {'conflict_file', 'интервью'}