Handle non-ascii filenames from git

This commit is contained in:
Anthony Sottile
2017-09-05 08:04:39 -07:00
parent 8b14c6c5ae
commit 0815108242
4 changed files with 85 additions and 29 deletions

View File

@@ -36,13 +36,6 @@ def _hook_msg_start(hook, verbose):
)
def get_changed_files(new, old):
return cmd_output(
'git', 'diff', '--no-ext-diff', '--name-only',
'{}...{}'.format(old, new),
)[1].splitlines()
def filter_filenames_by_types(filenames, types, exclude_types):
types, exclude_types = frozenset(types), frozenset(exclude_types)
ret = []
@@ -56,7 +49,7 @@ def filter_filenames_by_types(filenames, types, exclude_types):
def get_filenames(args, include_expr, exclude_expr):
if args.origin and args.source:
getter = git.get_files_matching(
lambda: get_changed_files(args.origin, args.source),
lambda: git.get_changed_files(args.origin, args.source),
)
elif args.hook_stage == 'commit-msg':
def getter(*_):

View File

@@ -15,6 +15,14 @@ from pre_commit.util import memoize_by_cwd
logger = logging.getLogger('pre_commit')
def zsplit(s):
s = s.strip('\0')
if s:
return s.split('\0')
else:
return []
def get_root():
try:
return cmd_output('git', 'rev-parse', '--show-toplevel')[1].strip()
@@ -67,25 +75,32 @@ def get_conflicted_files():
# If they resolved the merge conflict by choosing a mesh of both sides
# this will also include the conflicted files
tree_hash = cmd_output('git', 'write-tree')[1].strip()
merge_diff_filenames = cmd_output(
'git', 'diff', '--no-ext-diff',
'-m', tree_hash, 'HEAD', 'MERGE_HEAD', '--name-only',
)[1].splitlines()
merge_diff_filenames = zsplit(cmd_output(
'git', 'diff', '--name-only', '--no-ext-diff', '-z',
'-m', tree_hash, 'HEAD', 'MERGE_HEAD',
)[1])
return set(merge_conflict_filenames) | set(merge_diff_filenames)
@memoize_by_cwd
def get_staged_files():
return cmd_output(
'git', 'diff', '--staged', '--name-only', '--no-ext-diff',
return zsplit(cmd_output(
'git', 'diff', '--staged', '--name-only', '--no-ext-diff', '-z',
# Everything except for D
'--diff-filter=ACMRTUXB',
)[1].splitlines()
)[1])
@memoize_by_cwd
def get_all_files():
return cmd_output('git', 'ls-files')[1].splitlines()
return zsplit(cmd_output('git', 'ls-files', '-z')[1])
def get_changed_files(new, old):
return zsplit(cmd_output(
'git', 'diff', '--name-only', '--no-ext-diff', '-z',
'{}...{}'.format(old, new),
)[1])
def get_files_matching(all_file_list_strategy):

View File

@@ -15,7 +15,6 @@ from pre_commit.commands.install_uninstall import install
from pre_commit.commands.run import _compute_cols
from pre_commit.commands.run import _get_skips
from pre_commit.commands.run import _has_unmerged_paths
from pre_commit.commands.run import get_changed_files
from pre_commit.commands.run import run
from pre_commit.runner import Runner
from pre_commit.util import cmd_output
@@ -501,18 +500,6 @@ def test_hook_install_failure(mock_out_store_directory, tempdir_factory):
assert ''.encode('UTF-8') + '²'.encode('latin1') in stdout
def test_get_changed_files():
files = get_changed_files(
'78c682a1d13ba20e7cb735313b9314a74365cd3a',
'3387edbb1288a580b37fe25225aa0b856b18ad1a',
)
assert files == ['CHANGELOG.md', 'setup.py']
# files changed in source but not in origin should not be returned
files = get_changed_files('HEAD~10', 'HEAD')
assert files == []
def test_lots_of_files(mock_out_store_directory, tempdir_factory):
# windows xargs seems to have a bug, here's a regression test for
# our workaround

View File

@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
@@ -162,3 +163,63 @@ OTHER_MERGE_MSG = MERGE_MSG + b'\tother_conflict_file\n'
def test_parse_merge_msg_for_conflicts(input, expected_output):
ret = git.parse_merge_msg_for_conflicts(input)
assert ret == expected_output
def test_get_changed_files():
files = git.get_changed_files(
'78c682a1d13ba20e7cb735313b9314a74365cd3a',
'3387edbb1288a580b37fe25225aa0b856b18ad1a',
)
assert files == ['CHANGELOG.md', 'setup.py']
# files changed in source but not in origin should not be returned
files = git.get_changed_files('HEAD~10', 'HEAD')
assert files == []
@pytest.mark.parametrize(
('s', 'expected'),
(
('foo\0bar\0', ['foo', 'bar']),
('foo\0', ['foo']),
('', []),
('foo', ['foo']),
),
)
def test_zsplit(s, expected):
assert git.zsplit(s) == expected
@pytest.fixture
def non_ascii_repo(tmpdir):
repo = tmpdir.join('repo').ensure_dir()
with repo.as_cwd():
cmd_output('git', 'init', '.')
cmd_output('git', 'commit', '--allow-empty', '-m', 'initial commit')
repo.join('интервью').ensure()
cmd_output('git', 'add', '.')
cmd_output('git', 'commit', '--allow-empty', '-m', 'initial commit')
yield repo
def test_all_files_non_ascii(non_ascii_repo):
ret = git.get_all_files()
assert ret == ['интервью']
def test_staged_files_non_ascii(non_ascii_repo):
non_ascii_repo.join('интервью').write('hi')
cmd_output('git', 'add', '.')
assert git.get_staged_files() == ['интервью']
def test_changed_files_non_ascii(non_ascii_repo):
ret = git.get_changed_files('HEAD', 'HEAD^')
assert ret == ['интервью']
def test_get_conflicted_files_non_ascii(in_merge_conflict):
open('интервью', 'a').close()
cmd_output('git', 'add', '.')
ret = git.get_conflicted_files()
assert ret == {'conflict_file', 'интервью'}