mirror of
https://github.com/pre-commit/pre-commit.git
synced 2026-01-13 20:40:08 -06:00
Merge pull request #605 from pre-commit/non_ascii_filenames
Handle non-ascii filenames from git
This commit is contained in:
@@ -36,13 +36,6 @@ def _hook_msg_start(hook, verbose):
|
||||
)
|
||||
|
||||
|
||||
def get_changed_files(new, old):
|
||||
return cmd_output(
|
||||
'git', 'diff', '--no-ext-diff', '--name-only',
|
||||
'{}...{}'.format(old, new),
|
||||
)[1].splitlines()
|
||||
|
||||
|
||||
def filter_filenames_by_types(filenames, types, exclude_types):
|
||||
types, exclude_types = frozenset(types), frozenset(exclude_types)
|
||||
ret = []
|
||||
@@ -56,7 +49,7 @@ def filter_filenames_by_types(filenames, types, exclude_types):
|
||||
def get_filenames(args, include_expr, exclude_expr):
|
||||
if args.origin and args.source:
|
||||
getter = git.get_files_matching(
|
||||
lambda: get_changed_files(args.origin, args.source),
|
||||
lambda: git.get_changed_files(args.origin, args.source),
|
||||
)
|
||||
elif args.hook_stage == 'commit-msg':
|
||||
def getter(*_):
|
||||
|
||||
@@ -15,6 +15,14 @@ from pre_commit.util import memoize_by_cwd
|
||||
logger = logging.getLogger('pre_commit')
|
||||
|
||||
|
||||
def zsplit(s):
|
||||
s = s.strip('\0')
|
||||
if s:
|
||||
return s.split('\0')
|
||||
else:
|
||||
return []
|
||||
|
||||
|
||||
def get_root():
|
||||
try:
|
||||
return cmd_output('git', 'rev-parse', '--show-toplevel')[1].strip()
|
||||
@@ -67,25 +75,32 @@ def get_conflicted_files():
|
||||
# If they resolved the merge conflict by choosing a mesh of both sides
|
||||
# this will also include the conflicted files
|
||||
tree_hash = cmd_output('git', 'write-tree')[1].strip()
|
||||
merge_diff_filenames = cmd_output(
|
||||
'git', 'diff', '--no-ext-diff',
|
||||
'-m', tree_hash, 'HEAD', 'MERGE_HEAD', '--name-only',
|
||||
)[1].splitlines()
|
||||
merge_diff_filenames = zsplit(cmd_output(
|
||||
'git', 'diff', '--name-only', '--no-ext-diff', '-z',
|
||||
'-m', tree_hash, 'HEAD', 'MERGE_HEAD',
|
||||
)[1])
|
||||
return set(merge_conflict_filenames) | set(merge_diff_filenames)
|
||||
|
||||
|
||||
@memoize_by_cwd
|
||||
def get_staged_files():
|
||||
return cmd_output(
|
||||
'git', 'diff', '--staged', '--name-only', '--no-ext-diff',
|
||||
return zsplit(cmd_output(
|
||||
'git', 'diff', '--staged', '--name-only', '--no-ext-diff', '-z',
|
||||
# Everything except for D
|
||||
'--diff-filter=ACMRTUXB',
|
||||
)[1].splitlines()
|
||||
)[1])
|
||||
|
||||
|
||||
@memoize_by_cwd
|
||||
def get_all_files():
|
||||
return cmd_output('git', 'ls-files')[1].splitlines()
|
||||
return zsplit(cmd_output('git', 'ls-files', '-z')[1])
|
||||
|
||||
|
||||
def get_changed_files(new, old):
|
||||
return zsplit(cmd_output(
|
||||
'git', 'diff', '--name-only', '--no-ext-diff', '-z',
|
||||
'{}...{}'.format(old, new),
|
||||
)[1])
|
||||
|
||||
|
||||
def get_files_matching(all_file_list_strategy):
|
||||
|
||||
@@ -15,7 +15,6 @@ from pre_commit.commands.install_uninstall import install
|
||||
from pre_commit.commands.run import _compute_cols
|
||||
from pre_commit.commands.run import _get_skips
|
||||
from pre_commit.commands.run import _has_unmerged_paths
|
||||
from pre_commit.commands.run import get_changed_files
|
||||
from pre_commit.commands.run import run
|
||||
from pre_commit.runner import Runner
|
||||
from pre_commit.util import cmd_output
|
||||
@@ -501,18 +500,6 @@ def test_hook_install_failure(mock_out_store_directory, tempdir_factory):
|
||||
assert '☃'.encode('UTF-8') + '²'.encode('latin1') in stdout
|
||||
|
||||
|
||||
def test_get_changed_files():
|
||||
files = get_changed_files(
|
||||
'78c682a1d13ba20e7cb735313b9314a74365cd3a',
|
||||
'3387edbb1288a580b37fe25225aa0b856b18ad1a',
|
||||
)
|
||||
assert files == ['CHANGELOG.md', 'setup.py']
|
||||
|
||||
# files changed in source but not in origin should not be returned
|
||||
files = get_changed_files('HEAD~10', 'HEAD')
|
||||
assert files == []
|
||||
|
||||
|
||||
def test_lots_of_files(mock_out_store_directory, tempdir_factory):
|
||||
# windows xargs seems to have a bug, here's a regression test for
|
||||
# our workaround
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import
|
||||
from __future__ import unicode_literals
|
||||
|
||||
@@ -162,3 +163,63 @@ OTHER_MERGE_MSG = MERGE_MSG + b'\tother_conflict_file\n'
|
||||
def test_parse_merge_msg_for_conflicts(input, expected_output):
|
||||
ret = git.parse_merge_msg_for_conflicts(input)
|
||||
assert ret == expected_output
|
||||
|
||||
|
||||
def test_get_changed_files():
|
||||
files = git.get_changed_files(
|
||||
'78c682a1d13ba20e7cb735313b9314a74365cd3a',
|
||||
'3387edbb1288a580b37fe25225aa0b856b18ad1a',
|
||||
)
|
||||
assert files == ['CHANGELOG.md', 'setup.py']
|
||||
|
||||
# files changed in source but not in origin should not be returned
|
||||
files = git.get_changed_files('HEAD~10', 'HEAD')
|
||||
assert files == []
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
('s', 'expected'),
|
||||
(
|
||||
('foo\0bar\0', ['foo', 'bar']),
|
||||
('foo\0', ['foo']),
|
||||
('', []),
|
||||
('foo', ['foo']),
|
||||
),
|
||||
)
|
||||
def test_zsplit(s, expected):
|
||||
assert git.zsplit(s) == expected
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def non_ascii_repo(tmpdir):
|
||||
repo = tmpdir.join('repo').ensure_dir()
|
||||
with repo.as_cwd():
|
||||
cmd_output('git', 'init', '.')
|
||||
cmd_output('git', 'commit', '--allow-empty', '-m', 'initial commit')
|
||||
repo.join('интервью').ensure()
|
||||
cmd_output('git', 'add', '.')
|
||||
cmd_output('git', 'commit', '--allow-empty', '-m', 'initial commit')
|
||||
yield repo
|
||||
|
||||
|
||||
def test_all_files_non_ascii(non_ascii_repo):
|
||||
ret = git.get_all_files()
|
||||
assert ret == ['интервью']
|
||||
|
||||
|
||||
def test_staged_files_non_ascii(non_ascii_repo):
|
||||
non_ascii_repo.join('интервью').write('hi')
|
||||
cmd_output('git', 'add', '.')
|
||||
assert git.get_staged_files() == ['интервью']
|
||||
|
||||
|
||||
def test_changed_files_non_ascii(non_ascii_repo):
|
||||
ret = git.get_changed_files('HEAD', 'HEAD^')
|
||||
assert ret == ['интервью']
|
||||
|
||||
|
||||
def test_get_conflicted_files_non_ascii(in_merge_conflict):
|
||||
open('интервью', 'a').close()
|
||||
cmd_output('git', 'add', '.')
|
||||
ret = git.get_conflicted_files()
|
||||
assert ret == {'conflict_file', 'интервью'}
|
||||
|
||||
Reference in New Issue
Block a user