From e40a151e8c51b001d6bb083d03126e79c0bfce86 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Mon, 23 Jun 2014 06:59:15 -0700 Subject: [PATCH] Treat diffs as maybe-not-utf8. --- pre_commit/prefixed_command_runner.py | 12 ++++++------ pre_commit/staged_files_only.py | 9 +++++---- tests/staged_files_only_test.py | 24 +++++++++++++++++++++--- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/pre_commit/prefixed_command_runner.py b/pre_commit/prefixed_command_runner.py index bcefe8e6..09a93641 100644 --- a/pre_commit/prefixed_command_runner.py +++ b/pre_commit/prefixed_command_runner.py @@ -56,24 +56,24 @@ class PrefixedCommandRunner(object): if not os.path.exists(self.prefix_dir): self.__makedirs(self.prefix_dir) - def run(self, cmd, retcode=0, stdin=None, **kwargs): + def run(self, cmd, retcode=0, stdin=None, encoding='UTF-8', **kwargs): popen_kwargs = { 'stdin': subprocess.PIPE, 'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE, } if stdin is not None: - stdin = stdin.encode('utf-8') + stdin = stdin.encode('UTF-8') popen_kwargs.update(kwargs) self._create_path_if_not_exists() replaced_cmd = _replace_cmd(cmd, prefix=self.prefix_dir) proc = self.__popen(replaced_cmd, **popen_kwargs) stdout, stderr = proc.communicate(stdin) - if isinstance(stdout, bytes): - stdout = stdout.decode('UTF-8') - if isinstance(stderr, bytes): - stderr = stderr.decode('UTF-8') + if encoding is not None: + stdout = stdout.decode(encoding) + if encoding is not None: + stderr = stderr.decode(encoding) returncode = proc.returncode if retcode is not None and retcode != returncode: diff --git a/pre_commit/staged_files_only.py b/pre_commit/staged_files_only.py index 6b68a5ca..ff6f1eb7 100644 --- a/pre_commit/staged_files_only.py +++ b/pre_commit/staged_files_only.py @@ -20,19 +20,20 @@ def staged_files_only(cmd_runner): cmd_runner - PrefixedCommandRunner """ # Determine if there are unstaged files - retcode, diff_stdout, _ = cmd_runner.run( + retcode, diff_stdout_binary, _ = 
cmd_runner.run( ['git', 'diff', '--ignore-submodules', '--binary', '--exit-code'], retcode=None, + encoding=None, ) - if retcode and diff_stdout.strip(): + if retcode and diff_stdout_binary.strip(): patch_filename = cmd_runner.path('patch{0}'.format(int(time.time()))) logger.warning('Unstaged files detected.') logger.info( 'Stashing unstaged files to {0}.'.format(patch_filename), ) # Save the current unstaged changes as a patch - with io.open(patch_filename, 'w', encoding='utf-8') as patch_file: - patch_file.write(diff_stdout) + with io.open(patch_filename, 'wb') as patch_file: + patch_file.write(diff_stdout_binary) # Clear the working directory of unstaged changes cmd_runner.run(['git', 'checkout', '--', '.']) diff --git a/tests/staged_files_only_test.py b/tests/staged_files_only_test.py index e549bd42..a51d5016 100644 --- a/tests/staged_files_only_test.py +++ b/tests/staged_files_only_test.py @@ -1,3 +1,4 @@ +# -*- coding: UTF-8 -*- from __future__ import absolute_import from __future__ import unicode_literals @@ -34,9 +35,14 @@ def foo_staged(tmpdir_factory): yield auto_namedtuple(path=path, foo_filename=foo_filename) -def _test_foo_state(path, foo_contents=FOO_CONTENTS, status='A'): +def _test_foo_state( + path, + foo_contents=FOO_CONTENTS, + status='A', + encoding='UTF-8', +): assert os.path.exists(path.foo_filename) - assert io.open(path.foo_filename, encoding='utf-8').read() == foo_contents + assert io.open(path.foo_filename, encoding=encoding).read() == foo_contents actual_status = get_short_git_status()['foo'] assert status == actual_status @@ -246,10 +252,22 @@ def test_diff_returns_1_no_diff_though(fake_logging_handler, foo_staged): def test_stage_utf8_changes(foo_staged, cmd_runner): contents = '\u2603' - with io.open('foo', 'w', encoding='utf-8') as foo_file: + with io.open('foo', 'w', encoding='UTF-8') as foo_file: foo_file.write(contents) _test_foo_state(foo_staged, contents, 'AM') with staged_files_only(cmd_runner): _test_foo_state(foo_staged) 
_test_foo_state(foo_staged, contents, 'AM') + + +def test_stage_non_utf8_changes(foo_staged, cmd_runner): + contents = 'ú' + # Produce a latin-1 diff + with io.open('foo', 'w', encoding='latin-1') as foo_file: + foo_file.write(contents) + + _test_foo_state(foo_staged, contents, 'AM', encoding='latin-1') + with staged_files_only(cmd_runner): + _test_foo_state(foo_staged) + _test_foo_state(foo_staged, contents, 'AM', encoding='latin-1')