diff --git a/pre_commit/file_lock.py b/pre_commit/file_lock.py new file mode 100644 index 00000000..054ac529 --- /dev/null +++ b/pre_commit/file_lock.py @@ -0,0 +1,53 @@ +import contextlib +import errno + + +try: # pragma: no cover (windows) + import msvcrt + + # https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/locking + + # on windows we lock "regions" of files, we don't care about the actual + # byte region so we'll just pick *some* number here. + _region = 0xffff + + @contextlib.contextmanager + def _locked(fileno): + while True: + try: + msvcrt.locking(fileno, msvcrt.LK_LOCK, _region) + except OSError as e: + # Locking violation. Returned when the _LK_LOCK or _LK_RLCK + # flag is specified and the file cannot be locked after 10 + # attempts. + if e.errno != errno.EDEADLOCK: + raise + else: + break + + try: + yield + finally: + # From cursory testing, it seems to get unlocked when the file is + # closed so this may not be necessary. + # The documentation however states: + # "Regions should be locked only briefly and should be unlocked + # before closing a file or exiting the program." + msvcrt.locking(fileno, msvcrt.LK_UNLCK, _region) +except ImportError: # pragma: no cover (posix) + import fcntl + + @contextlib.contextmanager + def _locked(fileno): + fcntl.flock(fileno, fcntl.LOCK_EX) + try: + yield + finally: + fcntl.flock(fileno, fcntl.LOCK_UN) + + +@contextlib.contextmanager +def lock(path): + with open(path, 'a+') as f: + with _locked(f.fileno()): + yield diff --git a/pre_commit/repository.py b/pre_commit/repository.py index d2d30dfc..18f902cb 100644 --- a/pre_commit/repository.py +++ b/pre_commit/repository.py @@ -64,34 +64,42 @@ def _installed(cmd_runner, language_name, language_version, additional_deps): ) -def _install_all(venvs, repo_url): +def _install_all(venvs, repo_url, store): """Tuple of (cmd_runner, language, version, deps)""" - need_installed = tuple( - (cmd_runner, language_name, version, deps) - for cmd_runner, language_name, version, deps in venvs - if not _installed(cmd_runner, language_name, version, deps) - ) + def _need_installed(): + return tuple( + (cmd_runner, language_name, version, deps) + for cmd_runner, language_name, version, deps in venvs + if not _installed(cmd_runner, language_name, version, deps) + ) + + if not _need_installed(): + return + with store.exclusive_lock(): + # Another process may have already completed this work + need_installed = _need_installed() + if not need_installed: # pragma: no cover (race) + return - if need_installed: logger.info( 'Installing environment for {}.'.format(repo_url), ) logger.info('Once installed this environment will be reused.') logger.info('This may take a few minutes...') - for cmd_runner, language_name, version, deps in need_installed: - language = languages[language_name] - venv = environment_dir(language.ENVIRONMENT_DIR, version) + for cmd_runner, language_name, version, deps in need_installed: + language = languages[language_name] + venv = environment_dir(language.ENVIRONMENT_DIR, version) - # There's potentially incomplete cleanup from previous runs - # Clean it up! - if cmd_runner.exists(venv): - shutil.rmtree(cmd_runner.path(venv)) + # There's potentially incomplete cleanup from previous runs + # Clean it up! + if cmd_runner.exists(venv): + shutil.rmtree(cmd_runner.path(venv)) - language.install_environment(cmd_runner, version, deps) - # Write our state to indicate we're installed - state = _state(deps) - _write_state(cmd_runner, venv, state) + language.install_environment(cmd_runner, version, deps) + # Write our state to indicate we're installed + state = _state(deps) + _write_state(cmd_runner, venv, state) def _validate_minimum_version(hook): @@ -174,7 +182,7 @@ class Repository(object): def require_installed(self): if not self.__installed: - _install_all(self._venvs, self.repo_config['repo']) + _install_all(self._venvs, self.repo_config['repo'], self.store) self.__installed = True def run_hook(self, hook, file_args): diff --git a/pre_commit/store.py b/pre_commit/store.py index 84fc2123..29237870 100644 --- a/pre_commit/store.py +++ b/pre_commit/store.py @@ -10,6 +10,7 @@ import tempfile from cached_property import cached_property import pre_commit.constants as C +from pre_commit import file_lock from pre_commit.prefixed_command_runner import PrefixedCommandRunner from pre_commit.util import clean_path_on_failure from pre_commit.util import cmd_output @@ -37,13 +38,20 @@ def _get_default_directory(): class Store(object): get_default_directory = staticmethod(_get_default_directory) + __created = False def __init__(self, directory=None): if directory is None: directory = self.get_default_directory() self.directory = directory - self.__created = False + + @contextlib.contextmanager + def exclusive_lock(self, quiet=False): + if not quiet: + logger.info('Locking pre-commit directory') + with file_lock.lock(os.path.join(self.directory, '.lock')): + yield def _write_readme(self): with io.open(os.path.join(self.directory, 'README'), 'w') as readme: @@ -75,12 +83,17 @@ class Store(object): os.rename(tmpfile, self.db_path) def _create(self): - if os.path.exists(self.db_path): - return if not os.path.exists(self.directory): os.makedirs(self.directory) self._write_readme() - self._write_sqlite_db() + + if os.path.exists(self.db_path): + return + with self.exclusive_lock(quiet=True): + # Another process may have already completed this work + if os.path.exists(self.db_path): # pragma: no cover (race) + return + self._write_sqlite_db() def require_created(self): """Require the pre-commit file store to be created.""" @@ -91,27 +104,37 @@ class Store(object): def _new_repo(self, repo, ref, make_strategy): self.require_created() - # Check if we already exist - with sqlite3.connect(self.db_path) as db: - result = db.execute( - 'SELECT path FROM repos WHERE repo = ? AND ref = ?', - [repo, ref], - ).fetchone() - if result: - return result[0] + def _get_result(): + # Check if we already exist + with sqlite3.connect(self.db_path) as db: + result = db.execute( + 'SELECT path FROM repos WHERE repo = ? AND ref = ?', + [repo, ref], + ).fetchone() + if result: + return result[0] - logger.info('Initializing environment for {}.'.format(repo)) + result = _get_result() + if result: + return result + with self.exclusive_lock(): + # Another process may have already completed this work + result = _get_result() + if result: # pragma: no cover (race) + return result - directory = tempfile.mkdtemp(prefix='repo', dir=self.directory) - with clean_path_on_failure(directory): - make_strategy(directory) + logger.info('Initializing environment for {}.'.format(repo)) - # Update our db with the created repo - with sqlite3.connect(self.db_path) as db: - db.execute( - 'INSERT INTO repos (repo, ref, path) VALUES (?, ?, ?)', - [repo, ref, directory], - ) + directory = tempfile.mkdtemp(prefix='repo', dir=self.directory) + with clean_path_on_failure(directory): + make_strategy(directory) + + # Update our db with the created repo + with sqlite3.connect(self.db_path) as db: + db.execute( + 'INSERT INTO repos (repo, ref, path) VALUES (?, ?, ?)', + [repo, ref, directory], + ) return directory def clone(self, repo, ref): diff --git a/tests/commands/install_uninstall_test.py b/tests/commands/install_uninstall_test.py index 94d396a9..bcf03076 100644 --- a/tests/commands/install_uninstall_test.py +++ b/tests/commands/install_uninstall_test.py @@ -142,7 +142,8 @@ FILES_CHANGED = ( NORMAL_PRE_COMMIT_RUN = re.compile( - r'^\[INFO\] Initializing environment for .+\.\r?\n' + r'^\[INFO\] Locking pre-commit directory\r?\n' + r'\[INFO\] Initializing environment for .+\.\r?\n' r'Bash hook\.+Passed\r?\n' r'\[master [a-f0-9]{7}\] Commit!\r?\n' + FILES_CHANGED + @@ -254,7 +255,8 @@ def test_environment_not_sourced(tempdir_factory): FAILING_PRE_COMMIT_RUN = re.compile( - r'^\[INFO\] Initializing environment for .+\.\r?\n' + r'^\[INFO\] Locking pre-commit directory\r?\n' + r'\[INFO\] Initializing environment for .+\.\r?\n' r'Failing hook\.+Failed\r?\n' r'hookid: failing_hook\r?\n' r'\r?\n' @@ -332,6 +334,7 @@ def test_install_existing_hook_no_overwrite_idempotent(tempdir_factory): FAIL_OLD_HOOK = re.compile( r'fail!\r?\n' + r'\[INFO\] Locking pre-commit directory\r?\n' r'\[INFO\] Initializing environment for .+\.\r?\n' r'Bash hook\.+Passed\r?\n', ) diff --git a/tests/repository_test.py b/tests/repository_test.py index c35f66a3..9ad28862 100644 --- a/tests/repository_test.py +++ b/tests/repository_test.py @@ -547,8 +547,8 @@ def test_reinstall(tempdir_factory, store, log_info_mock): config = make_config_from_repo(path) repo = Repository.create(config, store) repo.require_installed() - # We print some logging during clone (1) + install (3) - assert log_info_mock.call_count == 4 + # We print some logging during clone (2) + install (4) + assert log_info_mock.call_count == 6 log_info_mock.reset_mock() # Reinstall with same repo should not trigger another install repo.require_installed() diff --git a/tests/store_test.py b/tests/store_test.py index eab4b009..9a76a339 100644 --- a/tests/store_test.py +++ b/tests/store_test.py @@ -88,7 +88,7 @@ def test_clone(store, tempdir_factory, log_info_mock): ret = store.clone(path, sha) # Should have printed some stuff - assert log_info_mock.call_args_list[0][0][0].startswith( + assert log_info_mock.call_args_list[1][0][0].startswith( 'Initializing environment for ', )