Merge pull request #594 from pre-commit/parallel_repository_creation

Limit repository creation to one process
This commit is contained in:
Anthony Sottile
2017-08-24 00:37:50 -04:00
committed by GitHub
6 changed files with 133 additions and 46 deletions

53
pre_commit/file_lock.py Normal file
View File

@@ -0,0 +1,53 @@
import contextlib
import errno
try: # pragma: no cover (windows)
import msvcrt
# https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/locking
# on windows we lock "regions" of files, we don't care about the actual
# byte region so we'll just pick *some* number here.
_region = 0xffff
@contextlib.contextmanager
def _locked(fileno):
while True:
try:
msvcrt.locking(fileno, msvcrt.LK_LOCK, _region)
except OSError as e:
# Locking violation. Returned when the _LK_LOCK or _LK_RLCK
# flag is specified and the file cannot be locked after 10
# attempts.
if e.errno != errno.EDEADLOCK:
raise
else:
break
try:
yield
finally:
# From cursory testing, it seems to get unlocked when the file is
# closed so this may not be necessary.
# The documentation however states:
# "Regions should be locked only briefly and should be unlocked
# before closing a file or exiting the program."
msvcrt.locking(fileno, msvcrt.LK_UNLCK, _region)
except ImportError: # pragma: no cover (posix)
import fcntl
@contextlib.contextmanager
def _locked(fileno):
fcntl.flock(fileno, fcntl.LOCK_EX)
try:
yield
finally:
fcntl.flock(fileno, fcntl.LOCK_UN)
@contextlib.contextmanager
def lock(path):
with open(path, 'a+') as f:
with _locked(f.fileno()):
yield

View File

@@ -64,34 +64,42 @@ def _installed(cmd_runner, language_name, language_version, additional_deps):
)
def _install_all(venvs, repo_url):
def _install_all(venvs, repo_url, store):
"""Tuple of (cmd_runner, language, version, deps)"""
need_installed = tuple(
(cmd_runner, language_name, version, deps)
for cmd_runner, language_name, version, deps in venvs
if not _installed(cmd_runner, language_name, version, deps)
)
def _need_installed():
return tuple(
(cmd_runner, language_name, version, deps)
for cmd_runner, language_name, version, deps in venvs
if not _installed(cmd_runner, language_name, version, deps)
)
if not _need_installed():
return
with store.exclusive_lock():
# Another process may have already completed this work
need_installed = _need_installed()
if not need_installed: # pragma: no cover (race)
return
if need_installed:
logger.info(
'Installing environment for {}.'.format(repo_url),
)
logger.info('Once installed this environment will be reused.')
logger.info('This may take a few minutes...')
for cmd_runner, language_name, version, deps in need_installed:
language = languages[language_name]
venv = environment_dir(language.ENVIRONMENT_DIR, version)
for cmd_runner, language_name, version, deps in need_installed:
language = languages[language_name]
venv = environment_dir(language.ENVIRONMENT_DIR, version)
# There's potentially incomplete cleanup from previous runs
# Clean it up!
if cmd_runner.exists(venv):
shutil.rmtree(cmd_runner.path(venv))
# There's potentially incomplete cleanup from previous runs
# Clean it up!
if cmd_runner.exists(venv):
shutil.rmtree(cmd_runner.path(venv))
language.install_environment(cmd_runner, version, deps)
# Write our state to indicate we're installed
state = _state(deps)
_write_state(cmd_runner, venv, state)
language.install_environment(cmd_runner, version, deps)
# Write our state to indicate we're installed
state = _state(deps)
_write_state(cmd_runner, venv, state)
def _validate_minimum_version(hook):
@@ -174,7 +182,7 @@ class Repository(object):
def require_installed(self):
if not self.__installed:
_install_all(self._venvs, self.repo_config['repo'])
_install_all(self._venvs, self.repo_config['repo'], self.store)
self.__installed = True
def run_hook(self, hook, file_args):

View File

@@ -10,6 +10,7 @@ import tempfile
from cached_property import cached_property
import pre_commit.constants as C
from pre_commit import file_lock
from pre_commit.prefixed_command_runner import PrefixedCommandRunner
from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output
@@ -37,13 +38,20 @@ def _get_default_directory():
class Store(object):
get_default_directory = staticmethod(_get_default_directory)
__created = False
def __init__(self, directory=None):
if directory is None:
directory = self.get_default_directory()
self.directory = directory
self.__created = False
@contextlib.contextmanager
def exclusive_lock(self, quiet=False):
if not quiet:
logger.info('Locking pre-commit directory')
with file_lock.lock(os.path.join(self.directory, '.lock')):
yield
def _write_readme(self):
with io.open(os.path.join(self.directory, 'README'), 'w') as readme:
@@ -75,12 +83,17 @@ class Store(object):
os.rename(tmpfile, self.db_path)
def _create(self):
if os.path.exists(self.db_path):
return
if not os.path.exists(self.directory):
os.makedirs(self.directory)
self._write_readme()
self._write_sqlite_db()
if os.path.exists(self.db_path):
return
with self.exclusive_lock(quiet=True):
# Another process may have already completed this work
if os.path.exists(self.db_path): # pragma: no cover (race)
return
self._write_sqlite_db()
def require_created(self):
"""Require the pre-commit file store to be created."""
@@ -91,27 +104,37 @@ class Store(object):
def _new_repo(self, repo, ref, make_strategy):
self.require_created()
# Check if we already exist
with sqlite3.connect(self.db_path) as db:
result = db.execute(
'SELECT path FROM repos WHERE repo = ? AND ref = ?',
[repo, ref],
).fetchone()
if result:
return result[0]
def _get_result():
# Check if we already exist
with sqlite3.connect(self.db_path) as db:
result = db.execute(
'SELECT path FROM repos WHERE repo = ? AND ref = ?',
[repo, ref],
).fetchone()
if result:
return result[0]
logger.info('Initializing environment for {}.'.format(repo))
result = _get_result()
if result:
return result
with self.exclusive_lock():
# Another process may have already completed this work
result = _get_result()
if result: # pragma: no cover (race)
return result
directory = tempfile.mkdtemp(prefix='repo', dir=self.directory)
with clean_path_on_failure(directory):
make_strategy(directory)
logger.info('Initializing environment for {}.'.format(repo))
# Update our db with the created repo
with sqlite3.connect(self.db_path) as db:
db.execute(
'INSERT INTO repos (repo, ref, path) VALUES (?, ?, ?)',
[repo, ref, directory],
)
directory = tempfile.mkdtemp(prefix='repo', dir=self.directory)
with clean_path_on_failure(directory):
make_strategy(directory)
# Update our db with the created repo
with sqlite3.connect(self.db_path) as db:
db.execute(
'INSERT INTO repos (repo, ref, path) VALUES (?, ?, ?)',
[repo, ref, directory],
)
return directory
def clone(self, repo, ref):

View File

@@ -142,7 +142,8 @@ FILES_CHANGED = (
NORMAL_PRE_COMMIT_RUN = re.compile(
r'^\[INFO\] Initializing environment for .+\.\r?\n'
r'^\[INFO\] Locking pre-commit directory\r?\n'
r'\[INFO\] Initializing environment for .+\.\r?\n'
r'Bash hook\.+Passed\r?\n'
r'\[master [a-f0-9]{7}\] Commit!\r?\n' +
FILES_CHANGED +
@@ -254,7 +255,8 @@ def test_environment_not_sourced(tempdir_factory):
FAILING_PRE_COMMIT_RUN = re.compile(
r'^\[INFO\] Initializing environment for .+\.\r?\n'
r'^\[INFO\] Locking pre-commit directory\r?\n'
r'\[INFO\] Initializing environment for .+\.\r?\n'
r'Failing hook\.+Failed\r?\n'
r'hookid: failing_hook\r?\n'
r'\r?\n'
@@ -332,6 +334,7 @@ def test_install_existing_hook_no_overwrite_idempotent(tempdir_factory):
FAIL_OLD_HOOK = re.compile(
r'fail!\r?\n'
r'\[INFO\] Locking pre-commit directory\r?\n'
r'\[INFO\] Initializing environment for .+\.\r?\n'
r'Bash hook\.+Passed\r?\n',
)

View File

@@ -547,8 +547,8 @@ def test_reinstall(tempdir_factory, store, log_info_mock):
config = make_config_from_repo(path)
repo = Repository.create(config, store)
repo.require_installed()
# We print some logging during clone (1) + install (3)
assert log_info_mock.call_count == 4
# We print some logging during clone (2) + install (4)
assert log_info_mock.call_count == 6
log_info_mock.reset_mock()
# Reinstall with same repo should not trigger another install
repo.require_installed()

View File

@@ -88,7 +88,7 @@ def test_clone(store, tempdir_factory, log_info_mock):
ret = store.clone(path, sha)
# Should have printed some stuff
assert log_info_mock.call_args_list[0][0][0].startswith(
assert log_info_mock.call_args_list[1][0][0].startswith(
'Initializing environment for ',
)