diff --git a/.gitignore b/.gitignore index e881c088b..fa774ea34 100644 --- a/.gitignore +++ b/.gitignore @@ -95,3 +95,6 @@ monkeytype.sqlite3 # Generated by sphinx_fonts extension (downloaded at build time) docs/_static/fonts/ docs/_static/css/fonts.css + +# pytest-optimizer durable state +.pytest-optimizer/ diff --git a/CHANGES b/CHANGES index 6ae101aa3..f25f0f5ba 100644 --- a/CHANGES +++ b/CHANGES @@ -20,6 +20,34 @@ $ uv add libvcs --prerelease allow _Notes on the upcoming release will go here._ +### Fixes + +#### `VCSRegistry` no longer shares parser state across instances (#539) + +{class}`~libvcs.url.registry.VCSRegistry` kept its parser map in a class +variable, so constructing any second registry mutated the shared +{data}`~libvcs.url.registry.registry`. Each registry now owns its own parser +map, leaving the global one untouched. + +#### `git_repo` and `hg_repo` fixtures return isolated clones (#539) + +The first consumer of the `git_repo` / `hg_repo` pytest fixtures received a live +handle to the session-cached master checkout, so mutating that checkout (adding +a remote, switching branches) polluted the cache for every later test. The +master copy is now treated as a pristine, read-only cache and every consumer — +including the first — gets its own copytree. + +### Development + +#### Opt-in parallel test runs (#539) + +The test suite can now run in parallel via [pytest-xdist], a new development +dependency. Enable workers with `just test-parallel` or `uv run pytest -n +auto`. The plain `uv run pytest` invocation still runs serially and is +unchanged. + +[pytest-xdist]: https://pytest-xdist.readthedocs.io/ + ## libvcs 0.44.0 (2026-06-21) libvcs 0.44.0 returns VCS command output verbatim by default. {meth}`~libvcs.cmd.git.Git.run` and its `Hg`/`Svn` counterparts now return exactly what the VCS printed, so a captured `git diff` re-applies with `git apply` and `git cat-file blob` round-trips byte-for-byte; the previous per-line trimming corrupted whitespace-significant output. This is a breaking change — reads that want a bare value (a lone SHA, a branch name) pass the new `trim=True`. Downstream tools such as vcspull are the primary beneficiaries. diff --git a/docs/api/index.md b/docs/api/index.md index 601e8ce92..61e6ab382 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -39,8 +39,8 @@ One call to fetch or create a working copy. :::{grid-item-card} pytest Plugin :link: /api/pytest-plugin :link-type: doc -Session-scoped fixtures for Git, SVN, and Mercurial -repositories. Drop-in test isolation. +Per-test isolated Git, SVN, and Mercurial repository fixtures, +backed by session-cached remotes. Drop-in test isolation. ::: :::: diff --git a/docs/api/pytest-plugin.md b/docs/api/pytest-plugin.md index b5b40b269..8b91b5230 100644 --- a/docs/api/pytest-plugin.md +++ b/docs/api/pytest-plugin.md @@ -40,6 +40,14 @@ def setup( ) -> None: pass ``` + +## Repository isolation + +{fixture}`git_repo`, {fixture}`hg_repo`, and {fixture}`svn_repo` hand each test +its own clone. The remote is built once and cached for the session, then copied +for every consumer, so a test can commit, add remotes, or rewrite history +without affecting any other test — and the fixtures stay safe under parallel +runs (`pytest-xdist`). ::: ## Types diff --git a/docs/project/adr/0002-order-independent-tests.md b/docs/project/adr/0002-order-independent-tests.md new file mode 100644 index 000000000..f9d4acb6b --- /dev/null +++ b/docs/project/adr/0002-order-independent-tests.md @@ -0,0 +1,118 @@ +(adr-order-independent-tests)= + +# ADR 0002: Order-independent tests with opt-in parallelism + +## Status + +Accepted. 2026-06-28. + +## Context + +The test suite is subprocess-bound: it spawns real `git`, `hg`, and `svn` +processes and writes working copies to disk. Wall-time lives in process +spawning and filesystem I/O, not Python, so the single largest lever for a +faster suite is running independent tests across CPU cores. + +Parallel execution (and, equivalently, shuffled execution) is only safe when +the suite is **order-independent**: every test must pass regardless of what ran +before it. The suite was not. Two pieces of shared mutable state passed in the +fixed collection order but failed once tests were reordered: + +- `libvcs.url.registry.VCSRegistry` stored its `parser_map` in a class + variable, so constructing any second registry mutated the module-level + `registry` singleton — whichever test built a custom registry first changed + URL detection for every later test. +- The `git_repo` / `hg_repo` pytest fixtures handed the first consumer a live + handle to a session-cached master checkout. The first test to use the fixture + could mutate that cache (add a remote, switch a branch), leaking state into + every later test that copied it. + +These were invisible under the default order and surfaced only when a +shuffled run (`pytest-randomly`) or a parallel run (`pytest-xdist`) changed +which tests ran together and in what sequence. They are also genuine bugs for +downstream consumers (e.g. vcspull) that build on `VCSRegistry` and the +fixtures, independent of how the tests run. + +## Decision + +Two coupled commitments — the invariant is the prerequisite for the mechanism. + +### Tests are order-independent + +No shared mutable state may leak across tests. Coupling is fixed at the source, +never hidden behind a fixed order or a co-locating scheduler: + +- `VCSRegistry.parser_map` is a per-instance attribute; each registry is + independent and the global `registry` is never mutated by constructing + another. +- The `git_repo` / `hg_repo` / `svn_repo` fixtures treat the master checkout as + a pristine, read-only cache and hand every consumer — including the first — + its own copy. A test may mutate its checkout freely without affecting any + other. + +The expectation is documented in {ref}`workflow` so contributors keep new tests +order-independent, with a shuffled-run check (`uv run --with pytest-randomly +py.test -p randomly`). + +### Parallelism is opt-in, not the default + +`pytest-xdist` is a development dependency exposed via `just test-parallel` +(`uv run py.test -n auto`). The default `uv run py.test` stays serial. + +- The worker count is **not** hardcoded. `-n auto` adapts to the machine; the + operator caps it when needed. A subprocess-bound suite oversubscribes on + high-core machines, so a fixed count committed to `addopts` would be wrong + somewhere. +- The default scheduler (`load`) is used. A co-locating scheduler (`loadfile`) + was trialled as a workaround for the order-coupling, but once the coupling is + fixed it is unnecessary, so it is not committed. + +## Alternatives considered + +| Approach | order-safe | default unchanged | portable | chosen | +|----------|:----------:|:-----------------:|:--------:|:------:| +| Fix coupling at source + opt-in `-n auto` (`load`) | yes | yes | yes | **yes** | +| Keep `--dist=loadfile` workaround, leave coupling | masks it | yes | yes | no | +| `-n auto` in `addopts` (always parallel) | needs fix | no | no | no | +| Hardcode a worker count (e.g. `-n 12`) | needs fix | no | no | no | +| `pytest-randomly` as a committed CI gate | detects only | yes | yes | deferred | + +The decisive point: a co-locating scheduler only *hides* order-coupling, and it +was measured to be no faster than the default scheduler once the coupling was +fixed — so the coupling is fixed and the workaround dropped. `pytest-randomly` +detects order-coupling but does not parallelize; it is used transiently for +checks (`uv run --with pytest-randomly`) rather than committed. + +## Consequences + +### Positive + +- The suite passes under serial, shuffled, and parallel (`-n auto`) execution. +- Two real isolation bugs in shipped code (`VCSRegistry`, the repo fixtures) + are fixed, benefiting downstream consumers regardless of parallelism. +- A faster opt-in run is available without changing the stable default. + +### Tradeoffs + +- Parallel wall-time varies with machine load, and `-n auto` can oversubscribe + a subprocess-bound suite on high-core machines — the operator picks the + worker count. +- Contributors must keep new tests order-independent (reset global state in + teardown; isolate per-test resources). + +### Risks + +- A future order-coupling could reintroduce flakiness that appears only under + parallel or shuffled runs. Mitigation: the order-independence expectation is + documented with a shuffled-run check, so the regression is reproducible. + +## Prior art + +- `pytest-xdist` provides the `load`, `loadscope`, and `loadfile` schedulers; + its guidance is that tests must be independent for `load` to be safe. +- `pytest-randomly` randomizes order specifically to surface inter-test + coupling. +- The broader convention across test suites: parallel execution requires + order-independent tests, and shared mutable state (global registries, cached + fixtures handed out by reference) is the usual cause of order-dependent + failures. diff --git a/docs/project/adr/index.md b/docs/project/adr/index.md index c763aa55a..28d093f2c 100644 --- a/docs/project/adr/index.md +++ b/docs/project/adr/index.md @@ -8,4 +8,5 @@ Significant design decisions and their rationale. :maxdepth: 1 0001-faithful-subprocess-output-capture +0002-order-independent-tests ``` diff --git a/docs/project/workflow.md b/docs/project/workflow.md index dffb1f0a3..d565dc113 100644 --- a/docs/project/workflow.md +++ b/docs/project/workflow.md @@ -28,6 +28,31 @@ $ uv run py.test Helpers: `just test` Rerun tests on file change: `just watch-test` (requires [entr(1)]) +### Running tests in parallel + +The suite spawns real `git`, `hg`, and `svn` processes, so on a multi-core +machine it runs faster across workers with [pytest-xdist] (a dev dependency): + +```console +$ just test-parallel +``` + +This runs `uv run py.test -n auto`, where `auto` sizes the worker pool to the +machine's cores. Parallelism is opt-in — `just test` and `uv run py.test` stay +serial by default. + +### Order independence + +Tests must pass regardless of the order they run in. Parallel and shuffled runs +spread tests across workers, so any hidden coupling — shared global state, or a +fixture that leaks into a later test — surfaces as a failure. Keep fixtures +self-contained and reset any global state in teardown. Check locally with a +shuffled run: + +```console +$ uv run --with pytest-randomly py.test -p randomly +``` + ## Documentation Default preview server: http://localhost:8068 @@ -220,6 +245,7 @@ Update `__version__` in `__about__.py` and `pyproject.toml`:: uv publish [uv]: https://github.com/astral-sh/uv +[pytest-xdist]: https://pytest-xdist.readthedocs.io/ [entr(1)]: http://eradman.com/entrproject/ [`entr(1)`]: http://eradman.com/entrproject/ [ruff format]: https://docs.astral.sh/ruff/formatter/ diff --git a/docs/url/registry.md b/docs/url/registry.md index d3657af30..21d9e4edb 100644 --- a/docs/url/registry.md +++ b/docs/url/registry.md @@ -29,7 +29,7 @@ Detect VCS from `git`, `hg`, and `svn` URLs. ```python >>> import dataclasses >>> from libvcs.url.base import Rule, RuleMap ->>> from libvcs.url.registry import ParserMatch, VCSRegistry +>>> from libvcs.url.registry import ParserMatch, VCSRegistry, registry >>> from libvcs.url.git import GitURL This will match `github:org/repo`: @@ -68,6 +68,10 @@ Prefix for KDE infrastructure, `kde:group/repository`: ... } ... ) +Subclassing with its own ``RuleMap`` keeps these rules local. Registering on +``GitURL.rule_map`` instead would mutate the shared class-level map and change +``GitURL`` for every caller in the process. + >>> my_parsers: "ParserLazyMap" = { ... "git": MyGitURLParser, ... "hg": "libvcs.url.hg.HgURL", @@ -76,6 +80,12 @@ Prefix for KDE infrastructure, `kde:group/repository`: >>> vcs_matcher = VCSRegistry(parsers=my_parsers) +Each registry owns its parsers, so building a custom one leaves the +module-level ``registry`` untouched -- it still resolves git to ``GitURL``: + +>>> registry.match('git@invent.kde.org:plasma/plasma-sdk.git') +[ParserMatch(vcs='git', match=GitURL(...))] + >>> vcs_matcher.match('git@invent.kde.org:plasma/plasma-sdk.git') [ParserMatch(vcs='git', match=MyGitURLParser(...)), ParserMatch(vcs='hg', match=HgURL(...)), diff --git a/justfile b/justfile index 60f55088f..cb7ac2c1a 100644 --- a/justfile +++ b/justfile @@ -17,6 +17,11 @@ default: test *args: uv run py.test {{ args }} +# Run tests in parallel (pytest-xdist) +[group: 'test'] +test-parallel *args: + uv run py.test -n auto {{ args }} + # Run tests then start continuous testing with pytest-watcher [group: 'test'] start: diff --git a/pyproject.toml b/pyproject.toml index ba4ebc8b8..2f3430a0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,7 @@ dev = [ "pytest-rerunfailures", "pytest-mock", "pytest-watcher", + "pytest-xdist", # Coverage "codecov", "coverage", @@ -97,6 +98,7 @@ testing = [ "pytest-rerunfailures", "pytest-mock", "pytest-watcher", + "pytest-xdist", ] coverage =[ "codecov", diff --git a/src/libvcs/pytest_plugin.py b/src/libvcs/pytest_plugin.py index fe85b9688..a80f9e7e3 100644 --- a/src/libvcs/pytest_plugin.py +++ b/src/libvcs/pytest_plugin.py @@ -238,7 +238,7 @@ def projects_path( user_path: pathlib.Path, request: pytest.FixtureRequest, ) -> pathlib.Path: - """User's local checkouts and clones. Emphemeral directory.""" + """User's local checkouts and clones. Ephemeral directory.""" path = user_path / "projects" path.mkdir(exist_ok=True) @@ -254,7 +254,7 @@ def remote_repos_path( user_path: pathlib.Path, request: pytest.FixtureRequest, ) -> pathlib.Path: - """System's remote (file-based) repos to clone and push to. Emphemeral directory.""" + """System's remote (file-based) repos to clone and push to. Ephemeral directory.""" path = user_path / "remote_repos" path.mkdir(exist_ok=True) @@ -474,7 +474,7 @@ def git_remote_repo( vcs_gitconfig: pathlib.Path, git_commit_envvars: GitCommitEnvVars, ) -> pathlib.Path: - """Copy the session-scoped Git repository to a temporary directory.""" + """Session-scoped remote Git repository with one commit, as a clone source.""" _skip_if_git_missing() # TODO: Cache the effect of of this in a session-based repo repo_path = create_git_remote_repo() @@ -714,31 +714,38 @@ def git_repo( set_vcs_gitconfig: pathlib.Path, set_home: None, # Needed for child processes (e.g. submodules) ) -> GitSync: - """Pre-made git clone of remote repo checked out to user's projects dir.""" + """Return an isolated git clone of the remote repo, one per test. + + Every consumer gets its own checkout under the user's projects dir, copied + from a session-cached master. A test may freely mutate it (commit, add + remotes, switch branches) without affecting any other test, so the fixture + is safe under parallel runs (``pytest-xdist``). + """ remote_repo_name = unique_repo_name(remote_repos_path=projects_path) new_checkout_path = projects_path / remote_repo_name master_copy = remote_repos_path / "git_repo" - if master_copy.exists(): - shutil.copytree(master_copy, new_checkout_path) - return GitSync( + # Build the master copy once as a pristine, read-only cache. Every consumer + # gets an isolated copytree of it (including the first), so a test that + # mutates its checkout cannot pollute the cache for later tests. + if not master_copy.exists(): + GitSync( url=f"file://{git_remote_repo}", - path=str(new_checkout_path), - ) - - git_repo = GitSync( + path=master_copy, + remotes={ + "origin": GitRemote( + name="origin", + push_url=f"file://{git_remote_repo}", + fetch_url=f"file://{git_remote_repo}", + ), + }, + ).obtain() + + shutil.copytree(master_copy, new_checkout_path) + return GitSync( url=f"file://{git_remote_repo}", - path=master_copy, - remotes={ - "origin": GitRemote( - name="origin", - push_url=f"file://{git_remote_repo}", - fetch_url=f"file://{git_remote_repo}", - ), - }, + path=str(new_checkout_path), ) - git_repo.obtain() - return git_repo @pytest.fixture @@ -748,24 +755,30 @@ def hg_repo( hg_remote_repo: pathlib.Path, set_vcs_hgconfig: pathlib.Path, ) -> HgSync: - """Pre-made hg clone of remote repo checked out to user's projects dir.""" + """Return an isolated hg clone of the remote repo, one per test. + + Every consumer gets its own checkout under the user's projects dir, copied + from a session-cached master. A test may freely mutate it without affecting + any other test, so the fixture is safe under parallel runs (``pytest-xdist``). + """ remote_repo_name = unique_repo_name(remote_repos_path=projects_path) new_checkout_path = projects_path / remote_repo_name master_copy = remote_repos_path / "hg_repo" - if master_copy.exists(): - shutil.copytree(master_copy, new_checkout_path) - return HgSync( + # Build the master copy once as a pristine, read-only cache. Every consumer + # gets an isolated copytree of it (including the first), so a test that + # mutates its checkout cannot pollute the cache for later tests. + if not master_copy.exists(): + HgSync( url=f"file://{hg_remote_repo}", - path=str(new_checkout_path), - ) + path=master_copy, + ).obtain() - hg_repo = HgSync( + shutil.copytree(master_copy, new_checkout_path) + return HgSync( url=f"file://{hg_remote_repo}", - path=master_copy, + path=str(new_checkout_path), ) - hg_repo.obtain() - return hg_repo @pytest.fixture @@ -774,24 +787,31 @@ def svn_repo( projects_path: pathlib.Path, svn_remote_repo: pathlib.Path, ) -> SvnSync: - """Pre-made svn clone of remote repo checked out to user's projects dir.""" + """Return an isolated svn checkout of the remote repo, one per test. + + Every consumer gets its own working copy under the user's projects dir, + copied from a session-cached master. A test may freely mutate it without + affecting any other test, so the fixture is safe under parallel runs + (``pytest-xdist``). + """ remote_repo_name = unique_repo_name(remote_repos_path=projects_path) new_checkout_path = projects_path / remote_repo_name master_copy = remote_repos_path / "svn_repo" - if master_copy.exists(): - shutil.copytree(master_copy, new_checkout_path) - return SvnSync( + # Build the master copy once as a pristine, read-only cache. Every consumer + # gets an isolated copytree of it (including the first), so a test that + # mutates its checkout cannot pollute the cache for later tests. + if not master_copy.exists(): + SvnSync( url=f"file://{svn_remote_repo}", - path=str(new_checkout_path), - ) + path=master_copy, + ).obtain() - svn_repo = SvnSync( + shutil.copytree(master_copy, new_checkout_path) + return SvnSync( url=f"file://{svn_remote_repo}", - path=str(projects_path / "svn_repo"), + path=str(new_checkout_path), ) - svn_repo.obtain() - return svn_repo @pytest.fixture diff --git a/src/libvcs/url/base.py b/src/libvcs/url/base.py index e3a68c0ba..c32140137 100644 --- a/src/libvcs/url/base.py +++ b/src/libvcs/url/base.py @@ -178,6 +178,10 @@ def register(self, cls: Rule) -> None: >>> GitURL.is_valid(url='gitlab:vcs-python/libvcs') True + Clean up so the examples below see the default rules: + + >>> GitURL.rule_map.unregister('gl-prefix') + **Example: git URLs + pip-style git URLs:** This is already in :class:`GitURL` via :data:`PIP_DEFAULT_RULES`. For the diff --git a/src/libvcs/url/registry.py b/src/libvcs/url/registry.py index 0438fcc3f..77c9e69b8 100644 --- a/src/libvcs/url/registry.py +++ b/src/libvcs/url/registry.py @@ -30,9 +30,8 @@ class ParserMatch(t.NamedTuple): class VCSRegistry: """Index of parsers.""" - parser_map: t.ClassVar[ParserMap] = {} - def __init__(self, parsers: ParserLazyMap) -> None: + self.parser_map: ParserMap = {} for k, v in parsers.items(): if isinstance(v, str): v = import_string(v) diff --git a/uv.lock b/uv.lock index a67137dc2..81f6b86b2 100644 --- a/uv.lock +++ b/uv.lock @@ -399,6 +399,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, ] +[[package]] +name = "execnet" +version = "2.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/89/780e11f9588d9e7128a3f87788354c7946a9cbb1401ad38a48c4db9a4f07/execnet-2.1.2.tar.gz", hash = "sha256:63d83bfdd9a23e35b9c6a3261412324f964c2ec8dcd8d3c6916ee9373e0befcd", size = 166622, upload-time = "2025-11-12T09:56:37.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/84/02fc1827e8cdded4aa65baef11296a9bbe595c474f0d6d758af082d849fd/execnet-2.1.2-py3-none-any.whl", hash = "sha256:67fba928dd5a544b783f6056f449e5e3931a5c378b128bc18501f7ea79e296ec", size = 40708, upload-time = "2025-11-12T09:56:36.333Z" }, +] + [[package]] name = "gp-furo-theme" version = "0.0.1a31" @@ -617,6 +626,7 @@ dev = [ { name = "pytest-mock" }, { name = "pytest-rerunfailures" }, { name = "pytest-watcher" }, + { name = "pytest-xdist" }, { name = "ruff" }, { name = "sphinx-autobuild", version = "2024.10.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "sphinx-autobuild", version = "2025.8.25", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, @@ -641,6 +651,7 @@ testing = [ { name = "pytest-mock" }, { name = "pytest-rerunfailures" }, { name = "pytest-watcher" }, + { name = "pytest-xdist" }, ] [package.metadata] @@ -663,6 +674,7 @@ dev = [ { name = "pytest-mock" }, { name = "pytest-rerunfailures" }, { name = "pytest-watcher" }, + { name = "pytest-xdist" }, { name = "ruff" }, { name = "sphinx-autobuild" }, { name = "sphinx-autodoc-api-style", specifier = "==0.0.1a31" }, @@ -685,6 +697,7 @@ testing = [ { name = "pytest-mock" }, { name = "pytest-rerunfailures" }, { name = "pytest-watcher" }, + { name = "pytest-xdist" }, ] [[package]] @@ -1052,6 +1065,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/3f/172d73600ad2771774cda108efb813fc724fc345e5240a81a1085f1ade5d/pytest_watcher-0.6.3-py3-none-any.whl", hash = "sha256:83e7748c933087e8276edb6078663e6afa9926434b4fd8b85cf6b32b1d5bec89", size = 12431, upload-time = "2026-01-10T23:28:17.64Z" }, ] +[[package]] +name = "pytest-xdist" +version = "3.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "execnet" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/b4/439b179d1ff526791eb921115fca8e44e596a13efeda518b9d845a619450/pytest_xdist-3.8.0.tar.gz", hash = "sha256:7e578125ec9bc6050861aa93f2d59f1d8d085595d6551c2c90b6f4fad8d3a9f1", size = 88069, upload-time = "2025-07-01T13:30:59.346Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" }, +] + [[package]] name = "pyyaml" version = "6.0.3"