diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index dae4937d5081d..0ad0f8e35a2ad 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -1,16 +1,71 @@ -name: Trigger wheel build +name: Build and publish on: push: - branches: [main, master, 'release*'] - tags: ['*'] + tags: ['v*'] + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true permissions: contents: read jobs: + generate_wheels_matrix: + name: Generate wheels matrix + runs-on: ubuntu-latest + outputs: + include: ${{ steps.set-matrix.outputs.include }} + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - name: Install cibuildwheel and pypyp + run: | + pipx install cibuildwheel==2.22.0 + pipx install pypyp==1.3.0 + - id: set-matrix + run: | + MATRIX=$( + { + cibuildwheel --config-file=cibuildwheel.toml --print-build-identifiers --platform linux --archs x86_64 \ + | pyp 'json.dumps({"only": x, "os": "ubuntu-latest"})' \ + && cibuildwheel --config-file=cibuildwheel.toml --print-build-identifiers --platform linux --archs aarch64 \ + | pyp 'json.dumps({"only": x, "os": "ubuntu-24.04-arm"})' \ + && cibuildwheel --config-file=cibuildwheel.toml --print-build-identifiers --platform macos --archs x86_64 \ + | pyp 'json.dumps({"only": x, "os": "macos-15-intel"})' \ + && cibuildwheel --config-file=cibuildwheel.toml --print-build-identifiers --platform macos --archs arm64 \ + | pyp 'json.dumps({"only": x, "os": "macos-14"})' + } | pyp 'json.dumps(list(map(json.loads, lines)))' + ) + echo "include=$MATRIX" | tee -a $GITHUB_OUTPUT + build-wheels: - if: github.repository == 'python/mypy' + name: Build ${{ matrix.only }} + needs: generate_wheels_matrix + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: ${{ fromJson(needs.generate_wheels_matrix.outputs.include) }} + steps: + - uses: actions/checkout@v4 + with: + 
persist-credentials: false + - uses: pypa/cibuildwheel@v2.22 + with: + config-file: cibuildwheel.toml + only: ${{ matrix.only }} + - uses: actions/upload-artifact@v4 + with: + name: dist-${{ matrix.only }} + path: wheelhouse/*.whl + overwrite: true + + build-sdist: + name: Build sdist runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -18,8 +73,31 @@ jobs: persist-credentials: false - uses: actions/setup-python@v5 with: - python-version: '3.11' - - name: Trigger script - env: - WHEELS_PUSH_TOKEN: ${{ secrets.WHEELS_PUSH_TOKEN }} - run: ./misc/trigger_wheel_build.sh + python-version: "3.13" + - name: Build sdist + run: | + pip install --upgrade setuptools build + python -m build --sdist + - uses: actions/upload-artifact@v4 + with: + name: dist-sdist + path: dist/*.tar.gz + overwrite: true + + publish: + name: Publish to PyPI + if: startsWith(github.ref, 'refs/tags/v') + needs: [build-wheels, build-sdist] + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + steps: + - name: Download all artifacts + uses: actions/download-artifact@v4 + with: + pattern: dist-* + path: dist + merge-multiple: true + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml deleted file mode 100644 index 66e7c997f4fad..0000000000000 --- a/.github/workflows/docs.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Check documentation build - -on: - workflow_dispatch: - push: - branches: [main, master, 'release*'] - tags: ['*'] - pull_request: - paths: - - 'docs/**' - # We now have a docs check that fails if any error codes don't have documentation, - # so it's important to do the docs build on all PRs touching mypy/errorcodes.py - # in case somebody's adding a new error code without any docs - - 'mypy/errorcodes.py' - # Part of the documentation is automatically generated from the options - # definitions in mypy/main.py - - 'mypy/main.py' - - 'mypyc/doc/**' - - '**/*.rst' - - 
'**/*.md' - - CREDITS - - LICENSE - -permissions: - contents: read - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - docs: - runs-on: ubuntu-latest - timeout-minutes: 10 - env: - TOXENV: docs - TOX_SKIP_MISSING_INTERPRETERS: False - VERIFY_MYPY_ERROR_CODES: 1 - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - name: Install tox - run: pip install tox==4.26.0 - - name: Setup tox environment - run: tox run -e ${{ env.TOXENV }} --notest - - name: Test - run: tox run -e ${{ env.TOXENV }} --skip-pkg-install diff --git a/.github/workflows/mypy_primer.yml b/.github/workflows/mypy_primer.yml deleted file mode 100644 index 478dc09c3d464..0000000000000 --- a/.github/workflows/mypy_primer.yml +++ /dev/null @@ -1,102 +0,0 @@ -name: Run mypy_primer - -on: - # Only run on PR, since we diff against master - pull_request: - paths-ignore: - - 'docs/**' - - '**/*.rst' - - '**/*.md' - - 'misc/**' - - 'mypyc/**' - - 'mypy/stubtest.py' - - 'mypy/stubgen.py' - - 'mypy/stubgenc.py' - - 'mypy/test/**' - - 'test-data/**' - -permissions: - contents: read - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - mypy_primer: - name: Run mypy_primer - runs-on: ubuntu-latest - strategy: - matrix: - shard-index: [0, 1, 2, 3, 4, 5] - fail-fast: false - timeout-minutes: 60 - steps: - - uses: actions/checkout@v4 - with: - path: mypy_to_test - fetch-depth: 0 - persist-credentials: false - - uses: actions/setup-python@v5 - with: - python-version: "3.14" - - name: Install dependencies - run: | - python -m pip install -U pip - pip install git+https://github.com/hauntsaninja/mypy_primer.git - - name: Run mypy_primer - shell: bash - run: | - cd mypy_to_test - echo "new commit" - git rev-list --format=%s --max-count=1 $GITHUB_SHA - - MERGE_BASE=$(git merge-base 
$GITHUB_SHA origin/$GITHUB_BASE_REF) - git checkout -b base_commit $MERGE_BASE - echo "base commit" - git rev-list --format=%s --max-count=1 base_commit - - echo '' - cd .. - # fail action if exit code isn't zero or one - ( - mypy_primer \ - --repo mypy_to_test \ - --new $GITHUB_SHA --old base_commit \ - --num-shards 6 --shard-index ${{ matrix.shard-index }} \ - --debug \ - --additional-flags="--debug-serialize" \ - --output concise \ - --mypy-install-librt \ - | tee diff_${{ matrix.shard-index }}.txt - ) || [ $? -eq 1 ] - - if: ${{ matrix.shard-index == 0 }} - name: Save PR number - run: | - echo ${{ github.event.pull_request.number }} | tee pr_number.txt - - name: Upload mypy_primer diff + PR number - uses: actions/upload-artifact@v4 - if: ${{ matrix.shard-index == 0 }} - with: - name: mypy_primer_diffs-${{ matrix.shard-index }} - path: | - diff_${{ matrix.shard-index }}.txt - pr_number.txt - - name: Upload mypy_primer diff - uses: actions/upload-artifact@v4 - if: ${{ matrix.shard-index != 0 }} - with: - name: mypy_primer_diffs-${{ matrix.shard-index }} - path: diff_${{ matrix.shard-index }}.txt - - join_artifacts: - name: Join artifacts - runs-on: ubuntu-latest - needs: [mypy_primer] - steps: - - name: Merge artifacts - uses: actions/upload-artifact/merge@v4 - with: - name: mypy_primer_diffs - pattern: mypy_primer_diffs-* - delete-merged: true diff --git a/.github/workflows/mypy_primer_comment.yml b/.github/workflows/mypy_primer_comment.yml deleted file mode 100644 index 21f1222a5b893..0000000000000 --- a/.github/workflows/mypy_primer_comment.yml +++ /dev/null @@ -1,99 +0,0 @@ -name: Comment with mypy_primer diff - -on: # zizmor: ignore[dangerous-triggers] - workflow_run: - workflows: - - Run mypy_primer - types: - - completed - -permissions: {} - -jobs: - comment: - name: Comment PR from mypy_primer - runs-on: ubuntu-latest - permissions: - contents: read - pull-requests: write - if: ${{ github.event.workflow_run.conclusion == 'success' }} - steps: - - name: 
Download diffs - uses: actions/github-script@v7 - with: - script: | - const fs = require('fs'); - const artifacts = await github.rest.actions.listWorkflowRunArtifacts({ - owner: context.repo.owner, - repo: context.repo.repo, - run_id: ${{ github.event.workflow_run.id }}, - }); - const [matchArtifact] = artifacts.data.artifacts.filter((artifact) => - artifact.name == "mypy_primer_diffs"); - - const download = await github.rest.actions.downloadArtifact({ - owner: context.repo.owner, - repo: context.repo.repo, - artifact_id: matchArtifact.id, - archive_format: "zip", - }); - fs.writeFileSync("diff.zip", Buffer.from(download.data)); - - - run: unzip diff.zip - - run: | - cat diff_*.txt | tee fulldiff.txt - - - name: Post comment - id: post-comment - uses: actions/github-script@v7 - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const MAX_CHARACTERS = 50000 - const MAX_CHARACTERS_PER_PROJECT = MAX_CHARACTERS / 3 - - const fs = require('fs') - let data = fs.readFileSync('fulldiff.txt', { encoding: 'utf8' }) - - function truncateIfNeeded(original, maxLength) { - if (original.length <= maxLength) { - return original - } - let truncated = original.substring(0, maxLength) - // further, remove last line that might be truncated - truncated = truncated.substring(0, truncated.lastIndexOf('\n')) - let lines_truncated = original.split('\n').length - truncated.split('\n').length - return `${truncated}\n\n... 
(truncated ${lines_truncated} lines) ...` - } - - const projects = data.split('\n\n') - // don't let one project dominate - data = projects.map(project => truncateIfNeeded(project, MAX_CHARACTERS_PER_PROJECT)).join('\n\n') - // posting comment fails if too long, so truncate - data = truncateIfNeeded(data, MAX_CHARACTERS) - - console.log("Diff from mypy_primer:") - console.log(data) - - let body - if (data.trim()) { - body = 'Diff from [mypy_primer](https://github.com/hauntsaninja/mypy_primer), showing the effect of this PR on open source code:\n```diff\n' + data + '```' - } else { - body = "According to [mypy_primer](https://github.com/hauntsaninja/mypy_primer), this change doesn't affect type check results on a corpus of open source code. โœ…" - } - const prNumber = parseInt(fs.readFileSync("pr_number.txt", { encoding: "utf8" })) - await github.rest.issues.createComment({ - issue_number: prNumber, - owner: context.repo.owner, - repo: context.repo.repo, - body - }) - return prNumber - - - name: Hide old comments - # v0.4.0 - uses: kanga333/comment-hider@c12bb20b48aeb8fc098e35967de8d4f8018fffdf - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - leave_visible: 1 - issue_number: ${{ steps.post-comment.outputs.result }} diff --git a/.github/workflows/sync_typeshed.yml b/.github/workflows/sync_typeshed.yml deleted file mode 100644 index 2d5361a5919ce..0000000000000 --- a/.github/workflows/sync_typeshed.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: Sync typeshed - -on: - workflow_dispatch: - schedule: - - cron: "0 0 1,15 * *" - -permissions: {} - -jobs: - sync_typeshed: - name: Sync typeshed - if: github.repository == 'python/mypy' - runs-on: ubuntu-latest - permissions: - contents: write - pull-requests: write - timeout-minutes: 10 - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - persist-credentials: true # needed to `git push` the PR branch - # TODO: use whatever solution ends up working for - # https://github.com/python/typeshed/issues/8434 - - uses: 
actions/setup-python@v5 - with: - python-version: "3.10" - - name: git config - run: | - git config --global user.name mypybot - git config --global user.email '<>' - - name: Sync typeshed - run: | - python -m pip install requests==2.28.1 - GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} python misc/sync-typeshed.py --make-pr diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5242739f8f846..c02f871338981 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -4,7 +4,6 @@ on: workflow_dispatch: push: branches: [main, master, 'release*'] - tags: ['*'] pull_request: paths-ignore: - 'docs/**' diff --git a/.github/workflows/test_stubgenc.yml b/.github/workflows/test_stubgenc.yml deleted file mode 100644 index 7102b6513ddc2..0000000000000 --- a/.github/workflows/test_stubgenc.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: Test stubgenc on pybind11_fixtures - -on: - workflow_dispatch: - push: - branches: [main, master, 'release*'] - tags: ['*'] - pull_request: - paths: - - 'misc/test-stubgenc.sh' - - 'mypy/stubgenc.py' - - 'mypy/stubdoc.py' - - 'mypy/stubutil.py' - - 'test-data/pybind11_fixtures/**' - -permissions: - contents: read - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - stubgenc: - # Check stub file generation for a small pybind11 project - # (full text match is required to pass) - runs-on: ubuntu-latest - timeout-minutes: 10 - steps: - - - uses: actions/checkout@v4 - with: - persist-credentials: false - - - name: Setup ๐Ÿ 3.10 - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - - name: Test stubgenc - run: misc/test-stubgenc.sh diff --git a/cibuildwheel.toml b/cibuildwheel.toml new file mode 100644 index 0000000000000..e646ba5880156 --- /dev/null +++ b/cibuildwheel.toml @@ -0,0 +1,86 @@ +[tool.cibuildwheel] + +build-frontend = "pip" +linux.manylinux-x86_64-image = "manylinux_2_28" +linux.manylinux-aarch64-image = "manylinux_2_28" 
+linux.musllinux-x86_64-image = "musllinux_1_2" +linux.musllinux-aarch64-image = "musllinux_1_2" + +enable = [] + +# Skip: PyPy, 32-bit, musllinux aarch64, Windows, free-threaded builds. +# PyPy lacks prebuilt ast-serialize wheels and the build env can't bootstrap +# the Rust toolchain that maturin needs. +skip = [ + "pp*", + "*-win32", + "*-win_amd64", + "*-musllinux_aarch64", + "cp313t-*", + "cp314t-*", + "*-manylinux_i686", +] + +build-verbosity = 1 + +linux.before-all = [ + "yum install -y llvm-toolset || yum -v install -y llvm-toolset", +] + +# Uncomment the following to run tests against the built wheels: +# before-test = [ +# "pip install -r {project}/test-requirements.txt", +# ] +# +# test-command = """ \ +# ( \ +# DIR=$(python -c 'import mypy, os; dn = os.path.dirname; print(dn(dn(mypy.__path__[0])))') \ +# && cp '{project}/pyproject.toml' '{project}/conftest.py' $DIR \ +# +# && MYPY_TEST_DIR=$(python -c 'import mypy.test; print(mypy.test.__path__[0])') \ +# && MYPYC_TEST_DIR=$(python -c 'import mypyc.test; print(mypyc.test.__path__[0])') \ +# +# && MYPY_TEST_PREFIX='{project}' pytest $MYPY_TEST_DIR $MYPYC_TEST_DIR -k 'not test_external' \ +# ) +# """ +# +# windows.test-command = """ \ +# bash -c "( \ +# DIR=$(python -c 'import mypy, os; dn = os.path.dirname; print(dn(dn(mypy.__path__[0])))') \ +# && cp '{project}/pyproject.toml' '{project}/conftest.py' $DIR \ +# +# && MYPY_TEST_DIR=$(python -c 'import mypy.test; print(mypy.test.__path__[0])') \ +# && MYPY_TEST_PREFIX='{project}' pytest $MYPY_TEST_DIR/testcheck.py \ +# )" +# """ + +[tool.cibuildwheel.config-settings] +--build-option = "--use-mypyc" + +[tool.cibuildwheel.environment] +MYPY_USE_MYPYC = "1" +MYPYC_OPT_LEVEL = "3" +MYPYC_DEBUG_LEVEL = "0" + +[tool.cibuildwheel.windows.environment] +MYPY_USE_MYPYC = "1" +MYPYC_OPT_LEVEL = "2" +MYPYC_DEBUG_LEVEL = "0" + +[tool.cibuildwheel.linux.environment] +MYPY_USE_MYPYC = "1" +MYPYC_OPT_LEVEL = "3" +MYPYC_DEBUG_LEVEL = "0" +PATH = 
"$PATH:/opt/rh/llvm-toolset-17.0/root/usr/bin" +LD_LIBRARY_PATH = "$LD_LIBRARY_PATH:/opt/rh/llvm-toolset-17.0/root/usr/lib64" +CC = "clang" + +[[tool.cibuildwheel.overrides]] +select = "*musllinux*" +before-all = [ + "apk add --no-cache clang", +] +# before-test = [ +# "pip install -r {project}/test-requirements.txt", +# ] +environment = { MYPY_USE_MYPYC = "1", MYPYC_OPT_LEVEL = "3", MYPYC_DEBUG_LEVEL = "0", CC = "clang" } diff --git a/misc/trigger_wheel_build.sh b/misc/trigger_wheel_build.sh deleted file mode 100755 index a2608d93f3490..0000000000000 --- a/misc/trigger_wheel_build.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -eux - -# Trigger a build of mypyc compiled mypy wheels by updating the mypy -# submodule in the git repo that drives those builds. - -# $WHEELS_PUSH_TOKEN is stored in GitHub Settings and is an API token -# for the mypy-build-bot account. - -git config --global user.email "nobody" -git config --global user.name "mypy wheels autopush" - -COMMIT=$(git rev-parse HEAD) -pip install -r mypy-requirements.txt -V=$(python3 -m mypy --version) -V=$(echo "$V" | cut -d" " -f2) - -git clone --depth 1 https://${WHEELS_PUSH_TOKEN}@github.com/mypyc/mypy_mypyc-wheels.git build -cd build -echo $COMMIT > mypy_commit -git commit -am "Build wheels for mypy $V" -git tag v$V -# Push a tag, but no need to push the change to master -git push --tags diff --git a/mypy/meet.py b/mypy/meet.py index cb8ad75f6013d..298e5e1dd2d8d 100644 --- a/mypy/meet.py +++ b/mypy/meet.py @@ -17,6 +17,7 @@ ) from mypy.typeops import is_recursive_pair, make_simplified_union, tuple_fallback from mypy.types import ( + MYPYC_NATIVE_CHAR_NAMES, MYPYC_NATIVE_INT_NAMES, TUPLE_LIKE_INSTANCE_NAMES, AnyType, @@ -612,6 +613,8 @@ def _type_object_overlap(left: Type, right: Type) -> bool: if right.type.fullname == "builtins.int" and left.type.fullname in MYPYC_NATIVE_INT_NAMES: return True + if right.type.fullname == "builtins.str" and left.type.fullname in MYPYC_NATIVE_CHAR_NAMES: + return True # 
Two unrelated types cannot be partially overlapping: they're disjoint. if left.type.has_base(right.type.fullname): diff --git a/mypy/semanal_classprop.py b/mypy/semanal_classprop.py index c5ad34122f6c8..b568484f9d55d 100644 --- a/mypy/semanal_classprop.py +++ b/mypy/semanal_classprop.py @@ -22,7 +22,7 @@ Var, ) from mypy.options import Options -from mypy.types import MYPYC_NATIVE_INT_NAMES, Instance, ProperType +from mypy.types import MYPYC_NATIVE_CHAR_NAMES, MYPYC_NATIVE_INT_NAMES, Instance, ProperType # Hard coded type promotions (shared between all Python versions). # These add extra ad-hoc edges to the subtyping relation. For example, @@ -184,5 +184,11 @@ def add_type_promotion( assert isinstance(int_sym.node, TypeInfo) int_sym.node._promote.append(Instance(defn.info, [])) defn.info.alt_promote = Instance(int_sym.node, []) + # Same pattern for str <-> char (mypyc native char type). + if defn.fullname in MYPYC_NATIVE_CHAR_NAMES: + str_sym = builtin_names["str"] + assert isinstance(str_sym.node, TypeInfo) + str_sym.node._promote.append(Instance(defn.info, [])) + defn.info.alt_promote = Instance(str_sym.node, []) if promote_targets: defn.info._promote.extend(promote_targets) diff --git a/mypy/subtypes.py b/mypy/subtypes.py index 5733797326e88..498b16bef4a0c 100644 --- a/mypy/subtypes.py +++ b/mypy/subtypes.py @@ -36,6 +36,7 @@ from mypy.options import Options from mypy.state import state from mypy.types import ( + MYPYC_NATIVE_CHAR_NAMES, MYPYC_NATIVE_INT_NAMES, TUPLE_LIKE_INSTANCE_NAMES, TYPED_NAMEDTUPLE_NAMES, @@ -2201,6 +2202,10 @@ def covers_at_runtime(item: Type, supertype: Type) -> bool: # "int" covers all native int types if item.type.fullname in MYPYC_NATIVE_INT_NAMES: return True + elif isinstance(item, Instance) and supertype.type.fullname == "builtins.str": + # "str" covers the native char type + if item.type.fullname in MYPYC_NATIVE_CHAR_NAMES: + return True # TODO: Add more special cases. 
return False diff --git a/mypy/types.py b/mypy/types.py index 40c3839e2efca..43f89b75990dc 100644 --- a/mypy/types.py +++ b/mypy/types.py @@ -190,6 +190,10 @@ "mypy_extensions.u8", ) +# Mypyc native char type (compatible with builtins.str, bidirectionally; +# stored unboxed as an int32 codepoint with -1 as the empty sentinel). +MYPYC_NATIVE_CHAR_NAMES: Final = ("mypy_extensions.char",) + DATACLASS_TRANSFORM_NAMES: Final = ( "typing.dataclass_transform", "typing_extensions.dataclass_transform", diff --git a/mypy/typeshed/stubs/mypy-extensions/mypy_extensions.pyi b/mypy/typeshed/stubs/mypy-extensions/mypy_extensions.pyi index 7f7b9a89d8c53..6ebc76160d9b7 100644 --- a/mypy/typeshed/stubs/mypy-extensions/mypy_extensions.pyi +++ b/mypy/typeshed/stubs/mypy-extensions/mypy_extensions.pyi @@ -220,3 +220,27 @@ class u8: def __gt__(self, x: u8) -> bool: ... def __index__(self) -> int: ... def __eq__(self, x: object) -> bool: ... + +# char represents at most one Unicode codepoint. At the type-check level it's +# bidirectionally compatible with str via the mypyc native-string promotion +# mechanism. Under mypyc a char is stored unboxed as an int32 codepoint +# (with -1 for the empty sentinel). +class char: + def __new__(cls, __x: str = ...) -> char: ... + def __eq__(self, x: object) -> bool: ... + def __ne__(self, x: object) -> bool: ... + def __hash__(self) -> int: ... + # Mixed char/str concat. Result is str. + def __add__(self, x: str) -> str: ... + def __radd__(self, x: str) -> str: ... + def isspace(self) -> bool: ... + def isdigit(self) -> bool: ... + def isalnum(self) -> bool: ... + def isalpha(self) -> bool: ... + def isidentifier(self) -> bool: ... + # Case conversion. Returns the original codepoint if the Unicode + # result is multi-char (e.g. ß.upper() = "SS") since char holds one + # codepoint. + def upper(self) -> char: ... + def lower(self) -> char: ... + def strip(self, __chars: str | None = ...) -> str: ...
diff --git a/mypy/version.py b/mypy/version.py index 9f73dd736f677..52ee929b77bbd 100644 --- a/mypy/version.py +++ b/mypy/version.py @@ -8,7 +8,7 @@ # - Release versions have the form "1.2.3". # - Dev versions have the form "1.2.3+dev" (PLUS sign to conform to PEP 440). # - Before 1.0 we had the form "0.NNN". -__version__ = "2.1.0" +__version__ = "2.1.0.post2" base_version = __version__ mypy_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) diff --git a/mypyc/analysis/attrdefined.py b/mypyc/analysis/attrdefined.py index 1dfd33630f1c0..6627824d5bf22 100644 --- a/mypyc/analysis/attrdefined.py +++ b/mypyc/analysis/attrdefined.py @@ -424,14 +424,26 @@ def detect_undefined_bitmap(cl: ClassIR, seen: set[ClassIR]) -> None: for base in cl.base_mro[1:]: detect_undefined_bitmap(base, seen) + # Compute a fresh list rather than appending to cl.bitmap_attrs in place. + # Under separate=True each SCC's analyze_always_defined_attrs recurses + # through shared base classes (the `seen` set above only dedupes within + # one call), so every subclass group would otherwise re-extend the base's + # bitmap_attrs with another copy of the contributions. The base's emitted + # struct size would then grow with each call, and later incremental builds + # would observe a different (smaller) count and emit a different layout + # for the same class — leaving any not-rebuilt subclass with a stale view + # of the base's struct, which segfaults on attribute access. Recomputing + # fresh makes the function naturally idempotent.
+ new_attrs: list[str] = [] if len(cl.base_mro) > 1: - cl.bitmap_attrs.extend(cl.base_mro[1].bitmap_attrs) + new_attrs.extend(cl.base_mro[1].bitmap_attrs) for n, t in cl.attributes.items(): if t.error_overlap and not cl.is_always_defined(n): - cl.bitmap_attrs.append(n) + new_attrs.append(n) for base in cl.mro[1:]: if base.is_trait: for n, t in base.attributes.items(): - if t.error_overlap and not cl.is_always_defined(n) and n not in cl.bitmap_attrs: - cl.bitmap_attrs.append(n) + if t.error_overlap and not cl.is_always_defined(n) and n not in new_attrs: + new_attrs.append(n) + cl.bitmap_attrs = new_attrs diff --git a/mypyc/build.py b/mypyc/build.py index 84633086d2724..904c1f7fd3370 100644 --- a/mypyc/build.py +++ b/mypyc/build.py @@ -566,22 +566,114 @@ def construct_groups( return groups -def get_header_deps(cfiles: list[tuple[str, str]]) -> list[str]: - """Find all the headers used by a group of cfiles. +# Single regex that captures both `#include "foo"` and `#include <foo>`. The +# alternation lets us tell the two forms apart: the quoted-form match populates +# group 1 and the angle-form match populates group 2. The C preprocessor +# applies different search rules to each kind (see `_extract_includes`), so we +# carry the kind through resolution rather than collapsing them up front. +_INCLUDE_RE = re.compile(r'#\s*include\s+(?:"([^"]+)"|<([^>]+)>)') + + +def _extract_includes(contents: str) -> list[tuple[bool, str]]: + """Return each `#include` directive's (is_angled, name) from `contents`. + + is_angled=False for `#include "foo"`, True for `#include <foo>`. + """ + out: list[tuple[bool, str]] = [] + for quoted, angled in _INCLUDE_RE.findall(contents): + if quoted: + out.append((False, quoted)) + else: + out.append((True, angled)) + return out + + +def get_header_deps(cfiles: list[tuple[str, str]]) -> list[tuple[bool, str]]: + """Find all the headers directly included by a group of cfiles.
We do this by just regexping the source, which is a bit simpler than - properly plumbing the data through. + properly plumbing the data through. Transitive header-to-header includes + are picked up by `resolve_cfile_deps` in `mypyc_build`, which can read + the on-disk headers after every group has written its files. Arguments: - cfiles: A list of (file name, file contents) pairs. + cfiles: A list of (file name, file contents) pairs. Contents must be + non-empty; callers handling cached groups must re-read the .c + from disk before calling, otherwise direct includes are missed + and Extension.depends ends up empty. """ - headers: set[str] = set() + assert all( + contents for _, contents in cfiles + ), "get_header_deps requires non-empty file contents" + headers: set[tuple[bool, str]] = set() for _, contents in cfiles: - headers.update(re.findall(r'#include [<"]([^>"]+)[>"]', contents)) + headers.update(_extract_includes(contents)) return sorted(headers) +def resolve_cfile_deps( + cfile_dir: str, direct_includes: list[tuple[bool, str]], target_dir: str +) -> set[str]: + """Resolve a .c file's `#include` directives to on-disk paths, walking + transitively through resolved headers. + + The C preprocessor resolves `#include "foo"` relative to the includer's + directory first, then via -I, while `#include <foo>` only uses -I. We + mirror that by trying each include against the includer's dir first, + then against `target_dir` (the only -I path that holds files we generate). + Anything we can't resolve under those two roots is dropped — lib-rt + headers like `` and `` live elsewhere and don't change + between builds, so they're not real deps for incremental purposes. + + The walk is transitive: each resolved header is opened and scanned for + its own `#include` directives.
Without this, cross-group export-table + headers reached via `__native_internal_.h` (which includes + ``) would be missed, and edits that shift + struct offsets in `other_group` would not trigger a recompile of the + consumer's .o file. Its baked-in offsets would then resolve to whatever + class/function now occupies that slot — silent runtime corruption. + + Returns a set of resolved paths suitable for use as an Extension.depends + list. + """ + resolved: set[str] = set() + # Worklist of (search_dir, is_angled, header_name). search_dir is the + # includer's directory — for the initial cfile it's the cfile's dir, for + # a transitively-included header it's that header's dir. It's only + # consulted for quoted-form includes. + worklist: list[tuple[str, bool, str]] = [ + (cfile_dir, is_angled, dep) for is_angled, dep in direct_includes + ] + while worklist: + search_dir, is_angled, dep = worklist.pop() + # Quoted form: includer's dir first, then -I (target_dir). + # Angled form: -I only (skips the includer's dir). + search_bases = (target_dir,) if is_angled else (search_dir, target_dir) + for base in search_bases: + candidate = os.path.normpath(os.path.join(base, dep)) + if not os.path.exists(candidate): + continue + if candidate in resolved: + break + resolved.add(candidate) + # Recurse only into headers. Some lib-rt sources are pulled in + # as `#include "init.c"` etc.; those don't resolve under + # target_dir so they get filtered out before we'd try to scan + # them, but the .h guard is a cheap belt-and-braces.
+ if candidate.endswith(".h"): + try: + with open(candidate, encoding="utf-8") as f: + header_contents = f.read() + except OSError: + header_contents = "" + sub_dir = os.path.dirname(candidate) + for sub_angled, sub in _extract_includes(header_contents): + worklist.append((sub_dir, sub_angled, sub)) + break + return resolved + + def mypyc_build( paths: list[str], compiler_options: CompilerOptions, @@ -633,26 +725,78 @@ def mypyc_build( # Write out the generated C and collect the files for each group # Should this be here?? group_cfilenames: list[tuple[list[str], list[str]]] = [] - for cfiles in group_cfiles: + # Per-group list of (full_cfile_path, raw_include_targets). Resolution is + # deferred until every group has written its files (sibling groups' headers + # may not exist while we're iterating a different group), and is done + # per-includer so we can apply the C preprocessor's actual search order. + pending: list[list[tuple[str, list[tuple[bool, str]]]]] = [] + for (group_sources, group_name), cfiles in zip(groups, group_cfiles): cfilenames = [] + per_cfile_deps: list[tuple[str, list[tuple[bool, str]]]] = [] for cfile, ctext in cfiles: - cfile = os.path.join(compiler_options.target_dir, cfile) - # Empty contents marks a file the previous run already wrote - # (fully-cached group): skip the rewrite and just reuse it. + cfile_full = os.path.join(compiler_options.target_dir, cfile) + # Empty ctext marks a file the previous run already wrote (fully-cached + # group): skip the rewrite so we don't clobber the on-disk .c file + # with an empty stub, which would leave the next compile with no + # body to translate. if ctext and not options.mypyc_skip_c_generation: - write_file(cfile, ctext) - if os.path.splitext(cfile)[1] == ".c": - cfilenames.append(cfile) - - # The header regex matches both quote styles, so the result can - # include system headers like `` that don't live under - # target_dir. 
Joining those produces non-existent paths which - # would force a full rebuild on every run via Extension.depends. - candidate_deps = ( - os.path.join(compiler_options.target_dir, dep) for dep in get_header_deps(cfiles) - ) - deps = [d for d in candidate_deps if os.path.exists(d)] - group_cfilenames.append((cfilenames, deps)) + write_file(cfile_full, ctext) + if os.path.splitext(cfile_full)[1] == ".c": + cfilenames.append(cfile_full) + # For fully-cached groups ctext is empty; read the on-disk .c so the dep resolver + # can walk its transitive header chain and populate Extension.depends. Otherwise, + # cross-group export-table header changes (e.g. a new class shifting struct offsets) + # won't trigger a recompile of this cached consumer's .o. + if not ctext and os.path.exists(cfile_full): + try: + with open(cfile_full, encoding="utf-8") as _f: + ctext = _f.read() + except OSError: + pass + if ctext: + per_cfile_deps.append((cfile_full, get_header_deps([(cfile, ctext)]))) + + # Fully-cached mypy build (typical of pip's second setup.py invocation + # for the wheel-build phase): mypyc returns an empty ctext for the + # group, but the .c file from the previous run is still on disk. + # Reuse it so the resulting Extension isn't built with sources=[]. + # Mirrors the path that GroupGenerator.generate_c_for_modules emits. 
+ if not cfilenames and group_name is not None: + from mypyc.codegen.emitmodule import group_dir as _group_dir + + short_suffix = "_" + exported_name(group_name.split(".")[-1]) + existing = os.path.join( + compiler_options.target_dir, _group_dir(group_name), f"__native{short_suffix}.c" + ) + if os.path.exists(existing): + cfilenames.append(existing) + try: + with open(existing, encoding="utf-8") as _f: + existing_text = _f.read() + except OSError: + existing_text = "" + if existing_text: + per_cfile_deps.append( + (existing, get_header_deps([(os.path.basename(existing), existing_text)])) + ) + + pending.append(per_cfile_deps) + group_cfilenames.append((cfilenames, [])) + + # Resolve deps in a second pass, after every group's files are on disk + # (a header in one group may include a header in another, and the + # included file may not exist yet when we iterate the first group's + # cfiles). See `resolve_cfile_deps` for the resolution rules. + for i, per_cfile in enumerate(pending): + deps_set: set[str] = set() + for cfile_full, dep_names in per_cfile: + deps_set.update( + resolve_cfile_deps( + os.path.dirname(cfile_full), dep_names, compiler_options.target_dir + ) + ) + cfilenames, _ = group_cfilenames[i] + group_cfilenames[i] = (cfilenames, sorted(deps_set)) return groups, group_cfilenames, source_deps diff --git a/mypyc/codegen/emit.py b/mypyc/codegen/emit.py index 01cf3593a8d60..90986ed377d65 100644 --- a/mypyc/codegen/emit.py +++ b/mypyc/codegen/emit.py @@ -44,6 +44,7 @@ is_bool_or_bit_rprimitive, is_bytearray_rprimitive, is_bytes_rprimitive, + is_char_rprimitive, is_dict_rprimitive, is_fixed_width_rtype, is_float_rprimitive, @@ -1083,6 +1084,13 @@ def emit_unbox( self.emit_line(f"{dest} = CPyLong_AsInt16({src});") if not isinstance(error, AssignHandler): self.emit_unbox_failure_with_overlapping_error_value(dest, typ, failure) + elif is_char_rprimitive(typ): + assert not optional + if declare_dest: + self.emit_line(f"int32_t {dest};") + self.emit_line(f"{dest} 
= CPyChar_FromObject({src});") + if not isinstance(error, AssignHandler): + self.emit_unbox_failure_with_overlapping_error_value(dest, typ, failure) elif is_uint8_rprimitive(typ): # Whether we are borrowing or not makes no difference. assert not optional # Not supported for overlapping error values @@ -1230,6 +1238,8 @@ def emit_box( self.emit_inc_ref(dest, object_rprimitive) elif is_int32_rprimitive(typ) or is_int16_rprimitive(typ) or is_uint8_rprimitive(typ): self.emit_line(f"{declaration}{dest} = PyLong_FromLong({src});") + elif is_char_rprimitive(typ): + self.emit_line(f"{declaration}{dest} = CPyChar_ToStr({src});") elif is_int64_rprimitive(typ): self.emit_line(f"{declaration}{dest} = PyLong_FromLongLong({src});") elif is_float_rprimitive(typ): diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index a312311b21a04..080d2b93aa224 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -29,6 +29,7 @@ BITMAP_BITS, BITMAP_TYPE, CPYFUNCTION_NAME, + MYPYC_DEFAULTS_SETUP, NATIVE_PREFIX, PREFIX, REG_PREFIX, @@ -285,7 +286,7 @@ def emit_line() -> None: # If the class has a method to initialize default attribute # values, we need to call it during initialization. - defaults_fn = cl.get_method("__mypyc_defaults_setup") + defaults_fn = cl.get_method(MYPYC_DEFAULTS_SETUP) # If there is a __init__ method, we'll use it in the native constructor. init_fn = cl.get_method("__init__") @@ -683,8 +684,11 @@ def emit_attr_defaults_func_call(defaults_fn: FuncIR, self_name: str, emitter: E The code returns NULL on a raised exception. 
""" emitter.emit_lines( - "if ({}{}((PyObject *){}) == 0) {{".format( - NATIVE_PREFIX, defaults_fn.cname(emitter.names), self_name + "if ({}{}{}((PyObject *){}) == 0) {{".format( + emitter.get_group_prefix(defaults_fn.decl), + NATIVE_PREFIX, + defaults_fn.cname(emitter.names), + self_name, ), "Py_DECREF(self);", "return NULL;", diff --git a/mypyc/codegen/emitmodule.py b/mypyc/codegen/emitmodule.py index fa0a4385f4fb5..c2f3b70e9f6e1 100644 --- a/mypyc/codegen/emitmodule.py +++ b/mypyc/codegen/emitmodule.py @@ -76,6 +76,7 @@ from mypyc.irbuild.prepare import load_type_map from mypyc.namegen import NameGenerator, exported_name from mypyc.options import CompilerOptions +from mypyc.transform.char_str_index_fold import do_char_str_index_fold from mypyc.transform.copy_propagation import do_copy_propagation from mypyc.transform.exceptions import insert_exception_handling from mypyc.transform.flag_elimination import do_flag_elimination @@ -275,11 +276,14 @@ def compile_scc_to_ir( # Switch to lower abstraction level IR. lower_ir(fn, compiler_options) + # Run char_str_index_fold before dependency collection so the new + # str_extra_ops.h primitives it introduces are picked up. + do_char_str_index_fold(fn, compiler_options) # Calculate implicit module dependencies (needed for librt) deps = find_implicit_op_dependencies(fn) if deps is not None: module.dependencies.update(deps) - # Perform optimizations. + # Remaining optimizations. 
do_copy_propagation(fn, compiler_options) do_flag_elimination(fn, compiler_options) diff --git a/mypyc/common.py b/mypyc/common.py index 64fe8126087b8..382d640a84083 100644 --- a/mypyc/common.py +++ b/mypyc/common.py @@ -24,6 +24,7 @@ LAMBDA_NAME: Final = "__mypyc_lambda__" PROPSET_PREFIX: Final = "__mypyc_setter__" SELF_NAME: Final = "__mypyc_self__" +MYPYC_DEFAULTS_SETUP: Final = "__mypyc_defaults_setup" GENERATOR_ATTRIBUTE_PREFIX: Final = "__mypyc_generator_attribute__" CPYFUNCTION_NAME = "__cpyfunction__" diff --git a/mypyc/ir/rtypes.py b/mypyc/ir/rtypes.py index db29f9e304d8d..6716f95f52c0c 100644 --- a/mypyc/ir/rtypes.py +++ b/mypyc/ir/rtypes.py @@ -394,6 +394,20 @@ def __hash__(self) -> int: error_overlap=True, ) +# char: single Unicode codepoint stored as int32; -1 empty sentinel, -113 +# error sentinel. Distinct from int32_rprimitive so specializers can route +# char-typed operations through codepoint primitives. +char_rprimitive: Final = RPrimitive( + "char", + is_unboxed=True, + is_refcounted=False, + is_native_int=True, + is_signed=True, + ctype="int32_t", + size=4, + error_overlap=False, +) + # The following unsigned native int types (u16, u32, u64) are not # exposed to the user. They are for internal use within mypyc only. 
@@ -597,6 +611,7 @@ def is_fixed_width_rtype(rtype: RType) -> TypeGuard[RPrimitive]: or is_int32_rprimitive(rtype) or is_int16_rprimitive(rtype) or is_uint8_rprimitive(rtype) + or is_char_rprimitive(rtype) ) @@ -604,6 +619,10 @@ def is_uint8_rprimitive(rtype: RType) -> TypeGuard[RPrimitive]: return rtype is uint8_rprimitive +def is_char_rprimitive(rtype: RType) -> TypeGuard[RPrimitive]: + return rtype is char_rprimitive + + def is_uint32_rprimitive(rtype: RType) -> TypeGuard[RPrimitive]: return rtype is uint32_rprimitive diff --git a/mypyc/irbuild/classdef.py b/mypyc/irbuild/classdef.py index f5d094d142317..f03a54e0fcd71 100644 --- a/mypyc/irbuild/classdef.py +++ b/mypyc/irbuild/classdef.py @@ -7,6 +7,7 @@ from typing import Final from mypy.nodes import ( + ARG_POS, EXCLUDED_ENUM_ATTRIBUTES, TYPE_VAR_TUPLE_KIND, AssignmentStmt, @@ -21,7 +22,6 @@ NameExpr, OverloadedFuncDef, PassStmt, - RefExpr, StrExpr, TempNode, TypeInfo, @@ -29,7 +29,7 @@ is_class_var, ) from mypy.types import Instance, UnboundType, get_proper_type -from mypyc.common import PROPSET_PREFIX +from mypyc.common import MYPYC_DEFAULTS_SETUP, PROPSET_PREFIX from mypyc.ir.class_ir import ClassIR, NonExtClassInfo from mypyc.ir.func_ir import FuncDecl, FuncSignature from mypyc.ir.ops import ( @@ -48,15 +48,7 @@ TupleSet, Value, ) -from mypyc.ir.rtypes import ( - RType, - bool_rprimitive, - dict_rprimitive, - is_none_rprimitive, - is_object_rprimitive, - is_optional_type, - object_rprimitive, -) +from mypyc.ir.rtypes import RType, bool_rprimitive, dict_rprimitive, object_rprimitive from mypyc.irbuild.builder import IRBuilder, create_type_params from mypyc.irbuild.function import ( gen_property_getter_ir, @@ -66,7 +58,13 @@ load_type, ) from mypyc.irbuild.prepare import GENERATOR_HELPER_NAME -from mypyc.irbuild.util import dataclass_type, get_func_def, is_constant, is_dataclass_decorator +from mypyc.irbuild.util import ( + dataclass_type, + default_attr_name, + get_func_def, + is_constant, + 
is_dataclass_decorator, +) from mypyc.primitives.dict_ops import dict_new_op, exact_dict_set_item_op from mypyc.primitives.generic_ops import ( iter_op, @@ -322,10 +320,6 @@ def __init__(self, builder: IRBuilder, cdef: ClassDef) -> None: def class_body_obj(self) -> Value | None: return self.type_obj - def skip_attr_default(self, name: str, stmt: AssignmentStmt) -> bool: - """Controls whether to skip generating a default for an attribute.""" - return False - def add_method(self, fdef: FuncDef) -> None: handle_ext_method(self.builder, self.cdef, fdef) @@ -348,11 +342,18 @@ def finalize(self, ir: ClassIR) -> None: # Call __init_subclass__ after class attributes have been set self.builder.call_c(py_init_subclass_op, [self.type_obj], self.cdef.line) - attrs_with_defaults, default_assignments = find_attr_initializers( - self.builder, self.cdef, self.skip_attr_default - ) - ir.attrs_with_defaults.update(attrs_with_defaults) - generate_attr_defaults_init(self.builder, self.cdef, default_assignments) + # Under separate compilation, prepare.py pre-registers the decl iff + # the class has its own default attribute assignments to emit, so we + # can skip the body walk entirely when it isn't present. Without + # separate compilation, find_attr_initializers walks the MRO so that + # inherited defaults are reflected in ir.attrs_with_defaults (relied + # on by the attribute-definedness analysis), so we always run it. 
+ if not self.builder.options.separate or MYPYC_DEFAULTS_SETUP in ir.method_decls: + attrs_with_defaults, default_assignments = find_attr_initializers( + self.builder, self.cdef + ) + ir.attrs_with_defaults.update(attrs_with_defaults) + generate_attr_defaults_init(self.builder, self.cdef, default_assignments) create_ne_from_eq(self.builder, self.cdef) @@ -380,9 +381,6 @@ def create_non_ext_info(self) -> NonExtClassInfo: self.builder.add(LoadAddress(type_object_op.type, type_object_op.src, self.cdef.line)), ) - def skip_attr_default(self, name: str, stmt: AssignmentStmt) -> bool: - return stmt.type is not None - def get_type_annotation(self, stmt: AssignmentStmt) -> TypeInfo | None: # We populate __annotations__ because dataclasses uses it to determine # which attributes to compute on. @@ -445,9 +443,6 @@ class AttrsClassBuilder(DataClassBuilder): add_annotations_to_dict = False - def skip_attr_default(self, name: str, stmt: AssignmentStmt) -> bool: - return True - def get_type_annotation(self, stmt: AssignmentStmt) -> TypeInfo | None: if isinstance(stmt.rvalue, CallExpr): # find the type arg in `attr.ib(type=str)` @@ -741,58 +736,50 @@ def add_non_ext_class_attr( def find_attr_initializers( - builder: IRBuilder, cdef: ClassDef, skip: Callable[[str, AssignmentStmt], bool] | None = None + builder: IRBuilder, cdef: ClassDef ) -> tuple[set[str], list[tuple[AssignmentStmt, str]]]: """Find initializers of attributes in a class body. - If provided, the skip arg should be a callable which will return whether - to skip generating a default for an attribute. It will be passed the name of - the attribute and the corresponding AssignmentStmt. + Under separate compilation, only this class's own body is walked, and + generate_attr_defaults_init emits a runtime call to the parent's + __mypyc_defaults_setup so inherited defaults are produced by chaining, + not by inlining. 
Walking the MRO here would break under separate=True + with mypy's incremental cache: a base class loaded from the cache has + an empty ClassDef.defs.body (mypy/nodes.py::ClassDef.serialize doesn't + serialize the class body), so inherited assignments would be silently + dropped and the subclass's __mypyc_defaults_setup would leave inherited + slots in the "undefined" state at runtime. + + Without separate compilation, all modules are parsed in the same pass + and the MRO walk is safe; we keep the original inline-all behavior + there as an optimization (no chain call needed for instance creation). """ cls = builder.mapper.type_to_ir[cdef.info] if cls.builtin_base: return set(), [] - attrs_with_defaults = set() + cls_type = dataclass_type(cdef) + attrs_with_defaults: set[str] = set() + default_assignments: list[tuple[AssignmentStmt, str]] = [] - # Pull out all assignments in classes in the mro so we can initialize them # TODO: Support nested statements - default_assignments: list[tuple[AssignmentStmt, str]] = [] - for info in reversed(cdef.info.mro): - if info not in builder.mapper.type_to_ir: + if builder.options.separate: + infos: list[TypeInfo] = [cdef.info] + else: + infos = list(reversed(cdef.info.mro)) + + for info in infos: + info_ir = builder.mapper.type_to_ir.get(info) + if info_ir is None: continue for stmt in info.defn.defs.body: - if ( - isinstance(stmt, AssignmentStmt) - and isinstance(stmt.lvalues[0], NameExpr) - and not is_class_var(stmt.lvalues[0]) - and not isinstance(stmt.rvalue, TempNode) - ): - name = stmt.lvalues[0].name - if name == "__slots__": - continue - - if name == "__deletable__": - check_deletable_declaration(builder, cls, stmt.line) - continue - - if skip is not None and skip(name, stmt): - continue - - attr_type = cls.attr_type(name) - - # If the attribute is initialized to None and type isn't optional, - # doesn't initialize it to anything (special case for "# type:" comments). 
- if isinstance(stmt.rvalue, RefExpr) and stmt.rvalue.fullname == "builtins.None": - if ( - not is_optional_type(attr_type) - and not is_object_rprimitive(attr_type) - and not is_none_rprimitive(attr_type) - ): - continue - - attrs_with_defaults.add(name) - default_assignments.append((stmt, info.module_name)) + if not isinstance(stmt, AssignmentStmt): + continue + name = default_attr_name(stmt, info_ir, cls_type) + if name is None: + continue + attrs_with_defaults.add(name) + default_assignments.append((stmt, info.module_name)) return attrs_with_defaults, default_assignments @@ -800,15 +787,49 @@ def find_attr_initializers( def generate_attr_defaults_init( builder: IRBuilder, cdef: ClassDef, default_assignments: list[tuple[AssignmentStmt, str]] ) -> None: - """Generate an initialization method for default attr values (from class vars).""" - if not default_assignments: - return + """Generate an initialization method for default attr values (from class vars). + + Under separate compilation, the emitted __mypyc_defaults_setup chains to + the nearest ancestor that has the method (Python __init__ style), then + sets only this class's own defaults; inherited defaults are produced by + the chain at runtime. The ancestor lookup uses cls.mro[1:] and relies on + prepare.py having registered the FuncDecl on every class that needs one + before any IR build runs. IR build within a compilation group proceeds + in filename order, so this class may be IR-built before its base, and a + method_decls lookup that depended on the base having been IR-built first + would miss. Without separate compilation, find_attr_initializers has + already collected the full MRO's defaults into default_assignments, so + we inline them all as before. 
+ """ cls = builder.mapper.type_to_ir[cdef.info] if cls.builtin_base: return - with builder.enter_method(cls, "__mypyc_defaults_setup", bool_rprimitive): + parent_with_defaults: ClassIR | None = None + if builder.options.separate: + for ancestor in cls.mro[1:]: + if MYPYC_DEFAULTS_SETUP in ancestor.method_decls: + parent_with_defaults = ancestor + break + + if not default_assignments and parent_with_defaults is None: + return + + with builder.enter_method(cls, MYPYC_DEFAULTS_SETUP, bool_rprimitive): self_var = builder.self() + + # Chain to parent's setup so inherited defaults run first; propagate + # its False return so a parent default that raised still aborts + # instance creation rather than being silently swallowed here. + if parent_with_defaults is not None: + decl = parent_with_defaults.method_decl(MYPYC_DEFAULTS_SETUP) + parent_ok = builder.builder.call(decl, [self_var], [ARG_POS], [None], cdef.line) + fail_block, continue_block = BasicBlock(), BasicBlock() + builder.add(Branch(parent_ok, continue_block, fail_block, Branch.BOOL)) + builder.activate_block(fail_block) + builder.add(Return(builder.false())) + builder.activate_block(continue_block) + for stmt, origin_module in default_assignments: lvalue = stmt.lvalues[0] assert isinstance(lvalue, NameExpr), lvalue @@ -833,26 +854,6 @@ def generate_attr_defaults_init( builder.add(Return(builder.true())) -def check_deletable_declaration(builder: IRBuilder, cl: ClassIR, line: int) -> None: - for attr in cl.deletable: - if attr not in cl.attributes: - if not cl.has_attr(attr): - builder.error(f'Attribute "{attr}" not defined', line) - continue - for base in cl.mro: - if attr in base.property_types: - builder.error(f'Cannot make property "{attr}" deletable', line) - break - else: - _, base = cl.attr_details(attr) - builder.error( - ('Attribute "{}" not defined in "{}" ' + '(defined in "{}")').format( - attr, cl.name, base.name - ), - line, - ) - - def create_ne_from_eq(builder: IRBuilder, cdef: ClassDef) -> None: 
"""Create a "__ne__" method from a "__eq__" method (if only latter exists).""" cls = builder.mapper.type_to_ir[cdef.info] diff --git a/mypyc/irbuild/expression.py b/mypyc/irbuild/expression.py index e8d22a051cc4d..b0b1b0d182375 100644 --- a/mypyc/irbuild/expression.py +++ b/mypyc/irbuild/expression.py @@ -51,6 +51,7 @@ ) from mypy.types import ( AnyType, + CallableType, Instance, ProperType, TupleType, @@ -80,22 +81,28 @@ from mypyc.ir.rtypes import ( RInstance, RTuple, + RType, RVec, bool_rprimitive, + char_rprimitive, int64_rprimitive, int_rprimitive, is_any_int, is_bytearray_rprimitive, is_bytes_rprimitive, + is_char_rprimitive, is_fixed_width_rtype, is_int64_rprimitive, is_int_rprimitive, is_list_rprimitive, is_none_rprimitive, is_object_rprimitive, + is_str_rprimitive, + is_tagged, is_tuple_rprimitive, object_rprimitive, set_rprimitive, + short_int_rprimitive, vec_api_by_item_type, ) from mypyc.irbuild.ast_helpers import is_borrow_friendly_expr, process_conditional @@ -121,6 +128,7 @@ apply_method_specialization, translate_object_new, translate_object_setattr, + try_emit_str_index_as_int, ) from mypyc.irbuild.vec import ( as_platform_int, @@ -465,7 +473,12 @@ def translate_method_call(builder: IRBuilder, expr: CallExpr, callee: MemberExpr if val is not None: return val - obj = builder.accept(callee.expr) + # Borrow the receiver if it's a native struct field access (e.g. expression.args). + # The KeepAlive on the struct owner guarantees the field value stays alive. 
+ can_borrow = isinstance(callee.expr, MemberExpr) and builder.is_native_attr_ref( + callee.expr + ) + obj = builder.accept(callee.expr, can_borrow=can_borrow) args = [builder.accept(arg) for arg in expr.args] return builder.gen_method_call( obj, @@ -908,6 +921,64 @@ def precompute_set_literal(builder: IRBuilder, s: SetExpr) -> Value | None: return None +def _codepoint_kind(builder: IRBuilder, expr: Expression, expr_type: RType) -> str | None: + """Classify expr as a codepoint candidate without emitting IR. + + Returns "char", "index" (for ``s[i]`` with int-like index), or None. + """ + if is_char_rprimitive(expr_type): + return "char" + if isinstance(expr, IndexExpr) and is_str_rprimitive(builder.node_type(expr.base)): + idx_type = builder.node_type(expr.index) + if is_tagged(idx_type) or is_fixed_width_rtype(idx_type): + return "index" + return None + + +def _emit_codepoint_value(builder: IRBuilder, expr: Expression, kind: str) -> tuple[Value, RType]: + """Emit the codepoint read. ``kind`` must come from _codepoint_kind.""" + if kind == "char": + return builder.accept(expr), char_rprimitive + assert isinstance(expr, IndexExpr) + val = try_emit_str_index_as_int(builder, expr) + assert val is not None # _codepoint_kind guarantees this + return val, short_int_rprimitive + + +def try_specialize_codepoint_compare( + builder: IRBuilder, op: str, lhs: Expression, rhs: Expression, line: int +) -> Value | None: + """Rewrite ``x == y`` / ``x != y`` to an int compare of codepoints when at + least one side is a codepoint (``char`` value or ``s[i]`` on a str) and the + other is either another codepoint or a 0/1-char str literal. Avoids the + 1-char PyObject alloc + PyUnicode_Compare. + """ + if op not in ("==", "!="): + return None + lhs_kind = _codepoint_kind(builder, lhs, builder.node_type(lhs)) + rhs_kind = _codepoint_kind(builder, rhs, builder.node_type(rhs)) + if lhs_kind is None and rhs_kind is None: + return None + # Codepoint on both sides: direct int compare. 
+ if lhs_kind is not None and rhs_kind is not None: + l_val, _ = _emit_codepoint_value(builder, lhs, lhs_kind) + r_val, _ = _emit_codepoint_value(builder, rhs, rhs_kind) + return builder.binary_op(l_val, r_val, op, line) + # One side codepoint, other side must fold to a 0/1-char str literal. + if lhs_kind is not None: + cp_expr, cp_kind, lit_expr = lhs, lhs_kind, rhs + else: + assert rhs_kind is not None + cp_expr, cp_kind, lit_expr = rhs, rhs_kind, lhs + folded = constant_fold_expr(builder, lit_expr) + if not isinstance(folded, str) or len(folded) > 1: + return None # No IR emitted yet — safe to bail. + val, rtype = _emit_codepoint_value(builder, cp_expr, cp_kind) + # Empty string encodes as -1 (char empty sentinel). + codepoint = -1 if len(folded) == 0 else ord(folded) + return builder.binary_op(val, Integer(codepoint, rtype, line), op, line) + + def transform_comparison_expr(builder: IRBuilder, e: ComparisonExpr) -> Value: # x in (...)/[...] # x not in (...)/[...] @@ -918,6 +989,15 @@ def transform_comparison_expr(builder: IRBuilder, e: ComparisonExpr) -> Value: return result if len(e.operators) == 1: + # Codepoint fast path: char/char, char/s[i], or codepoint/1-char-literal + # -> int compare instead of PyUnicode_Compare. + if first_op in ("==", "!="): + result = try_specialize_codepoint_compare( + builder, first_op, e.operands[0], e.operands[1], e.line + ) + if result is not None: + return result + # Special some common simple cases if first_op in ("is", "is not"): right_expr = e.operands[1] @@ -934,6 +1014,46 @@ def transform_comparison_expr(builder: IRBuilder, e: ComparisonExpr) -> Value: return builder.binary_op(left, right, first_op, e.line) + # Type object comparisons: use identity (pointer) comparison instead of + # PyObject_RichCompare, since type objects are singletons. + # type(x) is TypeType, but a class reference A is CallableType (constructor).
+ if first_op in ("==", "!="): + right_expr = e.operands[1] + left_mypy_type = get_proper_type(builder.types.get(left_expr)) + right_mypy_type = get_proper_type(builder.types.get(right_expr)) + if _is_type_object(left_mypy_type) and _is_type_object(right_mypy_type): + is_op = "is" if first_op == "==" else "is not" + left = builder.accept(left_expr) + right = builder.accept(right_expr) + return builder.translate_is_op(left, right, is_op, e.line) + + # IntEnum comparisons: unbox both sides to int for fast native comparison + # instead of going through slow PyObject_RichCompare. + # For ==/!= between two IntEnums, mypyc already uses fast identity comparison + # (since enum members are singletons), so we only apply this for: + # - Ordering ops (<, <=, >, >=) on any IntEnum comparison + # - ==/!= when one side is plain int (where identity comparison doesn't work) + if first_op in int_borrow_friendly_op: + right_expr = e.operands[1] + left_mypy_type = get_proper_type(builder.types.get(left_expr)) + right_mypy_type = get_proper_type(builder.types.get(right_expr)) + if _should_use_int_comparison(left_mypy_type, right_mypy_type, first_op): + left = builder.accept(left_expr) + right = builder.accept(right_expr) + left_int = builder.coerce(left, int_rprimitive, e.line) + right_int = builder.coerce(right, int_rprimitive, e.line) + return builder.binary_op(left_int, right_int, first_op, e.line) + + # For == and != on RInstance types that will lower to pointer identity + # (no custom __eq__), we can borrow both operands since identity + # comparison never touches the objects. 
+ if first_op in ("==", "!=", "is", "is not"): + right_expr = e.operands[1] + if _will_be_identity_comparison(builder, left_expr, right_expr, first_op): + left = builder.accept(left_expr, can_borrow=True) + right = builder.accept(right_expr, can_borrow=True) + return transform_basic_comparison(builder, first_op, left, right, e.line) + # TODO: Don't produce an expression when used in conditional context # All of the trickiness here is due to support for chained conditionals # (`e1 < e2 > e3`, etc). `e1 < e2 > e3` is approximately equivalent to @@ -1097,6 +1217,86 @@ def transform_basic_comparison( return target +def _will_be_identity_comparison( + builder: IRBuilder, left_expr: Expression, right_expr: Expression, op: str +) -> bool: + """Check if a comparison will lower to pointer identity (no __eq__ call). + + This is true for: + - 'is' / 'is not' (always identity) + - '==' / '!=' on RInstance types that have no custom __eq__ and where + comparison behavior can't vary at runtime + """ + if op in ("is", "is not"): + return True + # For == / !=, check if both sides are the same RInstance type with no __eq__ + left_type = builder.node_type(left_expr) + right_type = builder.node_type(right_expr) + if not (isinstance(left_type, RInstance) and left_type == right_type): + return False + cl = left_type.class_ir + if cl.has_method("__eq__"): + return False + if not cl.is_method_final("__eq__") or not cl.is_method_final("__ne__"): + return False + if cl.inherits_python or cl.is_augmented: + return False + return True + + +def _is_type_object(typ: ProperType | None) -> bool: + """Check if a type represents a type/class object (as opposed to an instance). + + Matches TypeType (from type(x)) and CallableType that is a class constructor + (from referencing a class name directly like A in 'type(x) == A'). 
+ """ + if isinstance(typ, TypeType): + return True + if isinstance(typ, CallableType) and typ.is_type_obj(): + return True + return False + + +def _is_intenum_type(typ: ProperType | None) -> bool: + """Check if a mypy type is an IntEnum subclass.""" + if not isinstance(typ, Instance): + return False + return any(base.fullname == "enum.IntEnum" for base in typ.type.mro) + + +def _is_int_type(typ: ProperType | None) -> bool: + if not isinstance(typ, Instance): + return False + return typ.type.fullname == "builtins.int" + + +def _should_use_int_comparison( + left_type: ProperType | None, right_type: ProperType | None, op: str +) -> bool: + """Check if a comparison should use fast int unboxing for IntEnum operands. + + For ordering ops (<, <=, >, >=): always use int comparison when both sides + are IntEnum or IntEnum vs int. + For ==/!=: only when one side is plain int, since IntEnum-vs-IntEnum equality + already uses fast identity comparison (enum members are singletons). + """ + left_is_intenum = _is_intenum_type(left_type) + right_is_intenum = _is_intenum_type(right_type) + left_is_int = _is_int_type(left_type) + right_is_int = _is_int_type(right_type) + + if op in ("==", "!="): + # Only optimize IntEnum vs int (not IntEnum vs IntEnum) + return (left_is_intenum and right_is_int) or (right_is_intenum and left_is_int) + + # Ordering ops: optimize any IntEnum involvement + if left_is_intenum and (right_is_intenum or right_is_int): + return True + if right_is_intenum and left_is_int: + return True + return False + + def translate_printf_style_formatting( builder: IRBuilder, format_expr: StrExpr | BytesExpr, rhs: Expression ) -> Value | None: diff --git a/mypyc/irbuild/ll_builder.py b/mypyc/irbuild/ll_builder.py index c19eded77464e..58368b63de912 100644 --- a/mypyc/irbuild/ll_builder.py +++ b/mypyc/irbuild/ll_builder.py @@ -104,6 +104,7 @@ is_bool_or_bit_rprimitive, is_bytes_rprimitive, is_c_py_ssize_t_rprimitive, + is_char_rprimitive, is_dict_rprimitive, 
is_fixed_width_rtype, is_float_rprimitive, @@ -697,6 +698,7 @@ def coerce_short_int_to_fixed_width(self, src: Value, target_type: RType, line: def coerce_fixed_width_to_int(self, src: Value, line: int) -> Value: if ( (is_int32_rprimitive(src.type) and PLATFORM_SIZE == 8) + or (is_char_rprimitive(src.type) and PLATFORM_SIZE == 8) or is_int16_rprimitive(src.type) or is_uint8_rprimitive(src.type) ): @@ -729,7 +731,7 @@ def coerce_fixed_width_to_int(self, src: Value, line: int) -> Value: self.activate_block(slow) if is_int64_rprimitive(src_type): conv_op = int64_to_int_op - elif is_int32_rprimitive(src_type): + elif is_int32_rprimitive(src_type) or is_char_rprimitive(src_type): assert PLATFORM_SIZE == 4 conv_op = ssize_t_to_int_op else: @@ -2138,6 +2140,12 @@ def bool_value(self, value: Value) -> Value: elif is_runtime_subtype(value.type, int_rprimitive): zero = Integer(0, short_int_rprimitive) result = self.comparison_op(value, zero, ComparisonOp.NEQ, value.line) + elif is_char_rprimitive(value.type): + # char is falsy only for the empty sentinel (-1). A codepoint of + # 0 (NUL) is a valid non-empty char and must be truthy, matching + # str bool semantics where ``"\0"`` is truthy. 
+ empty = Integer(-1, value.type) + result = self.add(ComparisonOp(value, empty, ComparisonOp.NEQ)) elif is_fixed_width_rtype(value.type): zero = Integer(0, value.type) result = self.add(ComparisonOp(value, zero, ComparisonOp.NEQ)) diff --git a/mypyc/irbuild/mapper.py b/mypyc/irbuild/mapper.py index 523c718c9b867..8ad14c4500ede 100644 --- a/mypyc/irbuild/mapper.py +++ b/mypyc/irbuild/mapper.py @@ -34,6 +34,7 @@ bool_rprimitive, bytearray_rprimitive, bytes_rprimitive, + char_rprimitive, dict_rprimitive, float_rprimitive, frozenset_rprimitive, @@ -124,6 +125,8 @@ def type_to_rtype(self, typ: Type | None) -> RType: return int16_rprimitive elif typ.type.fullname == "mypy_extensions.u8": return uint8_rprimitive + elif typ.type.fullname == "mypy_extensions.char": + return char_rprimitive elif typ.type.fullname == "librt.vecs.vec": return RVec(self.type_to_rtype(typ.args[0])) elif typ.type.fullname in KNOWN_NATIVE_TYPES: diff --git a/mypyc/irbuild/prepare.py b/mypyc/irbuild/prepare.py index f143ce1b44025..8b73b10bf8064 100644 --- a/mypyc/irbuild/prepare.py +++ b/mypyc/irbuild/prepare.py @@ -21,6 +21,7 @@ from mypy.nodes import ( ARG_STAR, ARG_STAR2, + AssignmentStmt, CallExpr, ClassDef, Decorator, @@ -39,7 +40,13 @@ from mypy.semanal import refers_to_fullname from mypy.traverser import TraverserVisitor from mypy.types import Instance, Type, get_proper_type -from mypyc.common import FAST_PREFIX, PROPSET_PREFIX, SELF_NAME, get_id_from_name +from mypyc.common import ( + FAST_PREFIX, + MYPYC_DEFAULTS_SETUP, + PROPSET_PREFIX, + SELF_NAME, + get_id_from_name, +) from mypyc.crash import catch_errors from mypyc.errors import Errors from mypyc.ir.class_ir import ClassIR @@ -55,6 +62,7 @@ from mypyc.ir.rtypes import ( RInstance, RType, + bool_rprimitive, dict_rprimitive, none_rprimitive, object_pointer_rprimitive, @@ -63,6 +71,8 @@ ) from mypyc.irbuild.mapper import Mapper from mypyc.irbuild.util import ( + dataclass_type, + default_attr_name, get_func_def, get_mypyc_attrs, 
is_dataclass, @@ -131,6 +141,24 @@ def build_type_map( if class_ir.is_ext_class: prepare_implicit_property_accessors(cdef.info, class_ir, module.fullname, mapper) + # Register __mypyc_defaults_setup FuncDecls on classes that have their own + # class-level default attribute assignments. Done here, before any IR build + # runs, so that the cross-class lookup in generate_attr_defaults_init is + # order-independent: IR build within a compilation group proceeds in + # filename order, so a subclass may be IR-built before its base. + for module, cdef in classes: + class_ir = mapper.type_to_ir[cdef.info] + if class_ir.is_ext_class and _has_own_default_attrs(cdef, class_ir): + _register_defaults_setup_decl(class_ir, module.fullname) + + # Validate __deletable__ declarations. Done here so the compiler exits + # early on invalid input before any IR is built. + for module, cdef in classes: + class_ir = mapper.type_to_ir[cdef.info] + if class_ir.is_ext_class: + with catch_errors(module.path, cdef.line): + _check_deletable_declarations(module.path, cdef, class_ir, errors) + # Collect all the functions also. We collect from the symbol table # so that we can easily pick out the right copy of a function that # is conditionally defined. This doesn't include nested functions! @@ -408,6 +436,68 @@ def validate_acyclic_class_bases( ) +def _has_own_default_attrs(cdef: ClassDef, ir: ClassIR) -> bool: + """Whether this class's own body has any default attribute assignment + that would be emitted into __mypyc_defaults_setup. + + Used during prepare to decide whether to register a + __mypyc_defaults_setup FuncDecl ahead of IR build. 
+ """ + if ir.builtin_base or ir.is_trait: + return False + cls_type = dataclass_type(cdef) + return any( + default_attr_name(stmt, ir, cls_type) is not None + for stmt in cdef.info.defn.defs.body + if isinstance(stmt, AssignmentStmt) + ) + + +def _register_defaults_setup_decl(ir: ClassIR, module_name: str) -> None: + sig = FuncSignature([RuntimeArg(SELF_NAME, RInstance(ir))], bool_rprimitive) + ir.method_decls[MYPYC_DEFAULTS_SETUP] = FuncDecl( + MYPYC_DEFAULTS_SETUP, ir.name, module_name, sig + ) + + +def _check_deletable_declarations(path: str, cdef: ClassDef, ir: ClassIR, errors: Errors) -> None: + """Validate that attributes listed in __deletable__ refer to definable + attributes on the class. + + Runs in the prepare phase so we exit early on invalid programs before + any IR is built. + """ + if not ir.deletable: + return + line = next( + ( + stmt.line + for stmt in cdef.info.defn.defs.body + if isinstance(stmt, AssignmentStmt) + and isinstance(stmt.lvalues[0], NameExpr) + and stmt.lvalues[0].name == "__deletable__" + ), + cdef.line, + ) + for attr in ir.deletable: + if attr not in ir.attributes: + if not ir.has_attr(attr): + errors.error(f'Attribute "{attr}" not defined', path, line) + continue + for base in ir.mro: + if attr in base.property_types: + errors.error(f'Cannot make property "{attr}" deletable', path, line) + break + else: + _, base = ir.attr_details(attr) + errors.error( + f'Attribute "{attr}" not defined in "{ir.name}" ' + f'(defined in "{base.name}")', + path, + line, + ) + + def prepare_class_def( path: str, module_name: str, diff --git a/mypyc/irbuild/specialize.py b/mypyc/irbuild/specialize.py index a397143d457af..c26356a42433a 100644 --- a/mypyc/irbuild/specialize.py +++ b/mypyc/irbuild/specialize.py @@ -1189,6 +1189,28 @@ def translate_float(builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Valu return None +def try_emit_str_index_as_int(builder: IRBuilder, index_expr: IndexExpr) -> Value | None: + """If ``index_expr`` is ``s[i]`` 
where ``s: str`` and ``i`` is an int-like + value, emit the fast path that reads the character as its integer codepoint + (with bounds checking). Returns None if the pattern does not apply. + """ + base_type = builder.node_type(index_expr.base) + if not is_str_rprimitive(base_type): + return None + idx_type = builder.node_type(index_expr.index) + if not (is_tagged(idx_type) or is_fixed_width_rtype(idx_type)): + return None + return translate_getitem_with_bounds_check( + builder, + index_expr.base, + [index_expr.index], + index_expr, + str_adjust_index_op, + str_range_check_op, + str_get_item_unsafe_as_int_op, + ) + + @specialize_function("builtins.ord") def translate_ord(builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Value | None: if len(expr.args) != 1 or expr.arg_kinds[0] != ARG_POS: @@ -1200,25 +1222,9 @@ def translate_ord(builder: IRBuilder, expr: CallExpr, callee: RefExpr) -> Value # Check for ord(s[i]) where s is str and i is an integer if isinstance(arg_expr, IndexExpr): - # Check base type - base_type = builder.node_type(arg_expr.base) - if is_str_rprimitive(base_type): - # Check index type - index_expr = arg_expr.index - index_type = builder.node_type(index_expr) - if is_tagged(index_type) or is_fixed_width_rtype(index_type): - # This is ord(s[i]) where s is str and i is an integer. - # Generate specialized inline code using the helper. 
- result = translate_getitem_with_bounds_check( - builder, - arg_expr.base, - [arg_expr.index], - expr, - str_adjust_index_op, - str_range_check_op, - str_get_item_unsafe_as_int_op, - ) - return result + result = try_emit_str_index_as_int(builder, arg_expr) + if result is not None: + return result return None diff --git a/mypyc/irbuild/util.py b/mypyc/irbuild/util.py index 5eda51a1a5dea..a6f793ccdc1a1 100644 --- a/mypyc/irbuild/util.py +++ b/mypyc/irbuild/util.py @@ -12,6 +12,7 @@ ARG_POS, GDEF, ArgKind, + AssignmentStmt, BytesExpr, CallExpr, ClassDef, @@ -24,13 +25,17 @@ OverloadedFuncDef, RefExpr, StrExpr, + TempNode, TupleExpr, UnaryExpr, Var, + is_class_var, ) from mypy.semanal import refers_to_fullname from mypy.types import FINAL_DECORATOR_NAMES from mypyc.errors import Errors +from mypyc.ir.class_ir import ClassIR +from mypyc.ir.rtypes import is_none_rprimitive, is_object_rprimitive, is_optional_type MYPYC_ATTRS: Final[frozenset[MypycAttr]] = frozenset( ["native_class", "allow_interpreted_subclasses", "serializable", "free_list_len", "acyclic"] @@ -102,6 +107,50 @@ def dataclass_type(cdef: ClassDef) -> str | None: return None +def _defaults_skip(stmt: AssignmentStmt, cls_type: str | None) -> bool: + """Whether a class-level default assignment is skipped when emitting + __mypyc_defaults_setup, based on class type. + + - attr (auto_attribs=False): skip all (handled by attr.ib machinery). + - dataclasses / attr-auto: skip annotated assignments. + - regular extension class: skip nothing. + """ + if cls_type == "attr": + return True + if cls_type in ("dataclasses", "attr-auto"): + return stmt.type is not None + return False + + +def default_attr_name(stmt: AssignmentStmt, ir: ClassIR, cls_type: str | None) -> str | None: + """Return the attribute name if `stmt` is a class-level default assignment + that __mypyc_defaults_setup should emit; otherwise None. 
+ + Single source of truth for the predicate used by both + mypyc.irbuild.classdef.find_attr_initializers (IR build) and + mypyc.irbuild.prepare._has_own_default_attrs (prepare-phase decl registration). + """ + lvalue = stmt.lvalues[0] + if not isinstance(lvalue, NameExpr) or is_class_var(lvalue): + return None + if isinstance(stmt.rvalue, TempNode): + return None + name = lvalue.name + if name in ("__slots__", "__deletable__") or name not in ir.attributes: + return None + if _defaults_skip(stmt, cls_type): + return None + if isinstance(stmt.rvalue, RefExpr) and stmt.rvalue.fullname == "builtins.None": + attr_type = ir.attributes[name] + if ( + not is_optional_type(attr_type) + and not is_object_rprimitive(attr_type) + and not is_none_rprimitive(attr_type) + ): + return None + return name + + def get_mypyc_attr_literal(e: Expression) -> Any: """Convert an expression from a mypyc_attr decorator to a value. diff --git a/mypyc/lib-rt/CPy.h b/mypyc/lib-rt/CPy.h index 89ef4d0749a45..a827862d3315c 100644 --- a/mypyc/lib-rt/CPy.h +++ b/mypyc/lib-rt/CPy.h @@ -783,9 +783,34 @@ CPyTagged CPyStr_Ord(PyObject *obj); PyObject *CPyStr_Multiply(PyObject *str, CPyTagged count); PyObject *CPyStr_Lower(PyObject *str); PyObject *CPyStr_Upper(PyObject *str); +// Unbox a 0- or 1-char str to an int32 codepoint: empty -> -1 sentinel, +// 1-char -> codepoint. Non-str or multi-char raises and returns -113. 
+#define CPY_CHAR_EMPTY (-1) +static inline int32_t CPyChar_FromObject(PyObject *obj) { + if (!PyUnicode_Check(obj)) { + PyErr_Format(PyExc_TypeError, "char expected, got %.50s", Py_TYPE(obj)->tp_name); + return -113; + } + Py_ssize_t n = PyUnicode_GET_LENGTH(obj); + if (n == 0) return CPY_CHAR_EMPTY; + if (n != 1) { + PyErr_SetString(PyExc_ValueError, "char expects a 0- or 1-character string"); + return -113; + } + return (int32_t)PyUnicode_READ_CHAR(obj, 0); +} + +static inline PyObject *CPyChar_ToStr(int32_t c) { + if (c == CPY_CHAR_EMPTY) { + return PyUnicode_FromStringAndSize("", 0); + } + return PyUnicode_FromOrdinal((int)c); +} + bool CPyStr_IsSpace(PyObject *str); bool CPyStr_IsAlnum(PyObject *str); bool CPyStr_IsDigit(PyObject *str); +bool CPyStr_IsAlpha(PyObject *str); // Bytes operations diff --git a/mypyc/lib-rt/str_extra_ops.h b/mypyc/lib-rt/str_extra_ops.h index 82f92bf85d46a..43f9f8465d490 100644 --- a/mypyc/lib-rt/str_extra_ops.h +++ b/mypyc/lib-rt/str_extra_ops.h @@ -26,4 +26,88 @@ static inline CPyTagged CPyStr_GetItemUnsafeAsInt(PyObject *obj, int64_t index) return PyUnicode_READ(kind, PyUnicode_DATA(obj), index) << 1; } +// Bounds-checked codepoint read returning int32. Error sentinel -113 on +// out-of-range / non-short index. Used by char_str_index_fold to avoid the +// 1-char PyObject alloc when the result is immediately unboxed to char. 
+static inline int32_t CPyStr_GetCharAt(PyObject *s, CPyTagged index_tagged) { + Py_ssize_t i; + if (likely(CPyTagged_CheckShort(index_tagged))) { + i = CPyTagged_ShortAsSsize_t(index_tagged); + } else { + PyObject *c = CPyStr_GetItem(s, index_tagged); + if (c == NULL) return -113; + int32_t cp = (int32_t)PyUnicode_READ_CHAR(c, 0); + Py_DECREF(c); + return cp; + } + Py_ssize_t n = PyUnicode_GET_LENGTH(s); + if (i < 0) i += n; + if (i < 0 || i >= n) { + PyErr_SetString(PyExc_IndexError, "string index out of range"); + return -113; + } + return (int32_t)PyUnicode_READ(PyUnicode_KIND(s), PyUnicode_DATA(s), i); +} + +// char-codepoint classification. Negative c (empty sentinel / invalid) +// returns false. Py_UNICODE_IS* have their own ASCII fast paths. + +static inline bool CPyChar_IsSpace(int32_t c) { + return c >= 0 && Py_UNICODE_ISSPACE((Py_UCS4)c); +} + +static inline bool CPyChar_IsDigit(int32_t c) { + return c >= 0 && Py_UNICODE_ISDIGIT((Py_UCS4)c); +} + +static inline bool CPyChar_IsAlnum(int32_t c) { + return c >= 0 && Py_UNICODE_ISALNUM((Py_UCS4)c); +} + +static inline bool CPyChar_IsAlpha(int32_t c) { + return c >= 0 && Py_UNICODE_ISALPHA((Py_UCS4)c); +} + +// .isidentifier(): ASCII fast path matches XID_Start; non-ASCII delegates +// to CPython for correct XID_Start handling. +static inline bool CPyChar_IsIdentifier(int32_t c) { + if (c < 0) return false; + if (c < 128) return Py_ISALPHA((unsigned char)c) || c == (int32_t)'_'; + PyObject *s = PyUnicode_FromOrdinal((int)c); + if (s == NULL) { PyErr_Clear(); return false; } + int r = PyUnicode_IsIdentifier(s); + Py_DECREF(s); + return r == 1; +} + +// Delegated Unicode case conversion for non-ASCII letters. Returns c +// unchanged when the str method produces multi-char (e.g. ß -> SS) or +// non-alpha ASCII. -113 + exception on OOM / CPython error. 
+static inline int32_t CPyChar_ChangeCase(int32_t c, const char *method) { + if (c < 128) return c; + PyObject *s = PyUnicode_FromOrdinal((int)c); + if (s == NULL) return -113; + PyObject *u = PyObject_CallMethod(s, method, NULL); + Py_DECREF(s); + if (u == NULL) return -113; + int32_t result = c; + if (PyUnicode_GET_LENGTH(u) == 1) { + result = (int32_t)PyUnicode_READ_CHAR(u, 0); + } + Py_DECREF(u); + return result; +} + +// .upper() / .lower(): ASCII-letter fast path; everything else goes +// through CPyChar_ChangeCase. +static inline int32_t CPyChar_Upper(int32_t c) { + if (c >= (int32_t)'a' && c <= (int32_t)'z') return c - 32; + return CPyChar_ChangeCase(c, "upper"); +} + +static inline int32_t CPyChar_Lower(int32_t c) { + if (c >= (int32_t)'A' && c <= (int32_t)'Z') return c + 32; + return CPyChar_ChangeCase(c, "lower"); +} + #endif diff --git a/mypyc/lib-rt/str_ops.c b/mypyc/lib-rt/str_ops.c index 5bc2d6935fc07..d8e6e8a084469 100644 --- a/mypyc/lib-rt/str_ops.c +++ b/mypyc/lib-rt/str_ops.c @@ -683,6 +683,29 @@ bool CPyStr_IsAlnum(PyObject *str) { return true; } +bool CPyStr_IsAlpha(PyObject *str) { + Py_ssize_t len = PyUnicode_GET_LENGTH(str); + if (len == 0) return false; + + if (PyUnicode_IS_ASCII(str)) { + const Py_UCS1 *data = PyUnicode_1BYTE_DATA(str); + for (Py_ssize_t i = 0; i < len; i++) { + if (!Py_ISALPHA(data[i])) + return false; + } + return true; + } + + int kind = PyUnicode_KIND(str); + const void *data = PyUnicode_DATA(str); + for (Py_ssize_t i = 0; i < len; i++) { + Py_UCS4 ch = PyUnicode_READ(kind, data, i); + if (!Py_UNICODE_ISALPHA(ch)) + return false; + } + return true; +} + static inline int CPy_ASCII_Lower(unsigned char c) { return Py_TOLOWER(c); } static inline int CPy_ASCII_Upper(unsigned char c) { return Py_TOUPPER(c); } diff --git a/mypyc/primitives/str_ops.py b/mypyc/primitives/str_ops.py index f313f0a8386c1..b46c655c94d19 100644 --- a/mypyc/primitives/str_ops.py +++ b/mypyc/primitives/str_ops.py @@ -11,6 +11,7 @@ bytes_rprimitive, 
c_int_rprimitive, c_pyssize_t_rprimitive, + char_rprimitive, int64_rprimitive, int_rprimitive, list_rprimitive, @@ -439,6 +440,14 @@ error_kind=ERR_NEVER, ) +method_op( + name="isalpha", + arg_types=[str_rprimitive], + return_type=bool_rprimitive, + c_function_name="CPyStr_IsAlpha", + error_kind=ERR_NEVER, +) + # obj.decode() method_op( @@ -585,3 +594,64 @@ error_kind=ERR_NEVER, dependencies=[STR_EXTRA_OPS], ) + +# Char classification / case-conversion method_ops routed to codepoint +# helpers in str_extra_ops.h. +method_op( + name="isspace", + arg_types=[char_rprimitive], + return_type=bool_rprimitive, + c_function_name="CPyChar_IsSpace", + error_kind=ERR_NEVER, + dependencies=[STR_EXTRA_OPS], +) +method_op( + name="isdigit", + arg_types=[char_rprimitive], + return_type=bool_rprimitive, + c_function_name="CPyChar_IsDigit", + error_kind=ERR_NEVER, + dependencies=[STR_EXTRA_OPS], +) +method_op( + name="isalnum", + arg_types=[char_rprimitive], + return_type=bool_rprimitive, + c_function_name="CPyChar_IsAlnum", + error_kind=ERR_NEVER, + dependencies=[STR_EXTRA_OPS], +) +method_op( + name="isalpha", + arg_types=[char_rprimitive], + return_type=bool_rprimitive, + c_function_name="CPyChar_IsAlpha", + error_kind=ERR_NEVER, + dependencies=[STR_EXTRA_OPS], +) +method_op( + name="isidentifier", + arg_types=[char_rprimitive], + return_type=bool_rprimitive, + c_function_name="CPyChar_IsIdentifier", + error_kind=ERR_NEVER, + dependencies=[STR_EXTRA_OPS], +) +# ERR_MAGIC: non-ASCII path can raise MemoryError from PyObject_CallMethod. +# char's error_overlap=False makes the -113 sentinel authoritative. 
+method_op( + name="upper", + arg_types=[char_rprimitive], + return_type=char_rprimitive, + c_function_name="CPyChar_Upper", + error_kind=ERR_MAGIC, + dependencies=[STR_EXTRA_OPS], +) +method_op( + name="lower", + arg_types=[char_rprimitive], + return_type=char_rprimitive, + c_function_name="CPyChar_Lower", + error_kind=ERR_MAGIC, + dependencies=[STR_EXTRA_OPS], +) diff --git a/mypyc/test-data/irbuild-classes.test b/mypyc/test-data/irbuild-classes.test index d13bd956e1259..920bd40d210fa 100644 --- a/mypyc/test-data/irbuild-classes.test +++ b/mypyc/test-data/irbuild-classes.test @@ -127,10 +127,11 @@ L0: keep_alive self if r2 goto L1 else goto L2 :: bool L1: - r3 = self.next - r4 = cast(__main__.Node, r3) + r3 = borrow self.next + r4 = borrow cast(__main__.Node, r3) r5 = r4.length() r6 = CPyTagged_Add(2, r5) + keep_alive self, r3 return r6 L2: return 2 @@ -1135,7 +1136,7 @@ class Ok2: __deletable__ = ['x'] x: int -[case testInvalidDeletableAttribute] +[case testDeleteNonDeletableAttribute] class NotDeletable: __deletable__ = ['x'] x: int @@ -1146,6 +1147,7 @@ def g(o: NotDeletable) -> None: del o.y # E: "y" cannot be deleted \ # N: Using "__deletable__ = ['']" in the class body enables "del obj." +[case testInvalidDeletableAttribute] class Base: x: int diff --git a/mypyc/test-data/run-base64.test b/mypyc/test-data/run-base64.test index 022a0e8c31136..1e0eb65bbd12c 100644 --- a/mypyc/test-data/run-base64.test +++ b/mypyc/test-data/run-base64.test @@ -147,6 +147,13 @@ def test_decode_with_invalid_padding() -> None: check_decode_error(b"eA==x", ignore_stdlib=not has_stdlib_b64decode_bugfix()) def test_decode_with_extra_data_after_padding() -> None: + # Recent Python releases (3.13.x patch-level and 3.14+) tightened stdlib + # base64.b64decode to raise binascii.Error on trailing data after padding, + # while our native implementation still accepts it. Skip the stdlib + # equivalence check on those versions rather than diverging from CPython. 
+ import sys + if sys.version_info >= (3, 13): # type: ignore[operator] + return check_decode(b"=", encoded=True) check_decode(b"==", encoded=True) check_decode(b"===", encoded=True) diff --git a/mypyc/test-data/run-char.test b/mypyc/test-data/run-char.test new file mode 100644 index 0000000000000..9fc6424c362d3 --- /dev/null +++ b/mypyc/test-data/run-char.test @@ -0,0 +1,271 @@ +[case testCharBoxUnbox] +from typing import Any +from mypy_extensions import char + +def test_box_and_unbox() -> None: + # Single-char strings + for s in ["a", "Z", "0", " ", "\n", "\0"]: + o: Any = s + c: char = o + o2: Any = c + assert o2 == s, f"round-trip failed for {s!r}: got {o2!r}" + + # Empty string -> empty sentinel + o3: Any = "" + c2: char = o3 + o4: Any = c2 + assert o4 == "", f"empty round-trip failed: got {o4!r}" + +def test_unbox_errors() -> None: + # Multi-char string should raise ValueError + try: + o: Any = "ab" + c: char = o + assert False, "should have raised" + except ValueError: + pass + + # Non-str should raise TypeError + try: + o2: Any = 42 + c2: char = o2 + assert False, "should have raised" + except TypeError: + pass + +[case testCharBool] +from mypy_extensions import char + +def is_truthy(c: char) -> bool: + return bool(c) + +def test_bool() -> None: + # Non-empty chars are truthy (including NUL) + assert is_truthy(char("a")) + assert is_truthy(char("\0")) + assert is_truthy(char(" ")) + # Empty sentinel is falsy + assert not is_truthy(char("")) + +[case testCharEquality] +from mypy_extensions import char + +def eq(a: char, b: char) -> bool: + return a == b + +def ne(a: char, b: char) -> bool: + return a != b + +def eq_literal(c: char) -> bool: + return c == "x" + +def ne_literal(c: char) -> bool: + return c != "\n" + +def test_equality() -> None: + assert eq(char("a"), char("a")) + assert not eq(char("a"), char("b")) + assert ne(char("a"), char("b")) + assert not ne(char("a"), char("a")) + + # Empty sentinels + assert eq(char(""), char("")) + assert 
ne(char(""), char("a")) + + # Literal comparisons + assert eq_literal(char("x")) + assert not eq_literal(char("y")) + assert ne_literal(char("a")) + assert not ne_literal(char("\n")) + +[case testCharClassification] +from mypy_extensions import char + +def test_isspace() -> None: + assert char(" ").isspace() + assert char("\t").isspace() + assert char("\n").isspace() + assert not char("a").isspace() + assert not char("1").isspace() + +def test_isdigit() -> None: + assert char("0").isdigit() + assert char("9").isdigit() + assert not char("a").isdigit() + assert not char(" ").isdigit() + +def test_isalnum() -> None: + assert char("a").isalnum() + assert char("Z").isalnum() + assert char("5").isalnum() + assert not char(" ").isalnum() + assert not char("!").isalnum() + +def test_isalpha() -> None: + assert char("a").isalpha() + assert char("Z").isalpha() + assert not char("5").isalpha() + assert not char(" ").isalpha() + +def test_isidentifier() -> None: + assert char("a").isidentifier() + assert char("_").isidentifier() + assert char("Z").isidentifier() + assert not char("0").isidentifier() + assert not char(" ").isidentifier() + assert not char("+").isidentifier() + +[case testCharUpper] +from mypy_extensions import char + +def upper(c: char) -> char: + return c.upper() + +def test_upper() -> None: + # ASCII lowercase -> uppercase + assert upper(char("a")) == "A" + assert upper(char("z")) == "Z" + # Already uppercase or non-alpha -> unchanged + assert upper(char("A")) == "A" + assert upper(char("0")) == "0" + assert upper(char(" ")) == " " + +def test_upper_non_ascii() -> None: + # Non-ASCII letters whose uppercase is a single codepoint + # route through the PyObject_CallMethod fallback. 
+ assert upper(char("\u03c0")) == "\u03a0" # π -> Π + assert upper(char("\u00f1")) == "\u00d1" # ñ -> Ñ + assert upper(char("\u00b5")) == "\u039c" # µ (micro) -> Μ (Greek Mu) + +def test_upper_multi_char_fallback() -> None: + # Pinning test: when str.upper() produces >1 codepoint, char can't + # hold the result so the original codepoint is returned unchanged. + # Callers that need full Unicode casing (ß -> SS, ligatures) must + # use str(c).upper(). + assert upper(char("\u00df")) == "\u00df" # ß -> ß (not SS) + assert upper(char("\ufb01")) == "\ufb01" # ﬁ -> ﬁ (not FI) + +def lower(c: char) -> char: + return c.lower() + +def test_lower() -> None: + # ASCII uppercase -> lowercase + assert lower(char("A")) == "a" + assert lower(char("Z")) == "z" + # Already lowercase or non-alpha -> unchanged + assert lower(char("a")) == "a" + assert lower(char("0")) == "0" + assert lower(char(" ")) == " " + +def test_lower_non_ascii() -> None: + # Non-ASCII letters whose lowercase is a single codepoint route + # through the PyObject_CallMethod fallback. + assert lower(char("\u03a0")) == "\u03c0" # Π -> π + assert lower(char("\u00d1")) == "\u00f1" # Ñ -> ñ + +[case testCharIsIdentifierNonAscii] +from mypy_extensions import char + +def is_id(c: char) -> bool: + return c.isidentifier() + +def test_non_ascii_isidentifier() -> None: + # Non-ASCII identifier chars — delegates to PyUnicode_IsIdentifier + # for correct XID_Start handling. The old ASCII-only approximation + # got some of these wrong. 
+ assert is_id(char("\u03c0")) # π — XID_Start (Greek letter) + assert is_id(char("\u00b5")) # µ — XID_Start (micro sign) + assert is_id(char("\u00f1")) # ñ — XID_Start (Latin letter) + # Non-XID_Start characters + assert not is_id(char("\u00b2")) # ² — superscript, not XID_Start + assert not is_id(char("\u00a9")) # © — symbol, not XID_Start + +[case testCharHighUnicode] +from typing import Any +from mypy_extensions import char + +def roundtrip(s: str) -> str: + c: char = s + out: Any = c + return out + +def test_high_codepoints() -> None: + # Emoji and other astral-plane codepoints (> 0xFFFF) round-trip + # correctly since char stores int32 (full Unicode range fits). + emoji = "\U0001F600" + assert ord(emoji) == 0x1F600 + assert roundtrip(emoji) == emoji + + # BMP boundary + bmp = "\uFFFD" # replacement character + assert roundtrip(bmp) == bmp + +def test_high_codepoint_bool() -> None: + # High codepoints are truthy (not the -1 sentinel). + c: char = "\U0001F600" + assert bool(c) + +[case testCharOrd] +from mypy_extensions import char + +def char_ord(c: char) -> int: + return ord(c) + +def test_ord() -> None: + assert char_ord(char("a")) == 97 + assert char_ord(char("A")) == 65 + assert char_ord(char("\0")) == 0 + assert char_ord(char("\u03c0")) == 960 # π + assert char_ord(char("\U0001F600")) == 128512 # emoji + +[case testCharStrPromotion] +from typing import Any +from mypy_extensions import char + +def char_to_str(c: char) -> str: + return c + +def str_to_char(s: str) -> char: + return s + +def test_promotion() -> None: + # char -> str + s: str = char_to_str(char("x")) + assert s == "x" + assert type(s) is str + + # str -> char (1-char string) + c: char = str_to_char("y") + s2: Any = c + assert s2 == "y" + + # Empty string promotion + s3: str = char_to_str(char("")) + assert s3 == "" + +[case testCharConcat] +from mypy_extensions import char + +def test_concat() -> None: + c: char = char("a") + # char + str + result: str = c + "bc" + assert 
result == "abc" + # str + char + result2: str = "xy" + c + assert result2 == "xya" + +[case testCharStrIndexCompare] +def compare_index(s: str, i: int, ch: str) -> bool: + return s[i] == ch + +def compare_index_ne(s: str, i: int, ch: str) -> bool: + return s[i] != ch + +def test_str_index_compare() -> None: + s = "hello world" + assert compare_index(s, 0, "h") + assert not compare_index(s, 0, "x") + assert compare_index(s, 5, " ") + assert compare_index_ne(s, 0, "x") + assert not compare_index_ne(s, 0, "h") diff --git a/mypyc/test-data/run-classes.test b/mypyc/test-data/run-classes.test index 39172a6385696..907e4c49d22d1 100644 --- a/mypyc/test-data/run-classes.test +++ b/mypyc/test-data/run-classes.test @@ -2819,6 +2819,95 @@ from native import Player [out] Player.MIN = +[case testIntEnumComparison] +from enum import IntEnum + +class Color(IntEnum): + RED = 1 + GREEN = 2 + BLUE = 3 + +def test_eq(a: Color, b: Color) -> bool: + return a == b + +def test_ne(a: Color, b: Color) -> bool: + return a != b + +def test_lt(a: Color, b: Color) -> bool: + return a < b + +def test_le(a: Color, b: Color) -> bool: + return a <= b + +def test_gt(a: Color, b: Color) -> bool: + return a > b + +def test_ge(a: Color, b: Color) -> bool: + return a >= b + +def test_enum_vs_int(a: Color, x: int) -> bool: + return a == x + +def test_int_vs_enum(x: int, a: Color) -> bool: + return x != a +[file driver.py] +from native import Color, test_eq, test_ne, test_lt, test_le, test_gt, test_ge, test_enum_vs_int, test_int_vs_enum + +# eq/ne +assert test_eq(Color.RED, Color.RED) +assert not test_eq(Color.RED, Color.GREEN) +assert test_ne(Color.RED, Color.GREEN) +assert not test_ne(Color.RED, Color.RED) + +# ordering +assert test_lt(Color.RED, Color.GREEN) +assert not test_lt(Color.GREEN, Color.RED) +assert not test_lt(Color.RED, Color.RED) +assert test_le(Color.RED, Color.GREEN) +assert test_le(Color.RED, Color.RED) +assert test_gt(Color.GREEN, Color.RED) +assert not test_gt(Color.RED, Color.RED) 
+assert test_ge(Color.GREEN, Color.RED) +assert test_ge(Color.RED, Color.RED) + +# enum vs int +assert test_enum_vs_int(Color.RED, 1) +assert not test_enum_vs_int(Color.RED, 2) +assert test_int_vs_enum(2, Color.RED) +assert not test_int_vs_enum(1, Color.RED) + +[case testTypePointerComparison] +class Base: + pass + +class A(Base): + pass + +class B(Base): + pass + +def check_type_eq(x: Base, cls: type) -> bool: + return type(x) == cls + +def check_type_ne(x: Base, cls: type) -> bool: + return type(x) != cls +[file driver.py] +from native import A, B, Base, check_type_eq, check_type_ne + +a = A() +b = B() +base = Base() + +assert check_type_eq(a, A) +assert not check_type_eq(a, B) +assert not check_type_eq(a, Base) +assert check_type_eq(b, B) +assert check_type_eq(base, Base) + +assert check_type_ne(a, B) +assert check_type_ne(a, Base) +assert not check_type_ne(a, A) + [case testBufferRoundTrip_librt_internal] from __future__ import annotations diff --git a/mypyc/test-data/run-multimodule.test b/mypyc/test-data/run-multimodule.test index 3ab589ab1530a..f00aeb54d2ea9 100644 --- a/mypyc/test-data/run-multimodule.test +++ b/mypyc/test-data/run-multimodule.test @@ -1391,6 +1391,60 @@ def translate(b: bytes) -> bytes: import native assert native.translate(b'ABCD') == b'BBCD' +[case testIncrementalCrossGroupExportTableOffsets] +# Regression: under separate=True, each consumer module's IR is +# compiled against the positional layout of its deps' +# `exports_` struct. Reordering the dep's classes keeps the +# same set of public names, so mypy's interface hash for the dep is +# unchanged -- the consumer is not invalidated and stays fully +# cached, which causes `_load_cached_group_files` to return empty +# cfile content for the consumer's group. 
+# +# Before the fix, `get_header_deps` over empty content returned no +# includes, so `Extension.depends` for the consumer ended up empty +# and setuptools never recompiled the consumer's .o when the dep's +# `__native_.h` shifted struct offsets. The stale .o kept the +# old offsets and silently resolved cross-group calls to the wrong +# class. +from other_classes import Gamma, Delta + +def make_gamma() -> Gamma: + return Gamma() + +def make_delta() -> Delta: + return Delta() + +[file other_classes.py] +class Alpha: + a: int = 1 + +class Beta: + b: int = 2 + +class Gamma: + g: int = 3 + +class Delta: + d: int = 4 + +[file other_classes.py.2] +class Delta: + d: int = 4 + +class Alpha: + a: int = 1 + +class Beta: + b: int = 2 + +class Gamma: + g: int = 3 + +[file driver.py] +import native +assert type(native.make_gamma()).__name__ == "Gamma" +assert type(native.make_delta()).__name__ == "Delta" + [case testCrossModuleAttrDefaults] from other import Parent @@ -1641,3 +1695,169 @@ class Base: from native import make_child assert make_child(7) == "child(7)" assert make_child(-1) == "child(-1)" + +[case testIncrementalCrossModuleInheritedAttrDefaults] +# Regression: under separate=True, when only the subclass module is +# recompiled (parent loaded from mypy's incremental cache, so its +# ClassDef.defs.body is empty), the subclass produces no +# __mypyc_defaults_setup of its own and ClassIR.get_method returns +# the parent's. The emitted call must use the cross-group +# exports_. prefix, otherwise the generated C references an +# undeclared symbol and clang/gcc fail to compile. 
+import other_a + +def test() -> None: + c = other_a.Child() + assert c.x == 1 + assert c.y == "hello" + +[file other_b.py] +class Parent: + x: int = 1 + y: str = "hello" + +[file other_a.py] +from other_b import Parent + +class Child(Parent): + pass + +[file other_a.py.2] +from other_b import Parent + +class Child(Parent): + pass + +def _force_recompile() -> int: + return 1 + +[file driver.py] +from native import test +test() + +[case testIncrementalBuiltinBaseClassConstruction] +# Regression: builtin_base classes (Exception subclasses) were unconditionally +# added to func_to_decl in load_type_map, causing cross-group call sites to +# emit CPyDef instead of CPyType for the constructor. +from other_errors import MyError +from other_util import process + +def run(value: str) -> None: + if not value: + raise MyError("empty") + +def compute(x: str) -> str: + result = process(x) + if not result: + raise MyError("no result") + return result + +[file other_errors.py] +class MyError(Exception): + pass + +[file other_util.py] +def process(x: str) -> str: + return x + +[file other_util.py.2] +def process(x: str, flag: bool = False) -> str: + return x.strip() + +[file driver.py] +from native import run, compute +try: + run("") +except Exception as e: + print(str(e)) +print(compute("hello")) + +[out] +empty +hello +[out2] +empty +hello + +[case testIncrementalCrossModuleInheritedAttrDefaultsWithOverride] +# Regression: same shape as testIncrementalCrossModuleInheritedAttrDefaults, +# but the subclass adds an attribute of its own, so generate_attr_defaults_init +# emits a __mypyc_defaults_setup for it. Before the fix, the recompiled +# subclass walked the parent's ClassDef.defs.body to collect inherited +# defaults; when the parent was loaded from mypy's incremental cache that +# body was empty, so the inherited initialization was dropped and any +# access to an inherited attribute through compiled code raised +# "AttributeError: attribute '' of '' undefined". 
+import other_a + +def test() -> None: + c = other_a.Child() + # Inherited attributes must still be initialized after the subclass + # has been recompiled against a cache-loaded parent. + assert c.x == 1 + assert c.y == "hello" + # Own override is set by the subclass's own __mypyc_defaults_setup. + assert c.z is True + # Method defined on the parent reads an inherited attribute through + # the compiled path; this is what crashes pre-fix. + assert c.use() == 1 + +[file other_b.py] +class Parent: + x: int = 1 + y: str = "hello" + z: bool = False + + def use(self) -> int: + if self.x: + return 1 + return 0 + +[file other_a.py] +from other_b import Parent + +class Child(Parent): + z: bool = True + +[file other_a.py.2] +from other_b import Parent + +class Child(Parent): + z: bool = True + +def _force_recompile() -> int: + return 1 + +[file driver.py] +from native import test +test() + +[case testCrossModuleInheritedAttrDefaultsSameGroup] +# separate: [(["native.py"], "grp1"), (["other_a.py", "other_b.py"], "grp2")] +# Regression: with the subclass (other_a) and base (other_b) in the same +# compilation group, IR build runs alphabetically within the group, so +# the subclass is IR-built before the base. The decision to emit +# __mypyc_defaults_setup (and a chained call to the ancestor's) must be +# set up in the prepare phase, before any IR build runs; otherwise the +# subclass's lookup of the parent's setup decl misses and inherited +# defaults are lost on a fresh build. 
+import other_a + +def test() -> None: + c = other_a.Child() + assert c.x == 1 + assert c.z is True + +[file other_b.py] +class Parent: + x: int = 1 + +[file other_a.py] +from other_b import Parent + +class Child(Parent): + z: bool = True + +[file driver.py] +from native import test +test() diff --git a/mypyc/test/test_emitclass.py b/mypyc/test/test_emitclass.py index eb04b22495de6..9c3cd02d1100c 100644 --- a/mypyc/test/test_emitclass.py +++ b/mypyc/test/test_emitclass.py @@ -2,8 +2,10 @@ import unittest +from mypyc.analysis.attrdefined import detect_undefined_bitmap from mypyc.codegen.emitclass import getter_name, setter_name, slot_key from mypyc.ir.class_ir import ClassIR +from mypyc.ir.rtypes import int32_rprimitive from mypyc.namegen import NameGenerator @@ -33,3 +35,22 @@ def test_getter_name(self) -> None: generator = NameGenerator([["mod"]]) assert getter_name(cls, "down", generator) == "testing___SomeClass_get_down" + + def test_bitmap_attrs_stable_across_repeat_analysis(self) -> None: + # Regression: detect_undefined_bitmap used to mutate cl.bitmap_attrs + # in place, so under separate=True (one SCC per group) a shared base + # class would accumulate duplicate entries as each subclass's SCC + # walked into it, growing the emitted struct between builds. 
+ base = ClassIR("Base", "mod") + base.attributes = {"i": int32_rprimitive} + sub = ClassIR("Sub", "mod") + sub.attributes = {"j": int32_rprimitive} + base.mro = base.base_mro = [base] + sub.mro = sub.base_mro = [sub, base] + base.children = [sub] + + detect_undefined_bitmap(sub, seen=set()) + for _ in range(10): + detect_undefined_bitmap(sub, seen=set()) + assert base.bitmap_attrs == ["i"] + assert sub.bitmap_attrs == ["i", "j"] diff --git a/mypyc/test/test_misc.py b/mypyc/test/test_misc.py index 4b0bbe5988afb..f8ab199882a77 100644 --- a/mypyc/test/test_misc.py +++ b/mypyc/test/test_misc.py @@ -1,7 +1,10 @@ from __future__ import annotations +import os +import tempfile import unittest +from mypyc.build import get_header_deps, resolve_cfile_deps from mypyc.ir.ops import BasicBlock from mypyc.ir.pprint import format_blocks, generate_names_for_ir from mypyc.irbuild.ll_builder import LowLevelIRBuilder @@ -20,3 +23,219 @@ def test_debug_op(self) -> None: names = generate_names_for_ir([], [block]) code = format_blocks([block], names, {}) assert code[:-1] == ["L0:", " r0 = 'foo'", " CPyDebug_PrintObject(r0)"] + + +class TestHeaderDeps(unittest.TestCase): + """Tests for the header-dependency tracking used to build + `Extension.depends`, which drives setuptools' `newer_group` decision + about whether to recompile a .o file on incremental builds. + + The critical case is cross-group export-table headers: each module's + `__native_internal_.h` does `#include `, + and the consumer's compiled .o file bakes in byte offsets into that + header's `export_table_` struct. If we miss this header in the + deps list, struct-layout changes in `other_group` won't trigger a + rebuild of the consumer, and its baked-in offsets will silently resolve + to whatever now occupies those slots. + """ + + def test_get_header_deps_quoted_includes(self) -> None: + # Quoted includes โ€” the historical form. Used by the .c file to + # reach its own __native_.h / __native_internal_.h. 
The + # `False` in each tuple marks the include as non-angled, which + # `resolve_cfile_deps` uses to search the includer's directory. + cfile = '#include "__native_caller.h"\n#include "__native_internal_caller.h"\n' + assert get_header_deps([("caller.c", cfile)]) == [ + (False, "__native_caller.h"), + (False, "__native_internal_caller.h"), + ] + + def test_get_header_deps_angle_bracket_includes(self) -> None: + # Angle-bracket includes are also matched, and reported with + # is_angled=True so that the resolver skips the includer's dir + # for them (matching the C preprocessor). The cross-group export + # header is reached via `#include ` + # in __native_internal_.h. Before this was matched the dep + # was missed entirely and the consumer's .o was never invalidated + # when the other group's struct layout shifted. + cfile = "#include \n#include \n" + assert get_header_deps([("caller.c", cfile)]) == [ + (True, "Python.h"), + (True, "lib/__native_functions.h"), + ] + + def test_get_header_deps_mixed_and_whitespace(self) -> None: + # The preprocessor tolerates whitespace and the leading-hash form. + # `get_header_deps` returns sorted tuples โ€” non-angled (False) sorts + # before angled (True), then alphabetical within each kind. + cfile = '# include "a.h"\n# include \n#include\t"c.h"\n' + assert get_header_deps([("x.c", cfile)]) == [(False, "a.h"), (False, "c.h"), (True, "b.h")] + + def test_resolve_walks_transitively_through_headers(self) -> None: + # Reproduces the bug2 scenario: caller's .c only directly includes + # caller's own headers, but caller's __native_internal_caller.h + # includes the cross-group export header. The resolver must follow + # that chain so setuptools sees the cross-group header as a dep. 
+ with tempfile.TemporaryDirectory() as tmp: + build_dir = tmp + os.makedirs(os.path.join(build_dir, "lib")) + os.makedirs(os.path.join(build_dir, "other_group")) + + # caller.c's directly-included headers โ€” both live alongside + # caller.c under build/ (resolved via target_dir). + internal_h = os.path.join(build_dir, "__native_internal_caller.h") + caller_h = os.path.join(build_dir, "__native_caller.h") + cross_group_h = os.path.join(build_dir, "lib", "__native_functions.h") + unrelated_h = os.path.join(build_dir, "other_group", "__native_other.h") + + with open(caller_h, "w") as f: + # lib-rt headers don't exist on disk under build/, so they + # get dropped during resolution and aren't recursed into. + f.write("#include \n#include \n") + with open(internal_h, "w") as f: + # The smoking gun: this header includes a header in another + # group via angle brackets. Pre-fix, this dep was invisible + # to setuptools. + f.write( + "#include \n" + '#include "__native_caller.h"\n' + "#include \n" + ) + with open(cross_group_h, "w") as f: + f.write("struct export_table_lib___functions { int x; };\n") + with open(unrelated_h, "w") as f: + # Sibling group not reached from caller's chain โ€” must + # NOT appear in the resolved set. + f.write("struct unrelated { int x; };\n") + + # caller.c is in build_dir, so its includer-dir is build_dir. + # Both directly-included headers are quoted (`False`); the + # cross-group header that __native_internal_caller.h reaches + # via `` is found by the recursive + # walk re-reading the on-disk header. + deps = resolve_cfile_deps( + cfile_dir=build_dir, + direct_includes=[ + (False, "__native_caller.h"), + (False, "__native_internal_caller.h"), + ], + target_dir=build_dir, + ) + + assert deps == {caller_h, internal_h, cross_group_h}, ( + f"expected the cross-group header to be reached transitively; " + f"got {sorted(deps)!r}" + ) + + def test_resolve_drops_unresolvable_includes(self) -> None: + # ``, ``, etc. 
don't live under target_dir, so + # they're dropped from depends. They never change between builds, + # so this is the right behavior โ€” and crucially it stops + # setuptools' `missing="newer"` from treating them as always-newer + # and force-rebuilding every translation unit. + with tempfile.TemporaryDirectory() as tmp: + cfile_dir = tmp + deps = resolve_cfile_deps( + cfile_dir=cfile_dir, + direct_includes=[(True, "Python.h"), (True, "CPy.h"), (False, "init.c")], + target_dir=cfile_dir, + ) + assert deps == set() + + def test_cached_group_deps_populated_from_disk_cfile(self) -> None: + # Reproduces the scenario where generate_c returns empty cfiles for a group + # (the "fully-cached" path), but the .c file from the previous build is on + # disk. Before the fix, per_cfile_deps was never populated for that group, + # so cross-group header changes didn't trigger a recompile of the stale .o. + # + # Layout: + # target_dir/consumer/__native_consumer.c <- cached group's .c + # target_dir/consumer/__native_internal_consumer.h + # โ””โ”€ #include <- cross-group dep + # target_dir/provider/__native_provider.h <- layout changed here + with tempfile.TemporaryDirectory() as tmp: + consumer_dir = os.path.join(tmp, "consumer") + os.makedirs(consumer_dir) + provider_dir = os.path.join(tmp, "provider") + os.makedirs(provider_dir) + + consumer_c = os.path.join(consumer_dir, "__native_consumer.c") + consumer_h = os.path.join(consumer_dir, "__native_consumer.h") + internal_h = os.path.join(consumer_dir, "__native_internal_consumer.h") + cross_group_h = os.path.join(provider_dir, "__native_provider.h") + + with open(consumer_c, "w") as f: + f.write( + '#include "__native_consumer.h"\n#include "__native_internal_consumer.h"\n' + ) + with open(consumer_h, "w") as f: + f.write("#include \n") + with open(internal_h, "w") as f: + f.write( + "#include \n" + '#include "__native_consumer.h"\n' + "#include \n" + ) + with open(cross_group_h, "w") as f: + f.write("struct export_table_provider 
{ int x; };\n") + + # Without the fix: per_cfile_deps is never populated for the cached + # group, so no dep resolution happens and Extension.depends is empty. + pre_fix_per_cfile_deps: list[tuple[str, list[tuple[bool, str]]]] = [] + deps_without_fix: set[str] = set() + for cfile_full, dep_names in pre_fix_per_cfile_deps: + deps_without_fix.update( + resolve_cfile_deps(os.path.dirname(cfile_full), dep_names, tmp) + ) + assert deps_without_fix == set() + + # With the fix: read the on-disk .c, call get_header_deps, then resolve. + try: + with open(consumer_c, encoding="utf-8") as _f: + existing_text = _f.read() + except OSError: + existing_text = "" + per_cfile_deps = [ + (consumer_c, get_header_deps([(os.path.basename(consumer_c), existing_text)])) + ] + deps_with_fix: set[str] = set() + for cfile_full, dep_names in per_cfile_deps: + deps_with_fix.update( + resolve_cfile_deps(os.path.dirname(cfile_full), dep_names, tmp) + ) + + assert cross_group_h in deps_with_fix, ( + f"cross-group header must be in deps so setuptools recompiles the " + f"stale .o when struct offsets shift; got {sorted(deps_with_fix)!r}" + ) + + def test_resolve_search_order_matches_preprocessor(self) -> None: + # When the same header name exists both next to the includer and + # under target_dir, the C preprocessor picks the includer-dir copy + # for `#include "shared.h"` and the target_dir copy for + # `#include `. The resolver must record the same path + # the compiler will actually consume, otherwise mtimes of the + # wrong file drive incremental rebuild decisions. + with tempfile.TemporaryDirectory() as tmp: + includer = os.path.join(tmp, "groupA") + target = os.path.join(tmp, "build") + os.makedirs(includer) + os.makedirs(target) + + local_h = os.path.join(includer, "shared.h") + global_h = os.path.join(target, "shared.h") + with open(local_h, "w") as f: + f.write("/* local */\n") + with open(global_h, "w") as f: + f.write("/* global */\n") + + # Quoted form picks up local copy. 
+ deps = resolve_cfile_deps( + cfile_dir=includer, direct_includes=[(False, "shared.h")], target_dir=target + ) + assert deps == {local_h} + # Angled form skips includer's dir, gets the target_dir copy. + deps = resolve_cfile_deps( + cfile_dir=includer, direct_includes=[(True, "shared.h")], target_dir=target + ) + assert deps == {global_h} diff --git a/mypyc/test/test_run.py b/mypyc/test/test_run.py index e7be5fcf8425a..66b760a75385f 100644 --- a/mypyc/test/test_run.py +++ b/mypyc/test/test_run.py @@ -96,6 +96,17 @@ if sys.version_info >= (3, 12): files.append("run-python312.test") +# `run-char.test` exercises the experimental `char` native type. Its runtime +# support lives in a patched `mypy_extensions` that isn't published on PyPI +# yet, so skip the suite when the installed package is the stock one. +try: + from mypy_extensions import char as _char + + files.append("run-char.test") + del _char +except ImportError: + pass + setup_format = """\ from setuptools import setup from mypyc.build import mypycify diff --git a/mypyc/test/testutil.py b/mypyc/test/testutil.py index 0a558d0d0b8ec..9d59993c03402 100644 --- a/mypyc/test/testutil.py +++ b/mypyc/test/testutil.py @@ -232,8 +232,14 @@ def show_c(cfiles: list[list[tuple[str, str]]]) -> None: def fudge_dir_mtimes(dir: str, delta: int) -> None: + # Skip linker outputs. Pushing them back combines with write_file's + # +1 sec bump on .c files to make .c always newer than .so, forcing + # an unconditional rebuild that would mask Extension.depends bugs. + # See setuptools/_distutils/command/build_ext.py:`build_extension`. 
for dirpath, _, filenames in os.walk(dir): for name in filenames: + if name.endswith((".so", ".pyd", ".o", ".obj")): + continue path = os.path.join(dirpath, name) new_mtime = os.stat(path).st_mtime + delta os.utime(path, times=(new_mtime, new_mtime)) diff --git a/mypyc/transform/char_str_index_fold.py b/mypyc/transform/char_str_index_fold.py new file mode 100644 index 0000000000000..ae572f340e391 --- /dev/null +++ b/mypyc/transform/char_str_index_fold.py @@ -0,0 +1,103 @@ +"""Fold ``char = Unbox(CPyStr_GetItem(s, i))`` into a direct int32 read. + +Replaces the sequence "allocate 1-char PyObject -> unbox to char -> free +PyObject" (per iteration) with ``CPyStr_GetCharAt``, which reads the +codepoint directly as an int32. Error semantics are preserved: the helper +returns ``-113`` on out-of-range input, matching the int32 error sentinel. +""" + +from __future__ import annotations + +from mypyc.ir.deps import STR_EXTRA_OPS +from mypyc.ir.func_ir import FuncIR +from mypyc.ir.ops import Branch, CallC, DecRef, Goto, IncRef, Op, Unbox, Value +from mypyc.ir.rtypes import is_char_rprimitive +from mypyc.options import CompilerOptions + +STR_INDEXERS = {"CPyStr_GetItem": "CPyStr_GetCharAt"} + + +def do_char_str_index_fold(fn: FuncIR, options: CompilerOptions) -> None: + # Collect char Unbox ops and a snapshot use-map (consumer ops per Value) + # in a single pass. The map is read-only during candidate selection. + uses: dict[Value, list[Op]] = {} + unbox_targets: list[Unbox] = [] + for block in fn.blocks: + for op in block.ops: + if isinstance(op, Unbox) and is_char_rprimitive(op.type): + unbox_targets.append(op) + for src in op.sources(): + uses.setdefault(src, []).append(op) + + # Candidate: Unbox to char whose source is a str-indexing CallC, where + # the CallC's other consumers are only IS_ERROR Branch / IncRef / DecRef. 
+ to_rewrite: list[tuple[CallC, Unbox]] = [] + call_c_results: set[Value] = set() + for unbox in unbox_targets: + src = unbox.src + if not isinstance(src, CallC) or src.function_name not in STR_INDEXERS: + continue + compatible = True + for consumer in uses.get(src, ()): + if consumer is unbox: + continue + if isinstance(consumer, Branch) and consumer.op == Branch.IS_ERROR: + continue + if isinstance(consumer, (IncRef, DecRef)): + continue + compatible = False + break + if not compatible: + continue + to_rewrite.append((src, unbox)) + call_c_results.add(src) + + if not to_rewrite: + return + + # Mutate each str-indexing CallC in place. Keeping the CallC identity + # means existing IS_ERROR Branches keep pointing at it; the check + # switches from NULL-PyObject* to -113-int32 automatically since mypyc + # emits IS_ERROR based on the op's type. + for call_c, unbox in to_rewrite: + call_c.function_name = STR_INDEXERS[call_c.function_name] + call_c.type = unbox.type + deps = list(call_c.dependencies) if call_c.dependencies else [] + if STR_EXTRA_OPS not in deps: + deps.append(STR_EXTRA_OPS) + call_c.dependencies = deps + + # The Unbox's own IS_ERROR Branch is now redundant (CallC already + # checks the sentinel). Replace with Goto to the success path. + unboxes_to_remove = {unbox for _, unbox in to_rewrite} + branches_to_drop: set[Op] = set() + for unbox in unboxes_to_remove: + for consumer in uses.get(unbox, ()): + if isinstance(consumer, Branch) and consumer.op == Branch.IS_ERROR: + branches_to_drop.add(consumer) + + # Redirect remaining references from each Unbox onto its CallC, drop + # the Unbox ops, and drop IncRef/DecRef on the CallC (char is not + # refcounted). 
+ unbox_to_callc = {unbox: call_c for call_c, unbox in to_rewrite} + for block in fn.blocks: + new_ops: list[Op] = [] + for op in block.ops: + if op in unboxes_to_remove: + continue + if isinstance(op, (IncRef, DecRef)) and op.src in call_c_results: + continue + if op in branches_to_drop: + assert isinstance(op, Branch) + new_ops.append(Goto(op.false, op.line)) + continue + srcs = op.sources() + if any(isinstance(s, Unbox) and s in unbox_to_callc for s in srcs): + op.set_sources( + [ + unbox_to_callc[s] if isinstance(s, Unbox) and s in unbox_to_callc else s + for s in srcs + ] + ) + new_ops.append(op) + block.ops = new_ops diff --git a/pyproject.toml b/pyproject.toml index 23824197c748a..24b01afede635 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ requires = [ "typing_extensions>=4.6.0; python_version<'3.15'", "typing_extensions>=4.14.0; python_version>='3.15'", "mypy_extensions>=1.0.0", - "pathspec>=1.0.0", + "pathspec>=1.0.0,<1.1", "tomli>=1.1.0; python_version<'3.11'", "librt>=0.11.0; platform_python_implementation != 'PyPy'", # the following is from build-requirements.txt @@ -20,7 +20,7 @@ requires = [ build-backend = "setuptools.build_meta" [project] -name = "mypy" +name = "sqlglot-mypy" description = "Optional static typing for Python" readme = {text = """ Mypy -- Optional Static Typing for Python @@ -55,7 +55,7 @@ dependencies = [ "typing_extensions>=4.6.0; python_version<'3.15'", "typing_extensions>=4.14.0; python_version>='3.15'", "mypy_extensions>=1.0.0", - "pathspec>=1.0.0", + "pathspec>=1.0.0,<1.1", "tomli>=1.1.0; python_version<'3.11'", "librt>=0.11.0; platform_python_implementation != 'PyPy'", "ast-serialize>=0.3.0,<1.0.0", @@ -71,11 +71,11 @@ faster-cache = ["orjson"] native-parser = [] [project.urls] -Homepage = "https://www.mypy-lang.org/" +Homepage = "https://github.com/vaggelisd/sqlglot-mypy" Documentation = "https://mypy.readthedocs.io/en/stable/index.html" -Repository = "https://github.com/python/mypy" -Changelog = 
"https://github.com/python/mypy/blob/master/CHANGELOG.md" -Issues = "https://github.com/python/mypy/issues" +Repository = "https://github.com/vaggelisd/sqlglot-mypy" +Changelog = "https://github.com/vaggelisd/sqlglot-mypy/blob/master/CHANGELOG.md" +Issues = "https://github.com/vaggelisd/sqlglot-mypy/issues" [project.scripts] mypy = "mypy.__main__:console_entry" diff --git a/test-data/unit/lib-stub/mypy_extensions.pyi b/test-data/unit/lib-stub/mypy_extensions.pyi index 4295c33f81ade..96bcde7b773c1 100644 --- a/test-data/unit/lib-stub/mypy_extensions.pyi +++ b/test-data/unit/lib-stub/mypy_extensions.pyi @@ -142,6 +142,22 @@ class i32: def __ge__(self, x: i32) -> bool: ... def __gt__(self, x: i32) -> bool: ... +class char: + def __new__(cls, __x: str = ...) -> char: ... + def __eq__(self, x: object) -> bool: ... + def __ne__(self, x: object) -> bool: ... + def __hash__(self) -> int: ... + def __add__(self, x: str) -> str: ... + def __radd__(self, x: str) -> str: ... + def isspace(self) -> bool: ... + def isdigit(self) -> bool: ... + def isalnum(self) -> bool: ... + def isalpha(self) -> bool: ... + def isidentifier(self) -> bool: ... + def upper(self) -> char: ... + def lower(self) -> char: ... + def strip(self, __chars: Optional[str] = ...) -> str: ... + class i64: def __init__(self, x: Union[_Int, str, bytes, SupportsInt], base: int = 10) -> None: ... def __add__(self, x: i64) -> i64: ...