diff --git a/.agents/skills/apple-container/SKILL.md b/.agents/skills/apple-container/SKILL.md new file mode 100644 index 00000000000..fed7ce1e260 --- /dev/null +++ b/.agents/skills/apple-container/SKILL.md @@ -0,0 +1,63 @@ +--- +name: apple-container +description: Run RustPython tests in Apple's `container` CLI Linux environment. Use when user asks to run or compare Linux test results on macOS. +allowed-tools: Bash(container:*) Bash(cargo:*) Read Grep Glob +--- + +# Run Tests in Linux Container (Apple `container` CLI) + +Run RustPython tests inside a Linux container using Apple's `container` CLI. +NEVER use Docker, Podman, or any other container runtime. Only use the `container` command. + +## Arguments + +- Test command to run (examples: `test_io`, `test_codecs -v`, `test_io -v -m "test_errors"`) + +## Prerequisites + +- The `container` CLI is installed via `brew install container`. +- The dev image `rustpython-dev` is already built. + +## Workflow + +1. Check whether the test container is already running: + + ```shell + container list 2>/dev/null | grep rustpython-test + ``` + +2. Start the container if it is not running: + + ```shell + container run -d --name rustpython-test -m 8G -c 4 \ + --mount type=bind,source="$(pwd)",target=/workspace \ + -w /workspace rustpython-dev sleep infinity + ``` + +3. Run the requested test command inside the container: + + ```shell + container exec rustpython-test cargo run --release -- -m test + ``` + +4. Report results: + +- Show pass/fail summary and expected failures / unexpected successes. +- Highlight new failures compared to macOS results, if available. +- Do not stop or remove the container after testing. + +## Notes + +- The workspace is bind-mounted, so local code changes are immediately available in the container. +- Use `container exec rustpython-test sh -c "..."` for any command inside the container. +- Rebuild after code changes with: + + ```shell + container exec rustpython-test sh -c "cargo build --release" + ``` + +- Stop the container when explicitly requested: + + ```shell + container rm -f rustpython-test + ``` diff --git a/.agents/skills/investigate-test-failure/SKILL.md b/.agents/skills/investigate-test-failure/SKILL.md new file mode 100644 index 00000000000..e63dfeeec1b --- /dev/null +++ b/.agents/skills/investigate-test-failure/SKILL.md @@ -0,0 +1,53 @@ +--- +name: investigate-test-failure +description: Investigate a failing RustPython test, compare with CPython behavior, and decide whether to implement a fix or prepare incompatibility issue details. +allowed-tools: Bash(python3:*) Bash(cargo run:*) Bash(gh issue create:*) Read Grep Glob Bash(git add:*) Bash(git commit:*) Bash(cargo fmt:*) Bash(git diff:*) Task +--- + +# Investigate Test Failure + +Investigate why a specific test is failing and determine whether it can be fixed now or should be reported as an incompatibility issue. + +## Arguments + +- Failed test identifier (example: `test_inspect.TestGetSourceBase.test_getsource_reload`) + +## Workflow + +1. Analyze the failure cause: + +- Read the test code. +- Analyze failure message and traceback. +- Check related RustPython implementation code. + +2. Verify behavior in CPython: + +- Run the test with `python3 -m unittest`. +- Document expected behavior/output. + +3. Determine fix feasibility: + +- Simple fix (import issues, small logic bugs): implement fix, run formatting and checks, review changes, and commit. +- Complex fix (major missing feature): gather issue information and report to user. + +Pre-commit review process: + +- Run `git diff` to review local changes. +- Use a focused subagent review to compare implementation against CPython behavior and check for missed edge cases. +- Commit only after review passes. + +4. For complex issues, collect issue information using `.github/ISSUE_TEMPLATE/report-incompatibility.md`: + +- Feature: missing or broken Python feature. +- Minimal reproduction code. +- CPython behavior (`python3`). +- RustPython behavior (`cargo run`). +- Python documentation reference link. + +Report the collected information to the user. Create an issue only when explicitly requested. + +Example issue command: + +```shell +gh issue create --template report-incompatibility.md --title "..." --body "..." +``` diff --git a/.agents/skills/rustpython-capi-expansion/SKILL.md b/.agents/skills/rustpython-capi-expansion/SKILL.md new file mode 100644 index 00000000000..5b56929a825 --- /dev/null +++ b/.agents/skills/rustpython-capi-expansion/SKILL.md @@ -0,0 +1,60 @@ +--- +name: rustpython-capi-expansion +description: Implement missing RustPython C-API functions in crates/capi using the pyo3-ffi header split mapping (`pyo3-ffi/src/*.rs`, mirroring CPython C API headers). Use this whenever the user asks to add or port C-API functions (for example from setobject.h, dictobject.h, unicodeobject.h) or add capi tests. +--- + +# RustPython C-API Expansion + +Use this workflow for adding missing C-API functions to RustPython. + +## Source of truth for target files + +- Use this mapping source: `pyo3-ffi/src/*.rs`, which mirrors the CPython header split used by the C API. +- Map requested header APIs to `crates/capi/src/.rs` using that split. Examples: + - `setobject.h` -> `crates/capi/src/setobject.rs` + - `dictobject.h` -> `crates/capi/src/dictobject.rs` + - `unicodeobject.h` -> `crates/capi/src/unicodeobject.rs` +- Do not invent alternate target modules when the header split implies a direct target. +- If the target file is not present yet, create it and wire it in `crates/capi/src/lib.rs`. + +## Implementation workflow + +1. Identify requested missing APIs from the user request and their originating C API header. +2. Open nearby capi modules in `crates/capi/src/` and follow existing style and patterns. +3. Implement only the requested functions in the mapped target file. +4. Keep behavior aligned with CPython C-API contracts. +5. Prefer using existing `rustpython-vm` functionality as much as possible instead of re-implementing behavior in capi. +6. If a needed `rustpython-vm` helper exists but is private, make it public with a minimal, focused visibility change. +7. Prefer direct contract assumptions over defensive null checks unless required by the established local style. +8. Add basic tests only; do not overfit with very specific edge-case clutter. +9. In tests, use `pyo3` only as a safe wrapper over the API. Avoid raw pointer-heavy direct FFI-style tests. +10. Run tests from `crates/capi`. + +## Testing rules + +- Run test commands with working directory set to `crates/capi`. +- Prefer targeted tests first (module/function filter), then broader capi tests if needed. +- Keep test names concise (no required `test_` prefix). + +## Style rules + +- Follow existing RustPython capi coding style in neighboring files. +- Reuse `rustpython-vm` methods and types first; avoid duplicating VM logic in capi wrappers. +- When exposing previously private VM helpers, keep the API surface minimal and avoid unrelated refactors. +- Only expose and implement ABI-stable C-API surface needed for `abi3` / `abi3t`. +- Add comments only when they explain non-obvious behavior. +- Keep edits minimal and focused on requested API expansion. + +## Completion checklist + +- [ ] All requested functions implemented in mapped target file. +- [ ] New module exported in `crates/capi/src/lib.rs` when applicable. +- [ ] Basic safe-wrapper `pyo3` tests added/updated. +- [ ] Tests executed from `crates/capi` and passing for changed area. +- [ ] Final response includes changed file paths and test command summary. + +## Example prompts this skill should handle + +- "Implement these missing functions from `dictobject.h`." +- "Add `setobject.h` C-API functions in RustPython and include basic tests." +- "Port the listed `unicodeobject.h` APIs in capi, follow existing style, and run tests from `crates/capi`." diff --git a/.agents/skills/upgrade-pylib-next/SKILL.md b/.agents/skills/upgrade-pylib-next/SKILL.md new file mode 100644 index 00000000000..e68e7316509 --- /dev/null +++ b/.agents/skills/upgrade-pylib-next/SKILL.md @@ -0,0 +1,34 @@ +--- +name: upgrade-pylib-next +description: Pick the next CPython stdlib module ready for upgrade based on update_lib todo output and skip modules with open upgrade PRs. +allowed-tools: Skill(upgrade-pylib) Bash(gh pr list:*) Bash(cargo run:*) +--- + +# Upgrade Next Python Library + +Find the next Python library module ready for upgrade and then run the `upgrade-pylib` workflow for it. + +## Workflow + +1. Get current TODO status: + +```shell +cargo run --release -- scripts/update_lib todo 2>/dev/null +``` + +2. Get open upgrade PRs: + +```shell +gh pr list --search "Update in:title" --json number,title --template '{{range .}}#{{.number}} {{.title}}{{"\\n"}}{{end}}' +``` + +3. From TODO output, select modules in this priority order: + +- `[ ] [no deps]` +- `[ ] [0/n]` where dependencies are all upgraded (for example `[0/3]`, `[0/5]`) + +4. Skip modules that already have an open upgrade PR. + +5. Run upgrade for selected module using the `upgrade-pylib` workflow. + +If no modules match, report that eligible modules are blocked by dependencies. diff --git a/.agents/skills/upgrade-pylib/SKILL.md b/.agents/skills/upgrade-pylib/SKILL.md new file mode 100644 index 00000000000..84400e5b7a2 --- /dev/null +++ b/.agents/skills/upgrade-pylib/SKILL.md @@ -0,0 +1,102 @@ +--- +name: upgrade-pylib +description: Upgrade a Python standard library module from CPython into RustPython using scripts/update_lib and then triage and mark remaining failures. +allowed-tools: Bash(git add:*) Bash(git commit:*) Bash(git diff:*) Bash(cargo run:*) Bash(python3 scripts/update_lib quick:*) Bash(python3 scripts/update_lib auto-mark:*) +--- + +# Upgrade Python Library from CPython + +Upgrade a Python standard library module from CPython to RustPython. + +## Arguments + +- Library name or path (examples: `inspect`, `asyncio`, `json`, `cpython/Lib/inspect.py`, `cpython/Lib/json/`) + +## Important: Report Tool Issues First + +If `scripts/update_lib` shows a bug, missing automation, or unexpected behavior, stop the upgrade and report the tooling issue first, including: + +1. What you were trying to do: + +- Library name. +- Full command used. + +2. What went wrong or what is missing. +3. Expected vs actual behavior. + +## Workflow + +1. Run quick upgrade with update_lib: + +- `python3 scripts/update_lib quick ` +- or `python3 scripts/update_lib quick cpython/Lib/.py` +- or `python3 scripts/update_lib quick cpython/Lib//` + +This step copies library files, patches tests while preserving markers where possible, runs tests, auto-marks new failures, removes stale `@unittest.expectedFailure`, and creates a commit. + +If warnings like `WARNING: TestCFoo does not exist in remote file` appear, class structure changed and some markers must be restored manually. + +2. Review diff and restore RustPython-specific changes: + +- Run `git diff Lib/test/test_`. +- Restore only changes with explicit `RUSTPYTHON` comments. +- Do not restore unrelated upstream CPython changes. + +3. Investigate failing dependent tests: + +- Get dependencies: + + ```shell + cargo run --release -- scripts/update_lib deps + ``` + +- Find direct dependent test modules from the `- [ ] :` line. +- Run dependent tests and collect failures: + + ```shell + cargo run --release -- -m test 2>&1 | grep -E "^(FAIL|ERROR):" + ``` + +- For each failing test identifier, investigate with the `investigate-test-failure` workflow. + +4. Mark remaining failures with auto-mark: + +- `python3 scripts/update_lib auto-mark Lib/test/test_.py --mark-failure` +- or `python3 scripts/update_lib auto-mark Lib/test/test_/ --mark-failure` + +Note: `--mark-failure` marks all current failures including regressions; review carefully. + +5. Handle panics manually: + +- For tests that panic/crash/hang, use `@unittest.skip("TODO: RUSTPYTHON; ...")` rather than expectedFailure. + +6. Handle class-specific failures: + +- If only one subclass fails (for example `TestCFoo` but not `TestPyFoo`), move marker to the failing subclass override. + +7. Commit test-fix markers as a separate commit: + +```shell +git add -u && git commit -m "Mark failing tests" +``` + +## Example usage + +```text +upgrade-pylib inspect +upgrade-pylib json +upgrade-pylib asyncio +upgrade-pylib cpython/Lib/inspect.py +upgrade-pylib cpython/Lib/json/ +``` + +## Notes + +- The `cpython/` directory should contain the CPython source used for sync. +- `scripts/update_lib` modes: + - `quick`: patch + auto-mark + - `migrate`: patch only + - `auto-mark`: mark failures only + - `copy-lib`: copy library files only +- Patching transfers `@unittest.expectedFailure` and `@unittest.skip` with `TODO: RUSTPYTHON` markers where possible. +- The tool does not preserve every RustPython-specific change; always review and restore explicit RUSTPYTHON-marked logic. diff --git a/.cargo/config.toml b/.cargo/config.toml index a590c044d81..4e54d1d3b18 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -3,3 +3,19 @@ rustflags = "-C link-arg=/STACK:8000000" [target.'cfg(all(target_os = "windows", not(target_env = "msvc")))'] rustflags = "-C link-args=-Wl,--stack,8000000" + +[target.wasm32-unknown-unknown] +rustflags = ["--cfg=getrandom_backend=\"wasm_js\""] + +# Enforce a 1 MB stack limit for WASI targets. +# (Runaway heap growth is fixed by the smaller DataStack chunk +# and the size‑optimized wasm‑release profile.) +[target.wasm32-wasip1] +rustflags = [ + "-C", "link-arg=-zstack-size=1048576", +] + +[target.wasm32-wasip2] +rustflags = [ + "-C", "link-arg=-zstack-size=1048576", +] diff --git a/.claude/scripts/setup-env.sh b/.claude/scripts/setup-env.sh new file mode 100755 index 00000000000..f5d28a3f14c --- /dev/null +++ b/.claude/scripts/setup-env.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# Claude Code web session startup script +# Sets up the development environment for RustPython + +set -e + +cd /home/user/RustPython + +echo "=== RustPython dev environment setup ===" + +# 1. Ensure python3 points to 3.13+ (needed for scripts/update_lib) +# /usr/local/bin takes precedence over /usr/bin in PATH, +# so we update the symlink there directly. +CURRENT_PY=$(python3 --version 2>&1 | grep -oP '\d+\.\d+') +if [ "$(printf '%s\n' "3.13" "$CURRENT_PY" | sort -V | head -1)" != "3.13" ]; then + echo "Upgrading python3 default to 3.13..." + # Find best available Python >= 3.13 + TARGET="" + for ver in python3.14 python3.13; do + if command -v "$ver" &>/dev/null; then + TARGET=$(command -v "$ver") + break + fi + done + if [ -n "$TARGET" ]; then + # Override /usr/local/bin/python3 if it exists and is outdated + if [ -e /usr/local/bin/python3 ]; then + sudo ln -sf "$TARGET" /usr/local/bin/python3 + fi + # Also set /usr/bin via update-alternatives + sudo update-alternatives --install /usr/bin/python3 python3 "$TARGET" 3 2>/dev/null || true + sudo update-alternatives --set python3 "$TARGET" 2>/dev/null || true + echo "python3 now: $(python3 --version)" + else + echo "WARNING: No Python 3.13+ found. scripts/update_lib may not work." + fi +else + echo "python3 already >= 3.13: $(python3 --version)" +fi + +# 2. Clone CPython source if not present (needed for scripts/update_lib) +if [ ! -d "cpython" ]; then + echo "Cloning CPython v3.14.3 (shallow)..." + git clone --depth 1 --branch v3.14.3 https://github.com/python/cpython.git cpython + echo "CPython source ready." +else + echo "CPython source already present." +fi + +echo "=== Setup complete ===" diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000000..22f0a9a8a01 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,15 @@ +{ + "hooks": { + "SessionStart": [ + { + "matcher": "", + "hooks": [ + { + "type": "command", + "command": "bash .claude/scripts/setup-env.sh" + } + ] + } + ] + } +} diff --git a/.coderabbit.yml b/.coderabbit.yml new file mode 100644 index 00000000000..6a96844e23d --- /dev/null +++ b/.coderabbit.yml @@ -0,0 +1,3 @@ +reviews: + path_filters: + - "!Lib/**" diff --git a/.cspell.dict/cpython.txt b/.cspell.dict/cpython.txt new file mode 100644 index 00000000000..84265bda609 --- /dev/null +++ b/.cspell.dict/cpython.txt @@ -0,0 +1,273 @@ +ADDOP +aftersign +argdefs +argtypes +asdl +asname +atopen +atext +attro +augassign +badcert +badsyntax +baseinfo +basetype +binop +bltin +boolop +BUFMAX +BUILDSTDLIB +bxor +byteswap +cached_tsver +cadata +cafile +calldepth +callinfo +callproc +capath +carg +cellarg +cellvar +cellvars +ceval +cfield +cfws +CFWS +CLASSDEREF +classdict +cmpop +CNOTAB +codedepth +CODEUNIT +CONIN +CONOUT +constevaluator +consti +CONVFUNC +convparam +copyslot +cpucount +datastack +defaultdict +denom +deopt +deopts +dictbytype +DICTFLAG +dictoffset +distpoint +dynload +elts +eooh +eofs +EOOH +evalloop +excepthandler +exceptiontable +fastlocal +fastlocals +fblock +fblocks +fdescr +fdst +ffi_argtypes +fielddesc +fieldlist +fileutils +finalbody +finalizers +firsttraceable +flowgraph +formatfloat +freelist +freevar +freevars +fromlist +fsrc +getdict +getfunc +getiter +getsets +getslice +globalgetvar +HASARRAY +HASBITFIELD +HASPOINTER +HASSTRUCT +HASUNION +heaptype +hexdigit +HIGHRES +ialloc +IFUNC +IMMUTABLETYPE +INCREF +inlinedepth +inplace +inpos +ioffset +isbytecode +ishidden +ismine +ISPOINTER +isoctal +iteminfo +Itertool +iused +keeped +kwnames +kwonlyarg +kwonlyargs +kwonlydefaults +lasti +libffi +linearise +lineful +lineiterator +linetable +LNOTAB +loadfast +localsplus +localspluskinds +Lshift +lslpp +lsprof +MAXBLOCKS +maxdepth +metavars +miscompiles +mult +multibytecodec +nameobj +nameop +nargsf +nblocks +ncells +ncellsused +ncellvars +nconsts +newargs +newfree +NEWLOCALS +newsemlockobject +nextop +nfrees +nkwargs +nkwelts +nlocalsplus +nointerrupt +noffsets +Nondescriptor +noninteger +nops +noraise +nseen +NSIGNALS +numer +nvars +opname +opnames +orelse +outparam +outparm +paramfunc +parg +pathconfig +patma +peepholer +phcount +platstdlib +ploc +posonlyarg +posonlyargs +prec +preds +preinitialized +pybuilddir +pycore +pyinner +pydecimal +pyerrors +Pyfunc +pylifecycle +pymain +pyrepl +pystate +PYTHONTRACEMALLOC +PYTHONUTF8 +pythonw +PYTHREAD_NAME +releasebuffer +repr +resinfo +retarget +Rshift +SA_ONSTACK +saveall +scls +setdict +setfunc +setprofileallthreads +SETREF +setresult +setslice +settraceallthreads +sget +SLOTDEFINED +SMALLBUF +SOABI +SSLEOF +stackdepth +stackref +staticbase +stginfo +storefast +stringlib +stringized +structseq +subkwargs +subparams +subscr +sval +swappedbytes +swaptimize +sysdict +tbstderr +templatelib +testconsole +threadstate +ticketer +tmptype +tok_oldval +tstate +tvars +typeobject +typeparam +Typeparam +typeparams +typeslots +unaryop +uncollectable +Unhandle +unparse +unparser +untargeted +untracking +VARKEYWORDS +varkwarg +venvlauncher +venvlaunchert +venvw +venvwlauncher +venvwlaunchert +wbits +weakreflist +weakrefobject +webpki +winconsoleio +withitem +withs +worklist +xstat +XXPRIME diff --git a/.cspell.dict/python-more.txt b/.cspell.dict/python-more.txt new file mode 100644 index 00000000000..7ea660a5d1f --- /dev/null +++ b/.cspell.dict/python-more.txt @@ -0,0 +1,307 @@ +abiflags +abstractmethods +addcompare +aenter +aexit +aiter +altzone +anext +anextawaitable +annotationlib +appendleft +argcount +arrayiterator +arraytype +asend +asyncgen +athrow +backslashreplace +baserepl +basicsize +bdfl +bigcharset +bignum +bivariant +breakpointhook +cformat +chunksize +classcell +classmethods +closefd +closesocket +codepoint +codepoints +codesize +contextvar +cpython +cratio +ctype +ctypes +dealloc +debugbuild +decompressor +defaultaction +descr +dictcomp +dictitems +dictkeys +dictview +digestmod +dllhandle +docstring +docstrings +dunder +endianness +endpos +eventmask +excepthook +exceptiongroup +exitfuncs +extendleft +fastlocals +fdel +fedcba +fget +fileencoding +fillchar +fillvalue +finallyhandler +firstiter +fobj +firstlineno +fnctl +frombytes +fromhex +fromunicode +frozensets +fset +fspath +fstring +fstrings +ftruncate +genexpr +genexpressions +getargs +getattro +getcodesize +getdefaultencoding +getfilesystemencodeerrors +getfilesystemencoding +getformat +getframe +getframemodulename +getnewargs +getopt +getpip +getrandom +getrecursionlimit +getrefcount +getsizeof +getswitchinterval +getweakref +getweakrefcount +getweakrefs +getweakrefs +getwindowsversion +gmtoff +groupdict +groupindex +hamt +hostnames +idfunc +idiv +idxs +impls +infd +indexgroup +infj +inittab +Inittab +instancecheck +instanceof +instrs +interpchannels +interpqueues +irepeat +isabstractmethod +isbytes +iscased +isfinal +istext +itemiterator +itemsize +iternext +keepends +keyfunc +keyiterator +kwarg +kwargs +kwdefaults +kwonlyargcount +lastgroup +lastindex +linearization +linearize +listcomp +longrange +lvalue +mappingproxy +markupbase +maskpri +maxdigits +MAXGROUPS +MAXREPEAT +maxsplit +maxunicode +memoryview +memoryviewiterator +metaclass +metaclasses +metatype +mformat +mro +mros +multiarch +mymodule +namereplace +nanj +nbytes +ncallbacks +ndigits +ndim +needsfree +nldecoder +nlocals +NOARGS +nonbytes +Nonprintable +onceregistry +origname +ospath +outfd +pendingcr +phello +platlibdir +popleft +posixsubprocess +posonly +posonlyargcount +prepending +profilefunc +pycache +pycapsule +pycodecs +pycs +pydatetime +pyexpat +PYGILSTATE +pyio +pymain +PYTHONAPI +PYTHONBREAKPOINT +PYTHONDEBUG +PYTHONDONTWRITEBYTECODE +PYTHONFAULTHANDLER +PYTHONHASHSEED +PYTHONHOME +PYTHONINSPECT +PYTHONINTMAXSTRDIGITS +PYTHONIOENCODING +PYTHONNODEBUGRANGES +PYTHONNOUSERSITE +PYTHONOPTIMIZE +PYTHONPATH +PYTHONPATH +PYTHONSAFEPATH +PYTHONUNBUFFERED +PYTHONVERBOSE +PYTHONWARNDEFAULTENCODING +PYTHONWARNINGS +pytraverse +PYVENV +qualname +quotetabs +radd +rdiv +rdivmod +readall +readbuffer +reconstructor +refcnt +releaselevel +reraised +reverseitemiterator +reverseiterator +reversekeyiterator +reversevalueiterator +rfloordiv +rlshift +rmod +rpow +rrshift +rsub +rtruediv +rvalue +scproxy +seennl +setattro +setcomp +setprofileallthreads +setrecursionlimit +setswitchinterval +settraceallthreads +showwarnmsg +signum +sitebuiltins +slotnames +STACKLESS +stacklevel +stacksize +startpos +subclassable +subclasscheck +subclasshook +subclassing +suboffset +suboffsets +SUBPATTERN +subpatterns +sumprod +surrogateescape +surrogatepass +sysconf +sysconfigdata +sysdict +sysvars +teedata +thisclass +titlecased +tkapp +tobytes +tolist +toreadonly +TPFLAGS +tracefunc +unimportable +unionable +unraisablehook +unsliceable +urandom +valueiterator +vararg +varargs +varnames +warningregistry +warnmsg +warnoptions +warnopts +weaklist +weakproxy +weakrefs +weakrefset +winver +withdata +xmlcharrefreplace +xoptions +xopts +yieldfrom diff --git a/.cspell.dict/rust-more.txt b/.cspell.dict/rust-more.txt new file mode 100644 index 00000000000..c4457723c6c --- /dev/null +++ b/.cspell.dict/rust-more.txt @@ -0,0 +1,94 @@ +ahash +arrayvec +bidi +biguint +bindgen +bitand +bitflags +bitflagset +bitor +bitvec +bitxor +bstr +byteorder +byteset +caseless +chrono +consts +cranelift +cstring +datelike +deserializer +deserializers +fdiv +flamescope +flate2 +fract +getres +hasher +hexf +hexversion +idents +illumos +ilog +indexmap +insta +keccak +lalrpop +lexopt +libc +libcall +libloading +libz +longlong +Manually +maplit +memmap +memmem +metas +modpow +msvc +muldiv +nanos +nonoverlapping +objclass +peekable +pemfile +powc +powf +powi +prepended +punct +replacen +retag +rmatch +rposition +rsplitn +rustc +rustfmt +rustls +rustyline +seedable +seekfrom +siphash +siphasher +splitn +subsec +thiserror +timelike +timsort +trai +ulonglong +unic +unistd +unraw +unsync +wasip1 +wasip2 +wasmbind +wasmer +wasmtime +widestring +winapi +winresource +winsock diff --git a/.cspell.dict/rustpython.txt b/.cspell.dict/rustpython.txt new file mode 100644 index 00000000000..8cd08358019 --- /dev/null +++ b/.cspell.dict/rustpython.txt @@ -0,0 +1,32 @@ +cfgs +miri +py +pyarg +pyargs +pyast +pyattr +pyclass +pyclassmethod +pyexception +pyfunction +pygetset +pyimpl +pylib +pymath +pymethod +pymodule +pyname +pyobj +pyobject +pypayload +pyref +pyslot +pystaticmethod +pystone +pystr +pystruct +pystructseq +pytype +rustix +struc +zelf diff --git a/.cspell.json b/.cspell.json index e2723d6ce5c..21199c0c5f5 100644 --- a/.cspell.json +++ b/.cspell.json @@ -1,288 +1,103 @@ // See: https://github.com/streetsidesoftware/cspell/tree/master/packages/cspell { "version": "0.2", + "import": [ + "@cspell/dict-en_us/cspell-ext.json", + // "@cspell/dict-cpp/cspell-ext.json", + "@cspell/dict-python/cspell-ext.json", + "@cspell/dict-rust/cspell-ext.json", + "@cspell/dict-win32/cspell-ext.json", + "@cspell/dict-shell/cspell-ext.json", + ], + "allowCompoundWords": true, // language - current active spelling language "language": "en", // dictionaries - list of the names of the dictionaries to use "dictionaries": [ + "cpython", // Sometimes keeping same terms with cpython is easy + "python-more", // Python API terms not listed in python + "rust-more", // Rust API terms not listed in rust + "rustpython", // RustPython derive macros and internal terms "en_US", "softwareTerms", "c", "cpp", "python", - "python-custom", "rust", - "unix", - "posix", - "winapi" + "shell", + "win32" ], // dictionaryDefinitions - this list defines any custom dictionaries to use - "dictionaryDefinitions": [], + "dictionaryDefinitions": [ + { + "name": "cpython", + "path": "./.cspell.dict/cpython.txt" + }, + { + "name": "python-more", + "path": "./.cspell.dict/python-more.txt" + }, + { + "name": "rust-more", + "path": "./.cspell.dict/rust-more.txt" + }, + { + "name": "rustpython", + "path": "./.cspell.dict/rustpython.txt" + } + ], "ignorePaths": [ "**/__pycache__/**", - "Lib/**" + "target/**", + "Lib/**", + "crates/host_env/**" ], // words - list of words to be always considered correct + // (compound words like pyarg, baseclass, microbenchmark are handled by allowCompoundWords) "words": [ - // Rust - "ahash", - "bidi", - "biguint", - "bindgen", - "bitflags", - "bstr", - "byteorder", - "chrono", - "consts", - "cstring", - "flate2", - "fract", - "hasher", - "idents", - "indexmap", - "insta", - "keccak", - "lalrpop", - "libc", - "libz", - "longlong", - "Manually", - "maplit", - "memmap", - "metas", - "modpow", - "nanos", - "objclass", - "peekable", - "powc", - "powf", - "prepended", - "punct", - "replacen", - "rsplitn", - "rustc", - "rustfmt", - "seekfrom", - "splitn", - "subsec", - "timsort", - "trai", - "ulonglong", - "unic", - "unistd", - "winapi", - "winsock", - // Python - "abstractmethods", - "aiter", - "anext", - "arrayiterator", - "arraytype", - "asend", - "athrow", - "basicsize", - "cformat", - "classcell", - "closesocket", - "codepoint", - "codepoints", - "cpython", - "decompressor", - "defaultaction", - "descr", - "dictcomp", - "dictitems", - "dictkeys", - "dictview", - "docstring", - "docstrings", - "dunder", - "eventmask", - "fdel", - "fget", - "fileencoding", - "fillchar", - "finallyhandler", - "frombytes", - "fromhex", - "fromunicode", - "fset", - "fspath", - "fstring", - "fstrings", - "genexpr", - "getattro", - "getformat", - "getnewargs", - "getweakrefcount", - "getweakrefs", - "hostnames", - "idiv", - "impls", - "infj", - "instancecheck", - "instanceof", - "isabstractmethod", - "itemiterator", - "itemsize", - "iternext", - "keyiterator", - "kwarg", - "kwargs", - "linearization", - "linearize", - "listcomp", - "mappingproxy", - "maxsplit", - "memoryview", - "memoryviewiterator", - "metaclass", - "metaclasses", - "metatype", - "mro", - "mros", - "nanj", - "ndigits", - "ndim", - "nonbytes", - "origname", - "posixsubprocess", - "pyexpat", - "PYTHONDEBUG", - "PYTHONHOME", - "PYTHONINSPECT", - "PYTHONOPTIMIZE", - "PYTHONPATH", - "PYTHONPATH", - "PYTHONVERBOSE", - "PYTHONWARNINGS", - "qualname", - "radd", - "rdiv", - "rdivmod", - "reconstructor", - "reversevalueiterator", - "rfloordiv", - "rlshift", - "rmod", - "rpow", - "rrshift", - "rsub", - "rtruediv", - "scproxy", - "setattro", - "setcomp", - "showwarnmsg", - "warnmsg", - "stacklevel", - "subclasscheck", - "subclasshook", - "unionable", - "unraisablehook", - "valueiterator", - "vararg", - "varargs", - "varnames", - "warningregistry", - "warnopts", - "weakproxy", - "xopts", - // RustPython - "baseclass", - "Bytecode", - "cfgs", - "codegen", + "aiterable", + "alnum", + "csock", + "coro", "dedentations", "dedents", "deduped", - "downcasted", - "dumpable", - "GetSet", - "internable", - "makeunicodedata", - "miri", - "notrace", - "pyarg", - "pyarg", - "pyargs", - "PyAttr", + "deoptimized", + "deoptimize", + "emscripten", + "excs", + "fnfe", + "ifexp", + "interps", + "jitted", + "jitting", + "kwonly", + "lossily", + "mcache", + "oparg", + "opargs", "pyc", - "PyClass", - "PyClassMethod", - "PyException", - "PyFunction", - "pygetset", - "pyimpl", - "pymember", - "PyMethod", - "PyModule", - "pyname", - "pyobj", - "PyObject", - "pypayload", - "PyProperty", - "pyref", - "PyResult", - "pyslot", - "PyStaticMethod", - "pystr", - "pystruct", - "pystructseq", - "pytrace", - "reducelib", - "richcompare", - "RustPython", - "struc", - "tracebacks", - "typealiases", - "Unconstructible", - "unhashable", - "uninit", + "reborrow", + "reraises", + "reraising", + "significand", + "summands", + "TESTFN", + "TZPATH", "unraisable", "wasi", - "zelf", - // cpython - "argtypes", - "asdl", - "asname", - "augassign", - "badsyntax", - "basetype", - "boolop", - "bxor", - "cellarg", - "cellvar", - "cellvars", - "cmpop", - "dictoffset", - "elts", - "excepthandler", - "finalbody", - "freevar", - "freevars", - "fromlist", - "heaptype", - "IMMUTABLETYPE", - "kwonlyarg", - "kwonlyargs", - "linearise", - "maxdepth", - "mult", - "nkwargs", - "orelse", - "patma", - "posonlyarg", - "posonlyargs", - "prec", - "stackdepth", - "unaryop", - "unparse", - "unparser", - "VARKEYWORDS", - "varkwarg", - "wbits", - "withitem", - "withs" + "weaked", + // unix + "posixshmem", + "shm", + "CLOEXEC", + "endgrent", + "gethrvtime", + "getrusage", + "sigaction", + "WRLCK", + // win32 + "IFEXEC" ], // flagWords - list of words to be always considered incorrect "flagWords": [ diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 00000000000..cdd54a47d5b --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,6 @@ +FROM rust:bullseye + +# Install clang +RUN apt-get update \ + && apt-get install -y clang \ + && rm -rf /var/lib/apt/lists/* diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index d8641749b59..8838cf6a960 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,6 +1,25 @@ { - "image": "mcr.microsoft.com/devcontainers/universal:2", - "features": { - "ghcr.io/devcontainers/features/rust:1": {} - } + "name": "Rust", + "build": { + "dockerfile": "Dockerfile" + }, + "runArgs": ["--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"], + "customizations": { + "vscode": { + "settings": { + "lldb.executable": "/usr/bin/lldb", + // VS Code don't watch files under ./target + "files.watcherExclude": { + "**/target/**": true + }, + "extensions": [ + "rust-lang.rust-analyzer", + "tamasfe.even-better-toml", + "vadimcn.vscode-lldb", + "mutantdino.resourcemonitor" + ] + } + } + }, + "remoteUser": "vscode" } diff --git a/.gemini/config.yaml b/.gemini/config.yaml new file mode 100644 index 00000000000..76afe53388c --- /dev/null +++ b/.gemini/config.yaml @@ -0,0 +1,2 @@ +ignore_patterns: + - "Lib/**" diff --git a/.gitattributes b/.gitattributes index aa993ad110f..95aed96eb3b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,6 +1,72 @@ Lib/** linguist-vendored -Cargo.lock linguist-generated -merge +Cargo.lock linguist-generated *.snap linguist-generated -merge -vm/src/stdlib/ast/gen.rs linguist-generated -merge Lib/*.py text working-tree-encoding=UTF-8 eol=LF **/*.rs text working-tree-encoding=UTF-8 eol=LF +crates/vm/src/stdlib/ast/gen.rs linguist-generated -merge +crates/doc/src/data.inc.rs generated +crates/compiler-core/src/bytecode/opcode_metadata.rs generated + +.github/workflows/*.lock.yml linguist-generated=true merge=ours + +# Binary data types +*.aif binary +*.aifc binary +*.aiff binary +*.au binary +*.bmp binary +*.exe binary +*.icns binary +*.gif binary +*.ico binary +*.jpg binary +*.pck binary +*.pdf binary +*.png binary +*.psd binary +*.tar binary +*.wav binary +*.whl binary +*.zip binary + +# Text files that should not be subject to eol conversion +[attr]noeol -text + +Lib/test/cjkencodings/* noeol +Lib/test/tokenizedata/coding20731.py noeol +Lib/test/decimaltestdata/*.decTest noeol +Lib/test/test_email/data/*.txt noeol +Lib/test/xmltestdata/* noeol + +# Shell scripts should have LF even on Windows because of Cygwin +Lib/venv/scripts/common/activate text eol=lf +Lib/venv/scripts/posix/* text eol=lf + +# CRLF files +[attr]dos text eol=crlf + +# Language aware diff headers +# https://tekin.co.uk/2020/10/better-git-diff-output-for-ruby-python-elixir-and-more +# https://gist.github.com/tekin/12500956bd56784728e490d8cef9cb81 +*.css diff=css +*.html diff=html +*.py diff=python +*.md diff=markdown +*.inc.rs diff=rust + +# Generated files +# https://github.com/github/linguist/blob/master/docs/overrides.md +# +# To always hide generated files in local diffs, mark them as binary: +# $ git config diff.generated.binary true +# +[attr]generated linguist-generated=true diff=generated + +Lib/_opcode_metadata.py generated +Lib/keyword.py generated +Lib/idlelib/help.html generated +Lib/test/certdata/*.pem generated +Lib/test/certdata/*.0 generated +Lib/test/levenshtein_examples.json generated +Lib/test/test_stable_abi_ctypes.py generated +Lib/token.py generated diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 00000000000..18ba1b6951f --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,10 @@ + + +- [ ] Closes #xxxx +- [ ] This PR follows our [AI policy](https://github.com/RustPython/.github/blob/main/AI_POLICY.md) + +## Summary + + diff --git a/.github/actions/install-linux-deps/action.yml b/.github/actions/install-linux-deps/action.yml new file mode 100644 index 00000000000..7900060fb29 --- /dev/null +++ b/.github/actions/install-linux-deps/action.yml @@ -0,0 +1,49 @@ +# This action installs a few dependencies necessary to build RustPython on Linux. +# It can be configured depending on which libraries are needed: +# +# ``` +# - uses: ./.github/actions/install-linux-deps +# with: +# gcc-multilib: true +# musl-tools: false +# ``` +# +# See the `inputs` section for all options and their defaults. Note that you must checkout the +# repository before you can use this action. +# +# This action will only install dependencies when the current operating system is Linux. It will do +# nothing on any other OS (macOS, Windows). + +name: Install Linux dependencies +description: Installs the dependencies necessary to build RustPython on Linux. +inputs: + gcc-multilib: + description: Install gcc-multilib (gcc-multilib) + required: false + default: "false" + musl-tools: + description: Install musl-tools (musl-tools) + required: false + default: "false" + gcc-aarch64-linux-gnu: + description: Install gcc-aarch64-linux-gnu (gcc-aarch64-linux-gnu) + required: false + default: "false" + clang: + description: Install clang (clang) + required: false + default: "false" +runs: + using: composite + steps: + - name: Install Linux dependencies + shell: bash + if: ${{ runner.os == 'Linux' }} + run: > + sudo apt-get update + + sudo apt-get install --no-install-recommends + ${{ fromJSON(inputs.gcc-multilib) && 'gcc-multilib' || '' }} + ${{ fromJSON(inputs.musl-tools) && 'musl-tools' || '' }} + ${{ fromJSON(inputs.clang) && 'clang' || '' }} + ${{ fromJSON(inputs.gcc-aarch64-linux-gnu) && 'gcc-aarch64-linux-gnu linux-libc-dev-arm64-cross libc6-dev-arm64-cross' || '' }} diff --git a/.github/actions/install-macos-deps/action.yml b/.github/actions/install-macos-deps/action.yml new file mode 100644 index 00000000000..46abef197a4 --- /dev/null +++ b/.github/actions/install-macos-deps/action.yml @@ -0,0 +1,47 @@ +# This action installs a few dependencies necessary to build RustPython on macOS. By default it installs +# autoconf, automake and libtool, but can be configured depending on which libraries are needed: +# +# ``` +# - uses: ./.github/actions/install-macos-deps +# with: +# openssl: true +# libtool: false +# ``` +# +# See the `inputs` section for all options and their defaults. Note that you must checkout the +# repository before you can use this action. +# +# This action will only install dependencies when the current operating system is macOS. It will do +# nothing on any other OS (Linux, Windows). + +name: Install macOS dependencies +description: Installs the dependencies necessary to build RustPython on macOS. +inputs: + autoconf: + description: Install autoconf (autoconf) + required: false + default: "true" + automake: + description: Install automake (automake) + required: false + default: "true" + libtool: + description: Install libtool (libtool) + required: false + default: "true" + openssl: + description: Install openssl (openssl@3) + required: false + default: "false" +runs: + using: composite + steps: + - name: Install macOS dependencies + shell: bash + if: ${{ runner.os == 'macOS' }} + run: > + brew install + ${{ fromJSON(inputs.autoconf) && 'autoconf' || '' }} + ${{ fromJSON(inputs.automake) && 'automake' || '' }} + ${{ fromJSON(inputs.libtool) && 'libtool' || '' }} + ${{ fromJSON(inputs.openssl) && 'openssl@3' || '' }} diff --git a/.github/aw/actions-lock.json b/.github/aw/actions-lock.json new file mode 100644 index 00000000000..ad986cbd051 --- /dev/null +++ b/.github/aw/actions-lock.json @@ -0,0 +1,14 @@ +{ + "entries": { + "actions/github-script@v8": { + "repo": "actions/github-script", + "version": "v8", + "sha": "ed597411d8f924073f98dfc5c65a23a2325f34cd" + }, + "github/gh-aw/actions/setup@v0.43.22": { + "repo": "github/gh-aw/actions/setup", + "version": "v0.43.22", + "sha": "fe858c3e14589bf396594a0b106e634d9065823e" + } + } +} diff --git a/.github/dependabot.yml b/.github/dependabot.yml index be006de9a1a..e9e2041d482 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,13 +1,177 @@ -# Keep GitHub Actions up to date with GitHub's Dependabot... -# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot -# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem +# cspell:ignore manyhow tinyvec zeroize version: 2 updates: - - package-ecosystem: github-actions - directory: / + - package-ecosystem: cargo + directories: + - "/" + - "crates/*" + schedule: + interval: weekly + cooldown: + default-days: 7 + semver-major-days: 30 + semver-minor-days: 7 + semver-patch-days: 3 groups: - github-actions: + criterion: + patterns: + - "criterion*" + crypto: + patterns: + - "digest" + - "md-5" + - "sha-1" + - "sha1" + - "sha2" + - "sha3" + - "shake" + - "blake2" + - "hmac" + - "pbkdf2" + futures: + patterns: + - "futures*" + get-size2: + patterns: + - "get-size*2" + iana-time-zone: + patterns: + - "iana-time-zone*" + jiff: + patterns: + - "jiff*" + lexical: + patterns: + - "lexical*" + libffi: + patterns: + - "libffi*" + malachite: + patterns: + - "malachite*" + manyhow: + patterns: + - "manyhow*" + num: + patterns: + - "num-bigint" + - "num-complex" + - "num-integer" + - "num-iter" + - "num-rational" + - "num-traits" + num_enum: + patterns: + - "num_enum*" + openssl: + patterns: + - "openssl*" + parking_lot: + patterns: + - "parking_lot*" + phf: + patterns: + - "phf*" + plotters: + patterns: + - "plotters*" + portable-atomic: + patterns: + - "portable-atomic*" + pyo3: + patterns: + - "pyo3*" + quote-use: + patterns: + - "quote-use*" + random: + patterns: + - "getrandom" + - "mt19937" + - "rand*" + - "chacha20" + rayon: + patterns: + - "rayon*" + regex: patterns: - - "*" # Group all Actions updates into a single larger pull request + - "regex*" + result-like: + patterns: + - "result-like*" + security-framework: + patterns: + - "security-framework*" + serde: + patterns: + - "serde" + - "serde_core" + - "serde_derive" + system-configuration: + patterns: + - "system-configuration*" + thiserror: + patterns: + - "thiserror*" + time: + patterns: + - "time*" + tinyvec: + patterns: + - "tinyvec*" + tls_codec: + patterns: + - "tls_codec*" + toml: + patterns: + - "toml*" + unix: + patterns: + - "mac_address" + - "nix" + - "rustyline" + wasm-bindgen: + patterns: + - "wasm-bindgen*" + wasmtime: + patterns: + - "cranelift*" + - "wasmtime*" + webpki-root: + patterns: + - "webpki-root*" + windows: + patterns: + - "windows*" + zerocopy: + patterns: + - "zerocopy*" + zeroize: + patterns: + - "zeroize*" + ignore: + # TODO: Remove when we use ruff from crates.io + # for some reason dependabot only updates the Cargo.lock file when dealing + # with git dependencies. i.e. not updating the version in Cargo.toml + - dependency-name: "ruff_*" + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly + cooldown: + default-days: 7 + - package-ecosystem: npm + directory: / + schedule: + interval: weekly + cooldown: + default-days: 7 + semver-major-days: 30 + semver-minor-days: 7 + semver-patch-days: 3 + - package-ecosystem: pre-commit + directory: / schedule: interval: weekly + cooldown: + default-days: 7 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ee8d274c182..72599d28703 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -4,384 +4,716 @@ on: pull_request: types: [unlabeled, opened, synchronize, reopened] merge_group: + workflow_dispatch: name: CI +permissions: + contents: read + # Cancel previous workflows if they are the same workflow on same ref (branch/tags) # with the same event (push/pull_request) even they are in progress. # This setting will help reduce the number of duplicated workflows. concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.event.pull_request.number || github.sha }} cancel-in-progress: true env: - CARGO_ARGS: --no-default-features --features stdlib,zlib,importlib,encodings,sqlite,ssl - # Skip additional tests on Windows. They are checked on Linux and MacOS. - WINDOWS_SKIPS: >- - test_datetime - test_glob - test_importlib - test_io - test_os - test_pathlib - test_posixpath - test_venv - # configparser: https://github.com/RustPython/RustPython/issues/4995#issuecomment-1582397417 - # socketserver: seems related to configparser crash. - MACOS_SKIPS: >- - test_configparser - test_socketserver - # PLATFORM_INDEPENDENT_TESTS are tests that do not depend on the underlying OS. They are currently - # only run on Linux to speed up the CI. - PLATFORM_INDEPENDENT_TESTS: >- - test_argparse - test_array - test_asyncgen - test_binop - test_bisect - test_bool - test_bytes - test_call - test_class - test_cmath - test_collections - test_complex - test_contains - test_copy - test_dataclasses - test_decimal - test_decorators - test_defaultdict - test_deque - test_dict - test_dictcomps - test_dictviews - test_dis - test_enumerate - test_exception_variations - test_exceptions - test_float - test_format - test_fractions - test_genericalias - test_genericclass - test_grammar - test_range - test_index - test_int - test_int_literal - test_isinstance - test_iter - test_iterlen - test_itertools - test_json - test_keyword - test_keywordonlyarg - test_list - test_long - test_longexp - test_math - test_operator - test_ordered_dict - test_pow - test_raise - test_richcmp - test_scope - test_set - test_slice - test_sort - test_string - test_string_literals - test_strtod - test_structseq - test_subclassinit - test_super - test_syntax - test_tuple - test_types - test_unary - test_unicode - test_unpack - test_weakref - test_yield_from - # Python version targeted by the CI. - PYTHON_VERSION: "3.12.3" + CARGO_ARGS: --no-default-features --features stdlib,importlib,stdio,encodings,sqlite,ssl-rustls-aws-lc,host_env + CARGO_ARGS_NO_SSL: --no-default-features --features stdlib,importlib,stdio,encodings,sqlite,host_env + # Crates excluded from workspace builds: + # - rustpython_wasm: requires wasm target + # - rustpython-compiler-source: deprecated + # - rustpython-venvlauncher: Windows-only + WORKSPACE_EXCLUDES: --exclude rustpython_wasm --exclude rustpython-compiler-source --exclude rustpython-venvlauncher + X86_64_PC_WINDOWS_MSVC_OPENSSL_LIB_DIR: C:\Program Files\OpenSSL\lib\VC\x64\MD + X86_64_PC_WINDOWS_MSVC_OPENSSL_INCLUDE_DIR: C:\Program Files\OpenSSL\include + CARGO_INCREMENTAL: 0 + CARGO_PROFILE_TEST_DEBUG: 0 + CARGO_PROFILE_DEV_DEBUG: 0 + CARGO_PROFILE_RELEASE_DEBUG: 0 + CARGO_TERM_COLOR: always + CI: true jobs: + determine_changes: + name: Determine changes + runs-on: ubuntu-slim + outputs: + # Flag that is raised when any rust code is changed. + rust_code: ${{ steps.check_rust_code.outputs.changed }} + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + fetch-depth: 0 + persist-credentials: false + + - name: Determine merge base + id: merge_base + run: | + sha=$(git merge-base HEAD "origin/${BASE_REF}") + echo "sha=${sha}" >> "$GITHUB_OUTPUT" + env: + BASE_REF: ${{ github.event.pull_request.base.ref || 'main' }} + + - name: Check if there was any code related change + id: check_rust_code + run: | + if git diff --quiet "${MERGE_BASE}...HEAD" -- \ + ':Cargo.toml' \ + ':Cargo.lock' \ + ':rust-toolchain.toml' \ + ':.cargo/config.toml' \ + ':crates/**' \ + ':src/**' \ + ':.github/workflows/ci.yaml' \ + ; then + echo "changed=false" >> "$GITHUB_OUTPUT" + else + echo "changed=true" >> "$GITHUB_OUTPUT" + fi + env: + MERGE_BASE: ${{ steps.merge_base.outputs.sha }} + rust_tests: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} env: RUST_BACKTRACE: full name: Run rust tests - runs-on: ${{ matrix.os }} + runs-on: ${{ matrix.os }} + timeout-minutes: 45 strategy: matrix: - os: [macos-latest, ubuntu-latest, windows-latest] + os: [macos-latest, ubuntu-latest, windows-2025] fail-fast: false steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - uses: dtolnay/rust-toolchain@stable + + - name: Restore cache + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: - components: clippy - - uses: Swatinem/rust-cache@v2 + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}- + restore-keys: | + ${{ runner.os }}-stable--${{ hashFiles('**/Cargo.toml') }}- + ${{ runner.os }}-stable-- + # Windows runners randomly crashes, https://github.com/actions/cache/issues/1754 + continue-on-error: true - - name: Set up the Windows environment - shell: bash + - name: Install macOS dependencies + uses: ./.github/actions/install-macos-deps + + - name: run rust tests + run: cargo test --workspace --exclude rustpython-capi ${{ env.WORKSPACE_EXCLUDES }} --features threading ${{ env.CARGO_ARGS }} + env: + INSTA_WORKSPACE_ROOT: ${{ github.workspace }} + + - name: run c-api tests + working-directory: crates/capi + run: cargo test + if: runner.os != 'Windows' # Requires pyo3 0.29+ on Windows + + - name: check compilation without host_env (sandbox mode) run: | - cargo install --target-dir=target -v cargo-vcpkg - cargo vcpkg -v build - if: runner.os == 'Windows' - - name: Set up the Mac environment - run: brew install autoconf automake libtool - if: runner.os == 'macOS' + cargo check -p rustpython-vm --no-default-features --features compiler + cargo check -p rustpython-stdlib --no-default-features --features compiler + cargo build --no-default-features --features stdlib,importlib,stdio,encodings,freeze-stdlib + if: runner.os == 'Linux' - - name: run clippy - run: cargo clippy ${{ env.CARGO_ARGS }} --workspace --exclude rustpython_wasm -- -Dwarnings + - name: sandbox smoke test + run: | + target/debug/rustpython extra_tests/snippets/sandbox_smoke.py + target/debug/rustpython extra_tests/snippets/stdlib_re.py + if: runner.os == 'Linux' - - name: run rust tests - run: cargo test --workspace --exclude rustpython_wasm --verbose --features threading ${{ env.CARGO_ARGS }} - if: runner.os != 'macOS' - - name: run rust tests - run: cargo test --workspace --exclude rustpython_wasm --exclude rustpython-jit --verbose --features threading ${{ env.CARGO_ARGS }} - if: runner.os == 'macOS' + - name: Test openssl build + run: cargo build --no-default-features --features ssl-openssl + if: runner.os == 'Linux' + + - name: Test vendored OpenSSL build + run: cargo build --no-default-features --features ssl-openssl-vendor + if: runner.os == 'Linux' + + # - name: Install tk-dev for tkinter build + # run: sudo apt-get update && sudo apt-get install -y tk-dev + # if: runner.os == 'Linux' - - name: check compilation without threading - run: cargo check ${{ env.CARGO_ARGS }} + # - name: Test tkinter build + # run: cargo build --features tkinter + # if: runner.os == 'Linux' - name: Test example projects - run: + run: | cargo run --manifest-path example_projects/barebone/Cargo.toml cargo run --manifest-path example_projects/frozen_stdlib/Cargo.toml if: runner.os == 'Linux' - - name: prepare AppleSilicon build - uses: dtolnay/rust-toolchain@stable - with: - target: aarch64-apple-darwin - if: runner.os == 'macOS' - - name: Check compilation for Apple Silicon - run: cargo check --target aarch64-apple-darwin - if: runner.os == 'macOS' - - name: prepare iOS build - uses: dtolnay/rust-toolchain@stable - with: - target: aarch64-apple-ios - if: runner.os == 'macOS' - - name: Check compilation for iOS - run: cargo check --target aarch64-apple-ios - if: runner.os == 'macOS' + - name: run update_lib tests + run: cargo run -- -m unittest discover -s scripts/update_lib/tests -v + env: + PYTHONPATH: scripts + if: runner.os == 'Linux' - exotic_targets: - if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} - name: Ensure compilation on various targets - runs-on: ubuntu-latest + cargo_check: + name: cargo check + runs-on: ${{ matrix.os }} + needs: + - determine_changes + if: | + ( + !contains(github.event.pull_request.labels.*.name, 'skip:ci') && + needs.determine_changes.outputs.rust_code == 'true' + ) || github.ref == 'refs/heads/main' + strategy: + matrix: + include: + - os: ubuntu-latest + target: aarch64-linux-android + - os: ubuntu-latest + target: i686-unknown-linux-gnu + dependencies: + gcc-multilib: true + - os: ubuntu-latest + target: i686-unknown-linux-musl + dependencies: + musl-tools: true + skip_ssl: true + - os: ubuntu-latest + target: wasm32-wasip2 + skip_ssl: true + - os: ubuntu-latest + target: x86_64-unknown-freebsd + skip_ssl: true + - os: ubuntu-latest + target: aarch64-unknown-linux-gnu + dependencies: + gcc-aarch64-linux-gnu: true + - os: macos-latest + target: aarch64-apple-ios + - os: macos-latest + target: x86_64-apple-darwin + fail-fast: false steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: - target: i686-unknown-linux-gnu + persist-credentials: false - - name: Install gcc-multilib and musl-tools - run: sudo apt-get update && sudo apt-get install gcc-multilib musl-tools - - name: Check compilation for x86 32bit - run: cargo check --target i686-unknown-linux-gnu - - - uses: dtolnay/rust-toolchain@stable + - name: Install dependencies + uses: ./.github/actions/install-linux-deps + # zizmor has an issue with dynamic `with` + # with: ${{ matrix.dependencies || fromJSON('{}') }} with: - target: aarch64-linux-android + gcc-multilib: ${{ matrix.dependencies.gcc-multilib || false }} + musl-tools: ${{ matrix.dependencies.musl-tools || false }} + gcc-aarch64-linux-gnu: ${{ matrix.dependencies.gcc-aarch64-linux-gnu || false }} - - name: Check compilation for android - run: cargo check --target aarch64-linux-android + - name: Restore cache + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + # key won't match, will rely on restore-keys + key: ${{ runner.os }}-${{ matrix.target }} + restore-keys: | + ${{ runner.os }}-stable-${{ matrix.target }}-${{ hashFiles('**/Cargo.toml') }}- + ${{ runner.os }}-stable-${{ matrix.target }}- + ${{ runner.os }}-stable--${{ hashFiles('**/Cargo.toml') }}- + ${{ runner.os }}-stable-- - uses: dtolnay/rust-toolchain@stable with: - target: aarch64-unknown-linux-gnu - - - name: Install gcc-aarch64-linux-gnu - run: sudo apt install gcc-aarch64-linux-gnu - - name: Check compilation for aarch64 linux gnu - run: cargo check --target aarch64-unknown-linux-gnu + target: ${{ matrix.target }} - - uses: dtolnay/rust-toolchain@stable + - name: Setup Android NDK + if: ${{ matrix.target == 'aarch64-linux-android' }} + id: setup-ndk + uses: nttld/setup-ndk@ed92fe6cadad69be94a966a7ee3271275e62f779 # v1.6.0 with: - target: i686-unknown-linux-musl + ndk-version: r27 + add-to-path: true - - name: Check compilation for musl - run: cargo check --target i686-unknown-linux-musl + - name: Append env conf to cargo + if: ${{ matrix.target == 'aarch64-linux-android' }} + env: + NDK_PATH: ${{ steps.setup-ndk.outputs.ndk-path }} + run: | + { + echo "[env]" + echo "CC_aarch64_linux_android = \"${NDK_PATH}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang\"" - - uses: dtolnay/rust-toolchain@stable - with: - target: x86_64-unknown-freebsd + echo "AR_aarch64_linux_android = \"${NDK_PATH}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-ar\"" - - name: Check compilation for freebsd - run: cargo check --target x86_64-unknown-freebsd + echo "CARGO_TARGET_AARCH64_LINUX_ANDROID_LINKER = \"${NDK_PATH}/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android24-clang\"" + } >> .cargo/config.toml - - uses: dtolnay/rust-toolchain@stable - with: - target: x86_64-unknown-freebsd + # - name: Prepare repository for redox compilation + # run: bash scripts/redox/uncomment-cargo.sh + # - name: Check compilation for Redox + # uses: coolreader18/redoxer-action@v1 + # with: + # command: check + # args: --ignore-rust-version - - name: Check compilation for freeBSD - run: cargo check --target x86_64-unknown-freebsd + - name: Check compilation with threading + run: cargo check --target "${{ matrix.target }}" ${{ env.CARGO_ARGS_NO_SSL }} --features threading - - name: Prepare repository for redox compilation - run: bash scripts/redox/uncomment-cargo.sh - - name: Check compilation for Redox - uses: coolreader18/redoxer-action@v1 - with: - command: check - args: --ignore-rust-version + - name: Check compilation with ssl + if: ${{ !matrix.skip_ssl }} + run: cargo check --target "${{ matrix.target }}" ${{ env.CARGO_ARGS }} snippets_cpython: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} env: RUST_BACKTRACE: full + # Tests that can be flaky when running with multiple processes `-j 2`. We will use `-j 1` for these. + FLAKY_MP_TESTS: >- + test_class + test_concurrent_futures + test_eintr + test_multiprocessing_fork + test_multiprocessing_forkserver + test_multiprocessing_spawn name: Run snippets and cpython tests - runs-on: ${{ matrix.os }} + runs-on: ${{ matrix.os }} strategy: matrix: - os: [macos-latest, ubuntu-latest, windows-latest] + include: + - os: macos-latest + extra_test_args: + - '-u all' + env_polluting_tests: + - test_set + skips: [] + timeout: 50 + - os: ubuntu-latest + extra_test_args: + - '-u all' + env_polluting_tests: + - test_set + skips: [] + timeout: 60 + - os: windows-2025 + extra_test_args: + - '-u all' + env_polluting_tests: + - test_set + skips: [] + timeout: 50 fail-fast: false steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - uses: dtolnay/rust-toolchain@stable - - uses: Swatinem/rust-cache@v2 - - uses: actions/setup-python@v5 + + - name: Restore cache + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: - python-version: ${{ env.PYTHON_VERSION }} - - name: Set up the Windows environment - shell: bash - run: | - cargo install cargo-vcpkg - cargo vcpkg build - if: runner.os == 'Windows' - - name: Set up the Mac environment - run: brew install autoconf automake libtool openssl@3 - if: runner.os == 'macOS' - - name: build rustpython - run: cargo build --release --verbose --features=threading ${{ env.CARGO_ARGS }} - if: runner.os == 'macOS' - - name: build rustpython - run: cargo build --release --verbose --features=threading ${{ env.CARGO_ARGS }},jit - if: runner.os != 'macOS' - - uses: actions/setup-python@v5 + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}- + restore-keys: | + ${{ runner.os }}-stable--${{ hashFiles('**/Cargo.toml') }}- + ${{ runner.os }}-stable-- + # Windows runners randomly crashes, https://github.com/actions/cache/issues/1754 + continue-on-error: true + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + + - name: Install macOS dependencies + uses: ./.github/actions/install-macos-deps with: - python-version: ${{ env.PYTHON_VERSION }} + openssl: true + + - name: build rustpython + run: cargo build --release --verbose --features=threading,jit ${{ env.CARGO_ARGS }} + - name: run snippets run: python -m pip install -r requirements.txt && pytest -v working-directory: ./extra_tests - - if: runner.os == 'Linux' - name: run cpython platform-independent tests - run: - target/release/rustpython -m test -j 1 -u all --slowest --fail-env-changed -v ${{ env.PLATFORM_INDEPENDENT_TESTS }} - - if: runner.os == 'Linux' - name: run cpython platform-dependent tests (Linux) - run: target/release/rustpython -m test -j 1 -u all --slowest --fail-env-changed -v -x ${{ env.PLATFORM_INDEPENDENT_TESTS }} - - if: runner.os == 'macOS' - name: run cpython platform-dependent tests (MacOS) - run: target/release/rustpython -m test -j 1 --slowest --fail-env-changed -v -x ${{ env.PLATFORM_INDEPENDENT_TESTS }} ${{ env.MACOS_SKIPS }} - - if: runner.os == 'Windows' - name: run cpython platform-dependent tests (windows partial - fixme) - run: - target/release/rustpython -m test -j 1 --slowest --fail-env-changed -v -x ${{ env.PLATFORM_INDEPENDENT_TESTS }} ${{ env.WINDOWS_SKIPS }} + + - name: Detect available cores + id: cores + shell: bash + run: | + cores=$(python -c 'print(__import__("os").process_cpu_count())') + echo "cores=${cores}" >> "$GITHUB_OUTPUT" + + - name: Run CPython tests + run: | + target/release/rustpython -m test -j ${{ steps.cores.outputs.cores }} ${{ join(matrix.extra_test_args, ' ') }} --slowest --fail-env-changed --timeout 600 -v -x ${{ env.FLAKY_MP_TESTS }} ${{ join(matrix.skips, ' ') }} + timeout-minutes: ${{ matrix.timeout }} + env: + RUSTPYTHON_SKIP_ENV_POLLUTERS: true + + - name: Run flaky MP CPython tests + run: | + for attempt in $(seq 1 5); do + echo "::group::Attempt ${attempt}" + + set +e + target/release/rustpython -m test -j 1 ${{ join(matrix.extra_test_args, ' ') }} --slowest --fail-env-changed --timeout 600 -v ${{ env.FLAKY_MP_TESTS }} + status=$? + set -e + + echo "::endgroup::" + + if [ $status -eq 0 ]; then + exit 0 + fi + done + + exit 1 + timeout-minutes: ${{ matrix.timeout }} + shell: bash + env: + RUSTPYTHON_SKIP_ENV_POLLUTERS: true + + - name: run cpython tests to check if env polluters have stopped polluting + shell: bash + run: | + IFS=' ' read -r -a target_array <<< "$TARGETS" + + for thing in "${target_array[@]}"; do + for i in $(seq 1 10); do + set +e + target/release/rustpython -m test -j 1 --slowest --fail-env-changed --timeout 600 -v "${thing}" + exit_code=$? + set -e + if [ "${exit_code}" -eq 3 ]; then + echo "Test ${thing} polluted the environment on attempt ${i}." + break + fi + done + if [ "${exit_code}" -ne 3 ]; then + echo "Test ${thing} is no longer polluting the environment after ${i} attempts!" + echo "Please remove ${thing} from matrix.env_polluting_tests in '.github/workflows/ci.yaml'." + echo "Please also remove the skip decorators that include the word 'POLLUTERS' in ${thing}." + if [ "${exit_code}" -ne 0 ]; then + echo "Test ${thing} failed with exit code ${exit_code}." + echo "Please investigate which test item in ${thing} is failing and either mark it as an expected failure or a skip." + fi + exit 1 + fi + done + env: + TARGETS: ${{ join(matrix.env_polluting_tests, ' ') }} + timeout-minutes: 15 + - if: runner.os != 'Windows' name: check that --install-pip succeeds run: | mkdir site-packages target/release/rustpython --install-pip ensurepip --user target/release/rustpython -m pip install six - - if: runner.os != 'Windows' - name: Check that ensurepip succeeds. + + - name: Check that ensurepip succeeds. run: | target/release/rustpython -m ensurepip target/release/rustpython -c "import pip" + - if: runner.os != 'Windows' name: Check if pip inside venv is functional run: | target/release/rustpython -m venv testvenv testvenv/bin/rustpython -m pip install wheel + - name: Check whats_left is not broken - run: python -I whats_left.py + shell: bash + run: python -I scripts/whats_left.py ${{ env.CARGO_ARGS }} --features jit + + clippy: + name: clippy + runs-on: ${{ matrix.os }} + needs: + - determine_changes + permissions: + contents: read + if: | + needs.determine_changes.outputs.rust_code == 'true' || + github.ref == 'refs/heads/main' + strategy: + fail-fast: false + matrix: + os: + - macos-latest + - ubuntu-latest + - windows-latest + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + + - uses: dtolnay/rust-toolchain@stable + with: + components: clippy + + - name: Restore cache + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}- + restore-keys: | + ${{ runner.os }}-stable--${{ hashFiles('**/Cargo.toml') }}- + ${{ runner.os }}-stable-- + # Windows runners randomly crashes, https://github.com/actions/cache/issues/1754 + continue-on-error: true + + - name: Clippy + run: cargo clippy --keep-going ${{ env.CARGO_ARGS }} --workspace --all-targets ${{ env.WORKSPACE_EXCLUDES }} -- -Dwarnings + + cargo_shear: + name: cargo shear + runs-on: ubuntu-latest + needs: + - determine_changes + permissions: + contents: read + if: | + needs.determine_changes.outputs.rust_code == 'true' || + github.ref == 'refs/heads/main' + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + + - uses: dtolnay/rust-toolchain@stable + + - uses: cargo-bins/cargo-binstall@30b5ca8b54e1dcffd9548bc87ede1531310fdc67 # v1.20.0 + + - name: cargo shear + run: | + cargo binstall --no-confirm cargo-shear + cargo shear lint: - name: Check Rust code with rustfmt and clippy + name: Lint runs-on: ubuntu-latest + permissions: + contents: read + checks: write + issues: write + pull-requests: write + security-events: write # for zizmor steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + - uses: dtolnay/rust-toolchain@stable with: - components: rustfmt, clippy - - name: run rustfmt - run: cargo fmt --check - - name: run clippy on wasm - run: cargo clippy --manifest-path=wasm/lib/Cargo.toml -- -Dwarnings - - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VERSION }} - - name: install ruff - run: python -m pip install ruff==0.0.291 # astral-sh/ruff#7778 - - name: run python lint - run: ruff extra_tests wasm examples --exclude='./.*',./Lib,./vm/Lib,./benches/ --select=E9,F63,F7,F82 --show-source - - name: install prettier - run: yarn global add prettier && echo "$(yarn global bin)" >>$GITHUB_PATH - - name: check wasm code with prettier - # prettier doesn't handle ignore files very well: https://github.com/prettier/prettier/issues/8506 - run: cd wasm && git ls-files -z | xargs -0 prettier --check -u + components: rustfmt + + - name: actionlint + uses: reviewdog/action-actionlint@6fb7acc99f4a1008869fa8a0f09cfca740837d9d # v1.72.0 + + - name: zizmor + uses: zizmorcore/zizmor-action@5f14fd08f7cf1cb1609c1e344975f152c7ee938d # v0.5.6 + + - name: restore prek cache + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + key: prek-${{ hashFiles('.pre-commit-config.yaml') }} + path: ~/.cache/prek + + - name: install prek + id: prek + uses: j178/prek-action@bdca6f102f98e2b4c7029491a53dfd366469e33d # v2.0.4 + with: + cache: false + show-verbose-logs: false + install-only: true + + - name: prek run + run: prek run --show-diff-on-failure --color=always --all-files + + - name: Get target CPython version + id: cpython-version + run: | + version=$(cat .python-version) + echo "version=${version}" >> "$GITHUB_OUTPUT" + + - name: Clone CPython + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + repository: python/cpython + path: cpython + ref: "v${{ steps.cpython-version.outputs.version }}" + persist-credentials: false + + - name: prek run (manual stage) + run: prek run --show-diff-on-failure --color=always --all-files --hook-stage manual + env: + CPYTHON_ROOT: ${{ github.workspace }}/cpython + + - name: save prek cache + if: ${{ github.ref == 'refs/heads/main' }} # only save on main + uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + key: prek-${{ hashFiles('.pre-commit-config.yaml') }} + path: ~/.cache/prek + + - name: restore git permissions + if: ${{ !cancelled() }} + run: sudo chown -R "$(id -u):$(id -g)" .git + + - name: reviewdog + if: ${{ !cancelled() }} + uses: reviewdog/action-suggester@aa38384ceb608d00f84b4690cacc83a5aba307ff # v1.24.0 + with: + level: warning + fail_level: error miri: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Run tests under miri runs-on: ubuntu-latest + timeout-minutes: 30 + env: + NIGHTLY_CHANNEL: nightly steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - uses: dtolnay/rust-toolchain@master with: - toolchain: nightly - components: miri + toolchain: ${{ env.NIGHTLY_CHANNEL }} + components: miri + + - name: Restore cache + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }} + restore-keys: | + ${{ runner.os }}- - - uses: Swatinem/rust-cache@v2 - name: Run tests under miri - # miri-ignore-leaks because the type-object circular reference means that there will always be - # a memory leak, at least until we have proper cyclic gc - run: MIRIFLAGS='-Zmiri-ignore-leaks' cargo +nightly miri test -p rustpython-vm -- miri_test + run: cargo +${{ env.NIGHTLY_CHANNEL }} miri test -p rustpython-vm -- miri_test + env: + # miri-ignore-leaks because the type-object circular reference means that there will always be + # a memory leak, at least until we have proper cyclic gc + MIRIFLAGS: "-Zmiri-ignore-leaks" wasm: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Check the WASM package and demo runs-on: ubuntu-latest + timeout-minutes: 30 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - uses: dtolnay/rust-toolchain@stable + with: + components: clippy + + - name: Restore cache + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}- + restore-keys: | + ${{ runner.os }}-stable--${{ hashFiles('**/Cargo.toml') }}- + ${{ runner.os }}-stable-- + ${{ runner.os }}- + + - name: cargo clippy + run: cargo clippy --keep-going --manifest-path=crates/wasm/Cargo.toml -- -Dwarnings - - uses: Swatinem/rust-cache@v2 - name: install wasm-pack run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh - name: install geckodriver run: | - wget https://github.com/mozilla/geckodriver/releases/download/v0.34.0/geckodriver-v0.34.0-linux64.tar.gz + wget https://github.com/mozilla/geckodriver/releases/download/v0.36.0/geckodriver-v0.36.0-linux64.tar.gz mkdir geckodriver - tar -xzf geckodriver-v0.34.0-linux64.tar.gz -C geckodriver - - uses: actions/setup-python@v5 - with: - python-version: ${{ env.PYTHON_VERSION }} + tar -xzf geckodriver-v0.36.0-linux64.tar.gz -C geckodriver + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + - run: python -m pip install -r requirements.txt working-directory: ./wasm/tests - - uses: actions/setup-node@v4 + + - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + package-manager-cache: false + + - name: Get npm cache directory + id: npm-cache-dir + shell: bash + run: echo "dir=$(npm config get cache)" >> "$GITHUB_OUTPUT" + + - name: Restore npm cache + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + # don't restore on main or release + if: github.ref != 'refs/heads/main' && github.ref != 'refs/heads/release' + with: + path: ${{ steps.npm-cache-dir.outputs.dir }} + key: node-${{ runner.os }}-wasm-demo- + restore-keys: | + node-${{ runner.os }}-wasm-demo- + - name: run test run: | - export PATH=$PATH:`pwd`/../../geckodriver + driver_path="$(pwd)/../../geckodriver" + export PATH="$PATH:${driver_path}" npm install npm run test env: NODE_OPTIONS: "--openssl-legacy-provider" working-directory: ./wasm/demo - - uses: mwilliamson/setup-wabt-action@v3 - with: { wabt-version: "1.0.30" } + + - uses: mwilliamson/setup-wabt-action@427f2fdd70bc4dbc2e53c2eb4f19f66162d71bd2 # v4.0.0 + with: + wabt-version: "1.0.36" + - name: check wasm32-unknown without js run: | - cargo build --release --manifest-path wasm/wasm-unknown-test/Cargo.toml --target wasm32-unknown-unknown --verbose - if wasm-objdump -xj Import target/wasm32-unknown-unknown/release/wasm_unknown_test.wasm; then - echo "ERROR: wasm32-unknown module expects imports from the host environment" >2 + cd example_projects/wasm32_without_js/rustpython-without-js + cargo build + cd .. + if wasm-objdump -xj Import rustpython-without-js/target/wasm32-unknown-unknown/debug/rustpython_without_js.wasm; then + echo "ERROR: wasm32-unknown module expects imports from the host environment" >&2 fi + cargo run --release --manifest-path wasm-runtime/Cargo.toml rustpython-without-js/target/wasm32-unknown-unknown/debug/rustpython_without_js.wasm + - name: build notebook demo if: github.ref == 'refs/heads/release' run: | @@ -391,33 +723,102 @@ jobs: env: NODE_OPTIONS: "--openssl-legacy-provider" working-directory: ./wasm/notebook + - name: Deploy demo to Github Pages if: success() && github.ref == 'refs/heads/release' - uses: peaceiris/actions-gh-pages@v4 + uses: peaceiris/actions-gh-pages@84c30a85c19949d7eee79c4ff27748b70285e453 # v4.1.0 env: ACTIONS_DEPLOY_KEY: ${{ secrets.ACTIONS_DEMO_DEPLOY_KEY }} PUBLISH_DIR: ./wasm/demo/dist EXTERNAL_REPOSITORY: RustPython/demo PUBLISH_BRANCH: master + - name: Save npm cache + # Save only on main or release + if: github.ref == 'refs/heads/main' || github.ref == 'refs/heads/release' + uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: ${{ steps.npm-cache-dir.outputs.dir }} + key: node-${{ runner.os }}-wasm-demo-${{ hashFiles('wasm/demo/package-lock.json') }} + wasm-wasi: if: ${{ !contains(github.event.pull_request.labels.*.name, 'skip:ci') }} name: Run snippets and cpython tests on wasm-wasi runs-on: ubuntu-latest + timeout-minutes: 30 steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - uses: dtolnay/rust-toolchain@stable with: target: wasm32-wasip1 - - uses: Swatinem/rust-cache@v2 + - name: Restore cache + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}- + restore-keys: | + ${{ runner.os }}-stable-wasm32-wasip1-${{ hashFiles('**/Cargo.toml') }}- + ${{ runner.os }}-stable-wasm32-wasip1- + ${{ runner.os }}-stable--${{ hashFiles('**/Cargo.toml') }}- + ${{ runner.os }}-stable-- + - name: Setup Wasmer - uses: wasmerio/setup-wasmer@v3 + uses: wasmerio/setup-wasmer@24b15c95293d23f89c68bd40dac76338f773e924 # v3.1 + - name: Install clang - run: sudo apt-get update && sudo apt-get install clang -y + uses: ./.github/actions/install-linux-deps + with: + clang: true + - name: build rustpython - run: cargo build --release --target wasm32-wasip1 --features freeze-stdlib,stdlib --verbose + run: cargo build --profile wasm-release --target wasm32-wasip1 --no-default-features --features freeze-stdlib,stdlib,stdio,importlib,host_env --verbose - name: run snippets - run: wasmer run --dir `pwd` target/wasm32-wasip1/release/rustpython.wasm -- `pwd`/extra_tests/snippets/stdlib_random.py + run: wasmer run --dir "$(pwd)" target/wasm32-wasip1/wasm-release/rustpython.wasm -- "$(pwd)/extra_tests/snippets/stdlib_random.py" - name: run cpython unittest - run: wasmer run --dir `pwd` target/wasm32-wasip1/release/rustpython.wasm -- `pwd`/Lib/test/test_int.py + run: wasmer run --dir "$(pwd)" target/wasm32-wasip1/wasm-release/rustpython.wasm -- "$(pwd)/Lib/test/test_int.py" + + cargo_doc: + needs: + - determine_changes + if: | + ( + !contains(github.event.pull_request.labels.*.name, 'skip:ci') && + needs.determine_changes.outputs.rust_code == 'true' + ) || github.ref == 'refs/heads/main' + env: + RUST_BACKTRACE: full + name: cargo doc + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + + - uses: dtolnay/rust-toolchain@stable + + - name: Restore cache + uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}- + restore-keys: | + ${{ runner.os }}-stable--${{ hashFiles('**/Cargo.toml') }}- + ${{ runner.os }}-stable-- + + - name: cargo doc + run: cargo doc --locked + diff --git a/.github/workflows/comment-commands.yml b/.github/workflows/comment-commands.yml new file mode 100644 index 00000000000..eb9e2f96d1a --- /dev/null +++ b/.github/workflows/comment-commands.yml @@ -0,0 +1,23 @@ +name: Comment Commands + +on: + issue_comment: + types: created + +jobs: + issue_assign: + if: (!github.event.issue.pull_request) && github.event.comment.body == 'take' + runs-on: ubuntu-slim + + concurrency: + group: ${{ github.actor }}-issue-assign + + permissions: + issues: write + + steps: + # Using REST API and not `gh issue edit`. https://github.com/cli/cli/issues/6235#issuecomment-1243487651 + - run: | + curl -H "Authorization: token ${{ github.token }}" -d '{"assignees": ["${{ env.USER }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees + env: + USER: ${{ github.event.comment.user.login }} diff --git a/.github/workflows/cron-ci.yaml b/.github/workflows/cron-ci.yaml index 0880e2d249c..5721a642615 100644 --- a/.github/workflows/cron-ci.yaml +++ b/.github/workflows/cron-ci.yaml @@ -1,13 +1,20 @@ +name: Periodic checks/tasks + on: schedule: - - cron: '0 0 * * 6' + - cron: "0 0 * * 6" workflow_dispatch: - -name: Periodic checks/tasks + push: + paths: + - .github/workflows/cron-ci.yaml + branches: + - main + pull_request: + paths: + - .github/workflows/cron-ci.yaml env: - CARGO_ARGS: --no-default-features --features stdlib,zlib,importlib,encodings,ssl,jit - PYTHON_VERSION: "3.12.0" + CARGO_ARGS: --no-default-features --features stdlib,importlib,stdio,encodings,ssl-rustls-aws-lc,jit,host_env jobs: # codecov collects code coverage data from the rust tests, python snippets and python test suite. @@ -15,42 +22,67 @@ jobs: codecov: name: Collect code coverage data runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. + if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} + env: + INSTA_WORKSPACE_ROOT: ${{ github.workspace }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - uses: dtolnay/rust-toolchain@stable - - uses: taiki-e/install-action@cargo-llvm-cov - - uses: actions/setup-python@v5 + + - uses: taiki-e/install-action@0631aa6515c7d545823c67cfae7ef4fc7f490154 # v2.81.8 with: - python-version: ${{ env.PYTHON_VERSION }} + tool: cargo-llvm-cov + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + - run: sudo apt-get update && sudo apt-get -y install lcov + - name: Run cargo-llvm-cov with Rust tests. - run: cargo llvm-cov --no-report --workspace --exclude rustpython_wasm --verbose --no-default-features --features stdlib,zlib,importlib,encodings,ssl,jit + run: cargo llvm-cov --no-report --workspace --exclude rustpython_wasm --exclude rustpython-compiler-source --exclude rustpython-venvlauncher --verbose --no-default-features --features stdlib,importlib,stdio,encodings,ssl-rustls-aws-lc,jit,host_env + - name: Run cargo-llvm-cov with Python snippets. run: python scripts/cargo-llvm-cov.py continue-on-error: true + - name: Run cargo-llvm-cov with Python test suite. - run: cargo llvm-cov --no-report run -- -m test -u all --slowest --fail-env-changed + run: cargo llvm-cov --no-report run -- -m test -u all --slowest --fail-env-changed continue-on-error: true + - name: Prepare code coverage data run: cargo llvm-cov report --lcov --output-path='codecov.lcov' + - name: Upload to Codecov - uses: codecov/codecov-action@v5 + if: ${{ github.event_name != 'pull_request' }} + uses: codecov/codecov-action@fb8b3582c8e4def4969c97caa2f19720cb33a72f # v7.0.0 with: - file: ./codecov.lcov + files: ./codecov.lcov testdata: name: Collect regression test data runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. + if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: true + - uses: dtolnay/rust-toolchain@stable + - name: build rustpython run: cargo build --release --verbose + - name: collect tests data run: cargo run --release extra_tests/jsontests.py env: RUSTPYTHONPATH: ${{ github.workspace }}/Lib + - name: upload tests data to the website + if: ${{ github.event_name != 'pull_request' }} env: SSHKEY: ${{ secrets.ACTIONS_TESTS_DATA_DEPLOY_KEY }} GITHUB_ACTOR: ${{ github.actor }} @@ -70,21 +102,29 @@ jobs: whatsleft: name: Collect what is left data runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. + if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - uses: actions/setup-python@v5 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: - python-version: ${{ env.PYTHON_VERSION }} + persist-credentials: true + + - uses: dtolnay/rust-toolchain@stable + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + - name: build rustpython run: cargo build --release --verbose + - name: Collect what is left data run: | - chmod +x ./whats_left.py - ./whats_left.py > whats_left.temp + chmod +x ./scripts/whats_left.py + ./scripts/whats_left.py --features "ssl,sqlite" > whats_left.temp env: RUSTPYTHONPATH: ${{ github.workspace }}/Lib + - name: Upload data to the website + if: ${{ github.event_name != 'pull_request' }} env: SSHKEY: ${{ secrets.ACTIONS_TESTS_DATA_DEPLOY_KEY }} GITHUB_ACTOR: ${{ github.actor }} @@ -97,6 +137,26 @@ jobs: cd website [ -f ./_data/whats_left.temp ] && cp ./_data/whats_left.temp ./_data/whats_left_lastrun.temp cp ../whats_left.temp ./_data/whats_left.temp + rm ./_data/whats_left/modules.csv + echo -e "module" > ./_data/whats_left/modules.csv + cat ./_data/whats_left.temp | grep "(entire module)" | cut -d ' ' -f 1 | sort >> ./_data/whats_left/modules.csv + awk -f - ./_data/whats_left.temp > ./_data/whats_left/builtin_items.csv <<'EOF' + BEGIN { + OFS="," + print "builtin,name,is_inherited" + } + /^# builtin items/ { in_section=1; next } + /^$/ { if (in_section) exit } + in_section { + split($1, a, ".") + rest = "" + idx = index($0, " ") + if (idx > 0) { + rest = substr($0, idx+1) + } + print a[1], $1, rest + } + EOF git add -A if git -c user.name="Github Actions" -c user.email="actions@github.com" commit -m "Update what is left results" --author="$GITHUB_ACTOR"; then git push @@ -105,32 +165,43 @@ jobs: benchmark: name: Collect benchmark data runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. + if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - uses: actions/setup-python@v5 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: - python-version: 3.9 + persist-credentials: true + + - uses: dtolnay/rust-toolchain@stable + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + - run: cargo install cargo-criterion + - name: build benchmarks run: cargo build --release --benches + - name: collect execution benchmark data run: cargo criterion --bench execution + - name: collect microbenchmarks data run: cargo criterion --bench microbenchmarks + - name: restructure generated files run: | cd ./target/criterion/reports - find -type d -name cpython | xargs rm -rf - find -type d -name rustpython | xargs rm -rf - find -mindepth 2 -maxdepth 2 -name violin.svg | xargs rm -rf - find -type f -not -name violin.svg | xargs rm -rf - for file in $(find -type f -name violin.svg); do mv $file $(echo $file | sed -E "s_\./([^/]+)/([^/]+)/violin\.svg_./\1/\2.svg_"); done - find -mindepth 2 -maxdepth 2 -type d | xargs rm -rf + find . -type d -name cpython -print0 | xargs -0 rm -rf + find . -type d -name rustpython -print0 | xargs -0 rm -rf + find . -mindepth 2 -maxdepth 2 -name violin.svg -print0 | xargs -0 rm -rf + find . -type f -not -name violin.svg -print0 | xargs -0 rm -rf + find . -type f -name violin.svg -exec sh -c 'for file; do mv "$file" "$(echo "$file" | sed -E "s_\./([^/]+)/([^/]+)/violin\.svg_./\1/\2.svg_")"; done' _ {} + + find . -mindepth 2 -maxdepth 2 -type d -print0 | xargs -0 rm -rf cd .. mv reports/* . rmdir reports + - name: upload benchmark data to the website + if: ${{ github.event_name != 'pull_request' }} env: SSHKEY: ${{ secrets.ACTIONS_TESTS_DATA_DEPLOY_KEY }} run: | @@ -142,7 +213,11 @@ jobs: cd website rm -rf ./assets/criterion cp -r ../target/criterion ./assets/criterion - git add ./assets/criterion + printf '{\n "generated_at": "%s",\n "rustpython_commit": "%s",\n "rustpython_ref": "%s"\n}\n' \ + "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ + "${{ github.sha }}" \ + "${{ github.ref_name }}" > ./_data/criterion-metadata.json + git add ./assets/criterion ./_data/criterion-metadata.json if git -c user.name="Github Actions" -c user.email="actions@github.com" commit -m "Update benchmark results"; then git push fi diff --git a/.github/workflows/lib-deps-check.yaml b/.github/workflows/lib-deps-check.yaml new file mode 100644 index 00000000000..e74dd561bb5 --- /dev/null +++ b/.github/workflows/lib-deps-check.yaml @@ -0,0 +1,138 @@ +name: Lib Dependencies Check + +on: + pull_request_target: + types: [opened, synchronize, reopened] + paths: + - "Lib/**" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + check_deps: + permissions: + pull-requests: write + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - name: Checkout base branch + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + # Use base branch for scripts (security: don't run PR code with elevated permissions) + ref: ${{ github.event.pull_request.base.ref }} + fetch-depth: 0 + persist-credentials: false + + - name: Fetch PR head + run: | + git fetch origin ${{ github.event.pull_request.head.sha }} + + - name: Checkout PR Lib files + run: | + # Checkout only Lib/ directory from PR head for accurate comparison + git checkout ${{ github.event.pull_request.head.sha }} -- Lib/ + + - name: Get target CPython version + id: cpython-version + run: | + version=$(cat .python-version) + echo "version=${version}" >> "$GITHUB_OUTPUT" + + - name: Checkout CPython + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + repository: python/cpython + path: cpython + ref: "v${{ steps.cpython-version.outputs.version }}" + fetch-depth: 1 + persist-credentials: false + + - name: Get changed Lib files + id: all-changed-files + run: | + # Get the list of changed files under Lib/ + { + echo 'changed<> "$GITHUB_OUTPUT" + + - name: Parse changed files + id: changed-files + run: | + from os import environ + + files = environ["FILES"] + modules = set() + for file in files.splitlines(): + file = file.strip() + + if file.startswith("Lib/test/"): + # Test files: + # Lib/test/test_pydoc.py -> test_pydoc + # Lib/test/test_pydoc/foo.py -> test_pydoc + module = file.removeprefix("Lib/test/").split("/")[0] + if not module.startswith("test_"): + continue + else: + # Lib files: + # Lib/foo.py -> foo + # Lib/foo/__init__.py -> foo + module = file.removeprefix("Lib/").split("/")[0] + + module = module.split(".")[0] + modules.add(module) + + print(f"{modules=}") + output = " ".join(sorted(modules)) + output_file = environ["GITHUB_OUTPUT"] + with open(output_file, mode="a", encoding="utf-8") as fd: + fd.write(f"modules={output}\n") + env: + FILES: ${{ steps.all-changed-files.outputs.changed }} + shell: python + + - name: Setup Python + if: steps.changed-files.outputs.modules != '' + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + + - name: Run deps check + if: steps.changed-files.outputs.modules != '' + id: deps-check + run: | + # Run deps for all modules at once + echo "deps_output<> "$GITHUB_OUTPUT" + output=$(python scripts/update_lib deps "${MODULES}" --depth 2 2>&1 || true) + echo "$output" >> "$GITHUB_OUTPUT" + echo "EOF" >> "$GITHUB_OUTPUT" + env: + MODULES: ${{ steps.changed-files.outputs.modules }} + + - name: Post comment + if: steps.deps-check.outputs.deps_output != '' + uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0 # v3.0.4 + with: + header: lib-deps-check + number: ${{ github.event.pull_request.number }} + message: | + ## 📦 Library Dependencies + + The following Lib/ modules were modified. Here are their dependencies: + + ${{ steps.deps-check.outputs.deps_output }} + + **Legend:** + - `[+]` path exists in CPython + - `[x]` up-to-date, `[ ]` outdated + + - name: Remove comment if no Lib changes + if: steps.changed-files.outputs.modules == '' + uses: marocchino/sticky-pull-request-comment@0ea0beb66eb9baf113663a64ec522f60e49231c0 # v3.0.4 + with: + header: lib-deps-check + number: ${{ github.event.pull_request.number }} + delete: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5554ed0a8b5..21a0068cbbf 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,87 +12,92 @@ on: required: false default: true -permissions: - contents: write - env: - CARGO_ARGS: --no-default-features --features stdlib,zlib,importlib,encodings,sqlite,ssl + X86_64_PC_WINDOWS_MSVC_OPENSSL_LIB_DIR: C:\Program Files\OpenSSL\lib\VC\x64\MD + X86_64_PC_WINDOWS_MSVC_OPENSSL_INCLUDE_DIR: C:\Program Files\OpenSSL\include + +permissions: {} jobs: build: - runs-on: ${{ matrix.platform.runner }} + runs-on: ${{ matrix.os }} + # Disable this scheduled job when running on a fork. + if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} + permissions: + contents: read strategy: matrix: - platform: - - runner: ubuntu-latest + include: + - os: ubuntu-latest target: x86_64-unknown-linux-gnu -# - runner: ubuntu-latest -# target: i686-unknown-linux-gnu -# - runner: ubuntu-latest -# target: aarch64-unknown-linux-gnu -# - runner: ubuntu-latest -# target: armv7-unknown-linux-gnueabi -# - runner: ubuntu-latest -# target: s390x-unknown-linux-gnu -# - runner: ubuntu-latest -# target: powerpc64le-unknown-linux-gnu - - runner: macos-latest + - os: macos-latest target: aarch64-apple-darwin -# - runner: macos-latest -# target: x86_64-apple-darwin - - runner: windows-latest + - os: windows-2025 target: x86_64-pc-windows-msvc -# - runner: windows-latest -# target: i686-pc-windows-msvc -# - runner: windows-latest -# target: aarch64-pc-windows-msvc + # - os: ubuntu-latest + # target: i686-unknown-linux-gnu + # - os: ubuntu-latest + # target: aarch64-unknown-linux-gnu + # - os: ubuntu-latest + # target: armv7-unknown-linux-gnueabi + # - os: ubuntu-latest + # target: s390x-unknown-linux-gnu + # - os: ubuntu-latest + # target: powerpc64le-unknown-linux-gnu + # - os: macos-latest + # target: x86_64-apple-darwin + # - os: windows-2025 + # target: i686-pc-windows-msvc + # - os: windows-2025 + # target: aarch64-pc-windows-msvc fail-fast: false steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - uses: dtolnay/rust-toolchain@stable + with: + target: ${{ matrix.target }} - - name: Set up Environment - shell: bash - run: rustup target add ${{ matrix.platform.target }} - - name: Set up Windows Environment - shell: bash - run: | - cargo install --target-dir=target -v cargo-vcpkg - cargo vcpkg -v build - if: runner.os == 'Windows' - - name: Set up MacOS Environment - run: brew install autoconf automake libtool - if: runner.os == 'macOS' + - name: Install macOS dependencies + uses: ./.github/actions/install-macos-deps + with: + autoconf: true + automake: true + libtool: true - name: Build RustPython - run: cargo build --release --target=${{ matrix.platform.target }} --verbose --features=threading ${{ env.CARGO_ARGS }} - if: runner.os == 'macOS' - - name: Build RustPython - run: cargo build --release --target=${{ matrix.platform.target }} --verbose --features=threading ${{ env.CARGO_ARGS }},jit - if: runner.os != 'macOS' + run: cargo build --release --target=${{ matrix.target }} --verbose --no-default-features --features stdlib,stdio,importlib,encodings,sqlite,host_env,ssl-rustls-aws-lc,threading,jit - name: Rename Binary - run: cp target/${{ matrix.platform.target }}/release/rustpython target/rustpython-release-${{ runner.os }}-${{ matrix.platform.target }} + run: cp target/${{ matrix.target }}/release/rustpython target/rustpython-release-${{ runner.os }}-${{ matrix.target }} if: runner.os != 'Windows' + - name: Rename Binary - run: cp target/${{ matrix.platform.target }}/release/rustpython.exe target/rustpython-release-${{ runner.os }}-${{ matrix.platform.target }}.exe + run: cp target/${{ matrix.target }}/release/rustpython.exe target/rustpython-release-${{ runner.os }}-${{ matrix.target }}.exe if: runner.os == 'Windows' - name: Upload Binary Artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: - name: rustpython-release-${{ runner.os }}-${{ matrix.platform.target }} - path: target/rustpython-release-${{ runner.os }}-${{ matrix.platform.target }}* + name: rustpython-release-${{ runner.os }}-${{ matrix.target }} + path: target/rustpython-release-${{ runner.os }}-${{ matrix.target }}* build-wasm: runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. + if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} + permissions: + contents: read steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false - - name: Set up Environment - shell: bash - run: rustup target add wasm32-wasip1 + - uses: dtolnay/rust-toolchain@stable + with: + targets: wasm32-wasip1 - name: Build RustPython run: cargo build --target wasm32-wasip1 --no-default-features --features freeze-stdlib,stdlib --release @@ -101,33 +106,78 @@ jobs: run: cp target/wasm32-wasip1/release/rustpython.wasm target/rustpython-release-wasm32-wasip1.wasm - name: Upload Binary Artifacts - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: rustpython-release-wasm32-wasip1 path: target/rustpython-release-wasm32-wasip1.wasm + - name: install wasm-pack + run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh + + - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + package-manager-cache: false + + - uses: mwilliamson/setup-wabt-action@427f2fdd70bc4dbc2e53c2eb4f19f66162d71bd2 # v4.0.0 + with: + wabt-version: "1.0.30" + + - name: build demo + run: | + npm install + npm run dist + env: + NODE_OPTIONS: "--openssl-legacy-provider" + working-directory: ./wasm/demo + + - name: build notebook demo + run: | + npm install + npm run dist + mv dist ../demo/dist/notebook + env: + NODE_OPTIONS: "--openssl-legacy-provider" + working-directory: ./wasm/notebook + + - name: Deploy demo to Github Pages + if: ${{ github.repository == 'RustPython/RustPython' }} + uses: peaceiris/actions-gh-pages@84c30a85c19949d7eee79c4ff27748b70285e453 # v4.1.0 + with: + deploy_key: ${{ secrets.ACTIONS_DEMO_DEPLOY_KEY }} + publish_dir: ./wasm/demo/dist + external_repository: RustPython/demo + publish_branch: master + release: runs-on: ubuntu-latest + # Disable this scheduled job when running on a fork. + if: ${{ github.repository == 'RustPython/RustPython' || github.event_name != 'schedule' }} needs: [build, build-wasm] + permissions: + contents: write # for creating a release steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Download Binary Artifacts - uses: actions/download-artifact@v4 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: path: bin - pattern: rustpython-release-* + pattern: rustpython-* merge-multiple: true + - name: Create Lib Archive + run: zip -r bin/rustpython-lib.zip Lib/ + - name: List Binaries run: | ls -lah bin/ file bin/* + - name: Create Release - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - tag: ${{ github.ref_name }} - run: ${{ github.run_number }} run: | - if [[ "${{ github.event.inputs.pre-release }}" == "false" ]]; then + if [[ "${PRE_RELEASE_INPUT}" == "false" ]]; then RELEASE_TYPE_NAME=Release PRERELEASE_ARG= else @@ -140,6 +190,12 @@ jobs: --repo="$GITHUB_REPOSITORY" \ --title="RustPython $RELEASE_TYPE_NAME $today-$tag #$run" \ --target="$tag" \ + --notes "⚠️ **Important**: To run RustPython, you must download both the binary for your platform AND the \`rustpython-lib.zip\` archive. Extract the Lib directory from the archive to the same location as the binary, or set the \`RUSTPYTHONPATH\` environment variable to point to the Lib directory." \ --generate-notes \ $PRERELEASE_ARG \ - bin/rustpython-release-* \ No newline at end of file + bin/rustpython-release-* + env: + GH_TOKEN: ${{ github.token }} + tag: ${{ github.ref_name }} + run: ${{ github.run_number }} + PRE_RELEASE_INPUT: ${{ github.event.inputs.pre-release }} diff --git a/.github/workflows/update-caches.yml b/.github/workflows/update-caches.yml new file mode 100644 index 00000000000..fc524fa738e --- /dev/null +++ b/.github/workflows/update-caches.yml @@ -0,0 +1,77 @@ +name: Update Actions Caches + +permissions: + contents: read + +on: + workflow_dispatch: + push: + branches: + - main + +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: false + +env: + CARGO_INCREMENTAL: 0 + CARGO_TERM_COLOR: always + CARGO_PROFILE_TEST_DEBUG: 0 + CARGO_PROFILE_DEV_DEBUG: 0 + CARGO_PROFILE_RELEASE_DEBUG: 0 + CARGO_ARGS: --workspace --no-default-features --features stdlib,importlib,stdio,encodings,sqlite,ssl-rustls-aws-lc,host_env,threading,jit --exclude rustpython_wasm --exclude rustpython-compiler-source --exclude rustpython-venvlauncher + +jobs: + build-caches: + name: Build Caches + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + - os: macos-latest + toolchain: stable + target: "" + - os: ubuntu-latest + toolchain: stable + target: "" + - os: windows-latest + toolchain: stable + target: "" + steps: + - name: Checkout RustPython main branch + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + repository: RustPython/RustPython + ref: main + persist-credentials: false + + - name: Setup Rust + uses: dtolnay/rust-toolchain@3c5f7ea28cd621ae0bf5283f0e981fb97b8a7af9 + with: + toolchain: ${{ matrix.toolchain }} + target: ${{ matrix.target }} + + - name: Install macos dependencies + uses: ./.github/actions/install-macos-deps + with: + openssl: true + + - name: Build dev cache # dev profile used by check & doc + run: cargo build --profile dev ${{ env.CARGO_ARGS }} + + - name: Build test cache + run: cargo build --profile test ${{ env.CARGO_ARGS }} + + - name: Build release cache + run: cargo build --profile release ${{ env.CARGO_ARGS }} + + - name: Save cache + uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-${{ matrix.toolchain }}-${{ matrix.target }}-${{ hashFiles('**/Cargo.toml') }}-${{ hashFiles('Cargo.lock') }}-${{ github.sha }} diff --git a/.github/workflows/update-doc-db.yml b/.github/workflows/update-doc-db.yml new file mode 100644 index 00000000000..a543f428cb5 --- /dev/null +++ b/.github/workflows/update-doc-db.yml @@ -0,0 +1,125 @@ +name: Update doc DB + +permissions: {} + +on: + workflow_dispatch: + inputs: + python-version: + description: Target python version to generate doc db for + type: string + default: "3.14.3" + base-ref: + description: Base branch to create the update branch from + type: string + default: "main" + +defaults: + run: + shell: bash + +jobs: + generate: + permissions: + contents: read + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - windows-latest + - macos-latest + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + sparse-checkout: | + crates/doc + + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: ${{ inputs.python-version }} + + - name: Generate docs + run: python crates/doc/generate.py + + - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: doc-db-${{ inputs.python-version }}-${{ matrix.os }} + path: "crates/doc/generated/*.json" + if-no-files-found: error + retention-days: 7 + overwrite: true + + merge: + runs-on: ubuntu-latest + needs: generate + permissions: + contents: write + pull-requests: write + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: true + ref: ${{ inputs.base-ref }} + + - name: Create update branch + run: git switch -c "update-doc-${PYTHON_VERSION}" + env: + PYTHON_VERSION: ${{ inputs.python-version }} + + - name: Download generated doc DBs + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + pattern: "doc-db-${{ inputs.python-version }}-**" + path: crates/doc/generated/ + merge-multiple: true + + - name: Transform JSON + env: + PYTHON_VERSION: ${{ inputs.python-version }} + run: | + # Merge all artifacts + jq -s "add" --sort-keys crates/doc/generated/*.json > crates/doc/generated/merged.json + + # Format merged json for the phf macro + jq -r 'to_entries[] | " \(.key | @json) => \(.value | @json),"' crates/doc/generated/merged.json > crates/doc/generated/raw_entries.txt + + OUTPUT_FILE='crates/doc/src/data.inc.rs' + + # shellcheck disable=SC2016 + { + echo '// This file was auto-generated by `.github/workflows/update-doc-db.yml`.' + echo "// CPython version: ${PYTHON_VERSION}" + echo '// spell-checker: disable' + echo '' + echo "pub static DB: phf::Map<&'static str, &'static str> = phf::phf_map! {" + cat crates/doc/generated/raw_entries.txt + echo '};' + } > "$OUTPUT_FILE" + + - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: doc-db-${{ inputs.python-version }} + path: "crates/doc/src/data.inc.rs" + if-no-files-found: error + retention-days: 7 + overwrite: true + + - name: Commit, push and create PR + env: + GH_TOKEN: ${{ github.token }} + PYTHON_VERSION: ${{ inputs.python-version }} + BASE_REF: ${{ inputs.base-ref }} + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + if [ -n "$(git status --porcelain)" ]; then + git add crates/doc/src/data.inc.rs + git commit -m "Update doc DB for CPython ${PYTHON_VERSION}" + git push -u origin HEAD + gh pr create \ + --base "${BASE_REF}" \ + --title "Update doc DB for CPython ${PYTHON_VERSION}" \ + --body "Auto-generated by update-doc-db workflow." + fi diff --git a/.github/workflows/update-libs-status.yaml b/.github/workflows/update-libs-status.yaml new file mode 100644 index 00000000000..837586913c8 --- /dev/null +++ b/.github/workflows/update-libs-status.yaml @@ -0,0 +1,96 @@ +name: Updated libs status + +on: + push: + branches: + - main + paths: + - "Lib/**" + workflow_dispatch: + +permissions: + contents: read + issues: write + +env: + ISSUE_ID: "6839" + +jobs: + update-issue: + runs-on: ubuntu-latest + if: ${{ github.repository == 'RustPython/RustPython' }} + steps: + - name: Clone RustPython + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + path: rustpython + persist-credentials: false + sparse-checkout: |- + Lib + scripts/update_lib + .python-version + + - name: Get target CPython version + id: cpython-version + run: | + version=$(cat rustpython/.python-version) + echo "version=${version}" >> "$GITHUB_OUTPUT" + + - name: Clone CPython + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + repository: python/cpython + path: cpython + ref: "v${{ steps.cpython-version.outputs.version }}" + persist-credentials: false + sparse-checkout: | + Lib + + - name: Get current date + id: current_date + run: | + now=$(date -u +"%Y-%m-%d %H:%M:%S") + echo "date=$now" >> "$GITHUB_OUTPUT" + + - name: Write body prefix + run: | + cat > body.txt < + + ## Summary + + Check \`scripts/update_lib\` for tools. As a note, the current latest Python version is \`${{ steps.cpython-version.outputs.version }}\`. + + Previous versions' issues as reference + - 3.13: #5529 + + + + ## Details + + ${{ steps.current_date.outputs.date }} (UTC) + \`\`\`shell + $ python3 scripts/update_lib todo --done + \`\`\` + EOF + + - name: Run todo + run: python3 rustpython/scripts/update_lib todo --cpython cpython --lib rustpython/Lib --done >> body.txt + + - name: Update GH issue + run: gh issue edit ${{ env.ISSUE_ID }} --body-file ../body.txt + env: + GH_TOKEN: ${{ github.token }} + working-directory: rustpython + + diff --git a/.github/workflows/upgrade-pylib.lock.yml b/.github/workflows/upgrade-pylib.lock.yml new file mode 100644 index 00000000000..1a028f728b1 --- /dev/null +++ b/.github/workflows/upgrade-pylib.lock.yml @@ -0,0 +1,1095 @@ +# +# ___ _ _ +# / _ \ | | (_) +# | |_| | __ _ ___ _ __ | |_ _ ___ +# | _ |/ _` |/ _ \ '_ \| __| |/ __| +# | | | | (_| | __/ | | | |_| | (__ +# \_| |_/\__, |\___|_| |_|\__|_|\___| +# __/ | +# _ _ |___/ +# | | | | / _| | +# | | | | ___ _ __ _ __| |_| | _____ ____ +# | |/\| |/ _ \ '__| |/ /| _| |/ _ \ \ /\ / / ___| +# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ +# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ +# +# This file was automatically generated by gh-aw (v0.43.22). DO NOT EDIT. +# +# To update this file, edit the corresponding .md file and run: +# gh aw compile +# Not all edits will cause changes to this file. +# +# For more information: https://github.github.com/gh-aw/introduction/overview/ +# +# Pick an out-of-sync Python library from the todo list and upgrade it +# by running `scripts/update_lib quick`, then open a pull request. +# +# frontmatter-hash: 3129480d6628afe028911bb8b31b6bb3b5eb251e395f00ed0677922cd21727cb + +name: "Upgrade Python Library" +"on": + workflow_dispatch: + inputs: + name: + description: Module name to upgrade (leave empty to auto-pick) + required: false + type: string + +permissions: {} + +concurrency: + group: "gh-aw-${{ github.workflow }}" + +run-name: "Upgrade Python Library" + +env: + ISSUE_ID: "6839" + PYTHON_VERSION: v3.14.3 + +# Cache configuration from frontmatter was processed and added to the main job steps + +jobs: + activation: + runs-on: ubuntu-slim + permissions: + contents: read + outputs: + comment_id: "" + comment_repo: "" + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@f990bbb7eb83981a203d4b5eccdc24f677e950c7 # v0.77.5 + with: + destination: /opt/gh-aw/actions + - name: Check workflow file timestamps + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_WORKFLOW_FILE: "upgrade-pylib.lock.yml" + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/check_workflow_timestamp_api.cjs'); + await main(); + + agent: + needs: activation + runs-on: ubuntu-latest + permissions: + contents: read + issues: read + pull-requests: read + concurrency: + group: "gh-aw-copilot-${{ github.workflow }}" + env: + DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} + GH_AW_ASSETS_ALLOWED_EXTS: "" + GH_AW_ASSETS_BRANCH: "" + GH_AW_ASSETS_MAX_SIZE_KB: 0 + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + GH_AW_SAFE_OUTPUTS: /opt/gh-aw/safeoutputs/outputs.jsonl + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: /opt/gh-aw/safeoutputs/config.json + GH_AW_SAFE_OUTPUTS_TOOLS_PATH: /opt/gh-aw/safeoutputs/tools.json + outputs: + checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} + has_patch: ${{ steps.collect_output.outputs.has_patch }} + model: ${{ steps.generate_aw_info.outputs.model }} + output: ${{ steps.collect_output.outputs.output }} + output_types: ${{ steps.collect_output.outputs.output_types }} + secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@f990bbb7eb83981a203d4b5eccdc24f677e950c7 # v0.77.5 + with: + destination: /opt/gh-aw/actions + - name: Checkout repository + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Setup Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.14' + - name: Create gh-aw temp directory + run: bash /opt/gh-aw/actions/create_gh_aw_tmp_dir.sh + # Cache configuration from frontmatter processed below + - name: Cache (cpython-lib-${{ env.PYTHON_VERSION }}) + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + key: cpython-lib-${{ env.PYTHON_VERSION }} + path: cpython + restore-keys: | + cpython-lib- + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Checkout PR branch + id: checkout-pr + if: | + github.event.pull_request + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/checkout_pr_branch.cjs'); + await main(); + - name: Generate agentic run info + id: generate_aw_info + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + with: + script: | + const fs = require('fs'); + + const awInfo = { + engine_id: "copilot", + engine_name: "GitHub Copilot CLI", + model: process.env.GH_AW_MODEL_AGENT_COPILOT || "", + version: "", + agent_version: "0.0.409", + cli_version: "v0.43.22", + workflow_name: "Upgrade Python Library", + experimental: false, + supports_tools_allowlist: true, + supports_http_transport: true, + run_id: context.runId, + run_number: context.runNumber, + run_attempt: process.env.GITHUB_RUN_ATTEMPT, + repository: context.repo.owner + '/' + context.repo.repo, + ref: context.ref, + sha: context.sha, + actor: context.actor, + event_name: context.eventName, + staged: false, + allowed_domains: ["defaults","rust","python"], + firewall_enabled: true, + awf_version: "v0.16.4", + awmg_version: "", + steps: { + firewall: "squid" + }, + created_at: new Date().toISOString() + }; + + // Write to /tmp/gh-aw directory to avoid inclusion in PR + const tmpPath = '/tmp/gh-aw/aw_info.json'; + fs.writeFileSync(tmpPath, JSON.stringify(awInfo, null, 2)); + console.log('Generated aw_info.json at:', tmpPath); + console.log(JSON.stringify(awInfo, null, 2)); + + // Set model as output for reuse in other steps/jobs + core.setOutput('model', awInfo.model); + - name: Validate COPILOT_GITHUB_TOKEN secret + id: validate-secret + run: /opt/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + - name: Install GitHub Copilot CLI + run: /opt/gh-aw/actions/install_copilot_cli.sh 0.0.409 + - name: Install awf binary + run: bash /opt/gh-aw/actions/install_awf_binary.sh v0.16.4 + - name: Determine automatic lockdown mode for GitHub MCP server + id: determine-automatic-lockdown + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + with: + script: | + const determineAutomaticLockdown = require('/opt/gh-aw/actions/determine_automatic_lockdown.cjs'); + await determineAutomaticLockdown(github, context, core); + - name: Download container images + run: bash /opt/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.16.4 ghcr.io/github/gh-aw-firewall/squid:0.16.4 ghcr.io/github/gh-aw-mcpg:v0.1.4 ghcr.io/github/github-mcp-server:v0.30.3 node:lts-alpine + - name: Write Safe Outputs Config + run: | + mkdir -p /opt/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/safeoutputs + mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs + cat > /opt/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_EOF' + {"create_pull_request":{"expires":30},"missing_data":{},"missing_tool":{},"noop":{"max":1}} + GH_AW_SAFE_OUTPUTS_CONFIG_EOF + cat > /opt/gh-aw/safeoutputs/tools.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_EOF' + [ + { + "description": "Create a new GitHub pull request to propose code changes. Use this after making file edits to submit them for review and merging. The PR will be created from the current branch with your committed changes. For code review comments on an existing PR, use create_pull_request_review_comment instead. CONSTRAINTS: Maximum 1 pull request(s) can be created. Title will be prefixed with \"Update \". Labels [pylib-sync] will be automatically added.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "body": { + "description": "Detailed PR description in Markdown. Include what changes were made, why, testing notes, and any breaking changes. Do NOT repeat the title as a heading.", + "type": "string" + }, + "branch": { + "description": "Source branch name containing the changes. If omitted, uses the current working branch.", + "type": "string" + }, + "labels": { + "description": "Labels to categorize the PR (e.g., 'enhancement', 'bugfix'). Labels must exist in the repository.", + "items": { + "type": "string" + }, + "type": "array" + }, + "title": { + "description": "Concise PR title describing the changes. Follow repository conventions (e.g., conventional commits). The title appears as the main heading.", + "type": "string" + } + }, + "required": [ + "title", + "body" + ], + "type": "object" + }, + "name": "create_pull_request" + }, + { + "description": "Report that a tool or capability needed to complete the task is not available, or share any information you deem important about missing functionality or limitations. Use this when you cannot accomplish what was requested because the required functionality is missing or access is restricted.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "alternatives": { + "description": "Any workarounds, manual steps, or alternative approaches the user could take (max 256 characters).", + "type": "string" + }, + "reason": { + "description": "Explanation of why this tool is needed or what information you want to share about the limitation (max 256 characters).", + "type": "string" + }, + "tool": { + "description": "Optional: Name or description of the missing tool or capability (max 128 characters). Be specific about what functionality is needed.", + "type": "string" + } + }, + "required": [ + "reason" + ], + "type": "object" + }, + "name": "missing_tool" + }, + { + "description": "Log a transparency message when no significant actions are needed. Use this to confirm workflow completion and provide visibility when analysis is complete but no changes or outputs are required (e.g., 'No issues found', 'All checks passed'). This ensures the workflow produces human-visible output even when no other actions are taken.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "message": { + "description": "Status or completion message to log. Should explain what was analyzed and the outcome (e.g., 'Code review complete - no issues found', 'Analysis complete - all tests passing').", + "type": "string" + } + }, + "required": [ + "message" + ], + "type": "object" + }, + "name": "noop" + }, + { + "description": "Report that data or information needed to complete the task is not available. Use this when you cannot accomplish what was requested because required data, context, or information is missing.", + "inputSchema": { + "additionalProperties": false, + "properties": { + "alternatives": { + "description": "Any workarounds, manual steps, or alternative approaches the user could take (max 256 characters).", + "type": "string" + }, + "context": { + "description": "Additional context about the missing data or where it should come from (max 256 characters).", + "type": "string" + }, + "data_type": { + "description": "Type or description of the missing data or information (max 128 characters). Be specific about what data is needed.", + "type": "string" + }, + "reason": { + "description": "Explanation of why this data is needed to complete the task (max 256 characters).", + "type": "string" + } + }, + "required": [], + "type": "object" + }, + "name": "missing_data" + } + ] + GH_AW_SAFE_OUTPUTS_TOOLS_EOF + cat > /opt/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_EOF' + { + "create_pull_request": { + "defaultMax": 1, + "fields": { + "body": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + }, + "branch": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "labels": { + "type": "array", + "itemType": "string", + "itemSanitize": true, + "itemMaxLength": 128 + }, + "title": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "missing_tool": { + "defaultMax": 20, + "fields": { + "alternatives": { + "type": "string", + "sanitize": true, + "maxLength": 512 + }, + "reason": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 256 + }, + "tool": { + "type": "string", + "sanitize": true, + "maxLength": 128 + } + } + }, + "noop": { + "defaultMax": 1, + "fields": { + "message": { + "required": true, + "type": "string", + "sanitize": true, + "maxLength": 65000 + } + } + } + } + GH_AW_SAFE_OUTPUTS_VALIDATION_EOF + - name: Generate Safe Outputs MCP Server Config + id: safe-outputs-config + run: | + # Generate a secure random API key (360 bits of entropy, 40+ chars) + # Mask immediately to prevent timing vulnerabilities + API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${API_KEY}" + + PORT=3001 + + # Set outputs for next steps + { + echo "safe_outputs_api_key=${API_KEY}" + echo "safe_outputs_port=${PORT}" + } >> "$GITHUB_OUTPUT" + + echo "Safe Outputs MCP server will run on port ${PORT}" + + - name: Start Safe Outputs MCP HTTP Server + id: safe-outputs-start + env: + DEBUG: '*' + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-config.outputs.safe_outputs_port }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-config.outputs.safe_outputs_api_key }} + GH_AW_SAFE_OUTPUTS_TOOLS_PATH: /opt/gh-aw/safeoutputs/tools.json + GH_AW_SAFE_OUTPUTS_CONFIG_PATH: /opt/gh-aw/safeoutputs/config.json + GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs + run: | + # Environment variables are set above to prevent template injection + export DEBUG + export GH_AW_SAFE_OUTPUTS_PORT + export GH_AW_SAFE_OUTPUTS_API_KEY + export GH_AW_SAFE_OUTPUTS_TOOLS_PATH + export GH_AW_SAFE_OUTPUTS_CONFIG_PATH + export GH_AW_MCP_LOG_DIR + + bash /opt/gh-aw/actions/start_safe_outputs_server.sh + + - name: Start MCP gateway + id: start-mcp-gateway + env: + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-start.outputs.api_key }} + GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-start.outputs.port }} + GITHUB_MCP_LOCKDOWN: ${{ steps.determine-automatic-lockdown.outputs.lockdown == 'true' && '1' || '0' }} + GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + run: | + set -eo pipefail + mkdir -p /tmp/gh-aw/mcp-config + + # Export gateway environment variables for MCP config and gateway script + export MCP_GATEWAY_PORT="80" + export MCP_GATEWAY_DOMAIN="host.docker.internal" + MCP_GATEWAY_API_KEY=$(openssl rand -base64 45 | tr -d '/+=') + echo "::add-mask::${MCP_GATEWAY_API_KEY}" + export MCP_GATEWAY_API_KEY + export MCP_GATEWAY_PAYLOAD_DIR="/tmp/gh-aw/mcp-payloads" + mkdir -p "${MCP_GATEWAY_PAYLOAD_DIR}" + export DEBUG="*" + + export GH_AW_ENGINE="copilot" + export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_LOCKDOWN -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.1.4' + + mkdir -p /home/runner/.copilot + cat << GH_AW_MCP_CONFIG_EOF | bash /opt/gh-aw/actions/start_mcp_gateway.sh + { + "mcpServers": { + "github": { + "type": "stdio", + "container": "ghcr.io/github/github-mcp-server:v0.30.3", + "env": { + "GITHUB_LOCKDOWN_MODE": "$GITHUB_MCP_LOCKDOWN", + "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}", + "GITHUB_READ_ONLY": "1", + "GITHUB_TOOLSETS": "repos,issues,pull_requests" + } + }, + "safeoutputs": { + "type": "http", + "url": "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT", + "headers": { + "Authorization": "\${GH_AW_SAFE_OUTPUTS_API_KEY}" + } + } + }, + "gateway": { + "port": $MCP_GATEWAY_PORT, + "domain": "${MCP_GATEWAY_DOMAIN}", + "apiKey": "${MCP_GATEWAY_API_KEY}", + "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" + } + } + GH_AW_MCP_CONFIG_EOF + - name: Generate workflow overview + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + with: + script: | + const { generateWorkflowOverview } = require('/opt/gh-aw/actions/generate_workflow_overview.cjs'); + await generateWorkflowOverview(core); + - name: Create prompt with built-in context + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_ENV_ISSUE_ID: ${{ env.ISSUE_ID }} + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_INPUTS_NAME: ${{ github.event.inputs.name }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + run: | + bash /opt/gh-aw/actions/create_prompt_first.sh + cat << 'GH_AW_PROMPT_EOF' > "$GH_AW_PROMPT" + + GH_AW_PROMPT_EOF + { + cat "/opt/gh-aw/prompts/xpia.md" + cat "/opt/gh-aw/prompts/temp_folder_prompt.md" + cat "/opt/gh-aw/prompts/markdown.md" + cat << 'GH_AW_PROMPT_EOF' + + GitHub API Access Instructions + + The gh CLI is NOT authenticated. Do NOT use gh commands for GitHub operations. + + + To create or modify GitHub resources (issues, discussions, pull requests, etc.), you MUST call the appropriate safe output tool. Simply writing content will NOT work - the workflow requires actual tool calls. + + Temporary IDs: Some safe output tools support a temporary ID field (usually named temporary_id) so you can reference newly-created items elsewhere in the SAME agent output (for example, using #aw_abc1 in a later body). + + **IMPORTANT - temporary_id format rules:** + - If you DON'T need to reference the item later, OMIT the temporary_id field entirely (it will be auto-generated if needed) + - If you DO need cross-references/chaining, you MUST match this EXACT validation regex: /^aw_[A-Za-z0-9]{3,8}$/i + - Format: aw_ prefix followed by 3 to 8 alphanumeric characters (A-Z, a-z, 0-9, case-insensitive) + - Valid alphanumeric characters: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 + - INVALID examples: aw_ab (too short), aw_123456789 (too long), aw_test-id (contains hyphen), aw_id_123 (contains underscore) + - VALID examples: aw_abc, aw_abc1, aw_Test123, aw_A1B2C3D4, aw_12345678 + - To generate valid IDs: use 3-8 random alphanumeric characters or omit the field to let the system auto-generate + + Do NOT invent other aw_* formats — downstream steps will reject them with validation errors matching against /^aw_[A-Za-z0-9]{3,8}$/i. + + Discover available tools from the safeoutputs MCP server. + + **Critical**: Tool calls write structured data that downstream jobs process. Without tool calls, follow-up actions will be skipped. + + **Note**: If you made no other safe output tool calls during this workflow execution, call the "noop" tool to provide a status message indicating completion or that no actions were needed. + + + + The following GitHub context information is available for this workflow: + {{#if __GH_AW_GITHUB_ACTOR__ }} + - **actor**: __GH_AW_GITHUB_ACTOR__ + {{/if}} + {{#if __GH_AW_GITHUB_REPOSITORY__ }} + - **repository**: __GH_AW_GITHUB_REPOSITORY__ + {{/if}} + {{#if __GH_AW_GITHUB_WORKSPACE__ }} + - **workspace**: __GH_AW_GITHUB_WORKSPACE__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ }} + - **issue-number**: #__GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ }} + - **discussion-number**: #__GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ }} + - **pull-request-number**: #__GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_COMMENT_ID__ }} + - **comment-id**: __GH_AW_GITHUB_EVENT_COMMENT_ID__ + {{/if}} + {{#if __GH_AW_GITHUB_RUN_ID__ }} + - **workflow-run-id**: __GH_AW_GITHUB_RUN_ID__ + {{/if}} + + + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' + + GH_AW_PROMPT_EOF + cat << 'GH_AW_PROMPT_EOF' + {{#runtime-import .github/workflows/upgrade-pylib.md}} + GH_AW_PROMPT_EOF + } >> "$GH_AW_PROMPT" + - name: Substitute placeholders + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_ENV_ISSUE_ID: ${{ env.ISSUE_ID }} + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_INPUTS_NAME: ${{ github.event.inputs.name }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + with: + script: | + const substitutePlaceholders = require('/opt/gh-aw/actions/substitute_placeholders.cjs'); + + // Call the substitution function + return await substitutePlaceholders({ + file: process.env.GH_AW_PROMPT, + substitutions: { + GH_AW_ENV_ISSUE_ID: process.env.GH_AW_ENV_ISSUE_ID, + GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR, + GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID, + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER, + GH_AW_GITHUB_EVENT_INPUTS_NAME: process.env.GH_AW_GITHUB_EVENT_INPUTS_NAME, + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: process.env.GH_AW_GITHUB_EVENT_ISSUE_NUMBER, + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER, + GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY, + GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID, + GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE + } + }); + - name: Interpolate variables and render templates + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_ENV_ISSUE_ID: ${{ env.ISSUE_ID }} + GH_AW_GITHUB_EVENT_INPUTS_NAME: ${{ github.event.inputs.name }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/interpolate_prompt.cjs'); + await main(); + - name: Validate prompt placeholders + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: bash /opt/gh-aw/actions/validate_prompt_placeholders.sh + - name: Print prompt + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: bash /opt/gh-aw/actions/print_prompt_summary.sh + - name: Clean git credentials + run: bash /opt/gh-aw/actions/clean_git_credentials.sh + - name: Execute GitHub Copilot CLI + id: agentic_execution + # Copilot CLI tool arguments (sorted): + timeout-minutes: 45 + run: | + set -o pipefail + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --allow-domains '*.pythonhosted.org,anaconda.org,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,binstar.org,bootstrap.pypa.io,conda.anaconda.org,conda.binstar.org,crates.io,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,files.pythonhosted.org,github.com,host.docker.internal,index.crates.io,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,pip.pypa.io,ppa.launchpad.net,pypi.org,pypi.python.org,raw.githubusercontent.com,registry.npmjs.org,repo.anaconda.com,repo.continuum.io,s.symcb.com,s.symcd.com,security.ubuntu.com,sh.rustup.rs,static.crates.io,static.rust-lang.org,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com' --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.16.4 --skip-pull \ + -- '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-all-tools --allow-all-paths --share /tmp/gh-aw/sandbox/agent/logs/conversation.md --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"${GH_AW_MODEL_AGENT_COPILOT:+ --model "$GH_AW_MODEL_AGENT_COPILOT"}' \ + 2>&1 | tee /tmp/gh-aw/agent-stdio.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + GH_AW_MCP_CONFIG: /home/runner/.copilot/mcp-config.json + GH_AW_MODEL_AGENT_COPILOT: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || '' }} + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }} + GITHUB_WORKSPACE: ${{ github.workspace }} + XDG_CONFIG_HOME: /home/runner + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Copy Copilot session state files to logs + if: always() + continue-on-error: true + run: | + # Copy Copilot session state files to logs folder for artifact collection + # This ensures they are in /tmp/gh-aw/ where secret redaction can scan them + SESSION_STATE_DIR="$HOME/.copilot/session-state" + LOGS_DIR="/tmp/gh-aw/sandbox/agent/logs" + if [ -d "$SESSION_STATE_DIR" ]; then + echo "Copying Copilot session state files from $SESSION_STATE_DIR to $LOGS_DIR" + mkdir -p "$LOGS_DIR" + cp -v "$SESSION_STATE_DIR"/*.jsonl "$LOGS_DIR/" 2>/dev/null || true + echo "Session state files copied successfully" + else + echo "No session-state directory found at $SESSION_STATE_DIR" + fi + - name: Stop MCP gateway + if: always() + continue-on-error: true + env: + MCP_GATEWAY_PORT: ${{ steps.start-mcp-gateway.outputs.gateway-port }} + MCP_GATEWAY_API_KEY: ${{ steps.start-mcp-gateway.outputs.gateway-api-key }} + GATEWAY_PID: ${{ steps.start-mcp-gateway.outputs.gateway-pid }} + run: | + bash /opt/gh-aw/actions/stop_mcp_gateway.sh "$GATEWAY_PID" + - name: Redact secrets in logs + if: always() + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/redact_secrets.cjs'); + await main(); + env: + GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN' + SECRET_COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + SECRET_GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + SECRET_GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + SECRET_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Upload Safe Outputs + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: safe-output + path: ${{ env.GH_AW_SAFE_OUTPUTS }} + if-no-files-found: warn + - name: Ingest agent output + id: collect_output + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }} + GH_AW_ALLOWED_DOMAINS: "*.pythonhosted.org,anaconda.org,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,binstar.org,bootstrap.pypa.io,conda.anaconda.org,conda.binstar.org,crates.io,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,files.pythonhosted.org,github.com,host.docker.internal,index.crates.io,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,pip.pypa.io,ppa.launchpad.net,pypi.org,pypi.python.org,raw.githubusercontent.com,registry.npmjs.org,repo.anaconda.com,repo.continuum.io,s.symcb.com,s.symcd.com,security.ubuntu.com,sh.rustup.rs,static.crates.io,static.rust-lang.org,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com" + GITHUB_SERVER_URL: ${{ github.server_url }} + GITHUB_API_URL: ${{ github.api_url }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/collect_ndjson_output.cjs'); + await main(); + - name: Upload sanitized agent output + if: always() && env.GH_AW_AGENT_OUTPUT + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: agent-output + path: ${{ env.GH_AW_AGENT_OUTPUT }} + if-no-files-found: warn + - name: Upload engine output files + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: agent_outputs + path: | + /tmp/gh-aw/sandbox/agent/logs/ + /tmp/gh-aw/redacted-urls.log + if-no-files-found: ignore + - name: Parse agent logs for step summary + if: always() + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/ + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_copilot_log.cjs'); + await main(); + - name: Parse MCP gateway logs for step summary + if: always() + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_mcp_gateway_log.cjs'); + await main(); + - name: Print firewall logs + if: always() + continue-on-error: true + env: + AWF_LOGS_DIR: /tmp/gh-aw/sandbox/firewall/logs + run: | + # Fix permissions on firewall logs so they can be uploaded as artifacts + # AWF runs with sudo, creating files owned by root + sudo chmod -R a+r /tmp/gh-aw/sandbox/firewall/logs 2>/dev/null || true + awf logs summary | tee -a "$GITHUB_STEP_SUMMARY" + - name: Upload agent artifacts + if: always() + continue-on-error: true + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: agent-artifacts + path: | + /tmp/gh-aw/aw-prompts/prompt.txt + /tmp/gh-aw/aw_info.json + /tmp/gh-aw/mcp-logs/ + /tmp/gh-aw/sandbox/firewall/logs/ + /tmp/gh-aw/agent-stdio.log + /tmp/gh-aw/agent/ + /tmp/gh-aw/aw*.patch + if-no-files-found: ignore + + conclusion: + needs: + - activation + - agent + - detection + - safe_outputs + if: (always()) && (needs.agent.result != 'skipped') + runs-on: ubuntu-slim + permissions: + contents: read + discussions: write + issues: write + pull-requests: write + outputs: + noop_message: ${{ steps.noop.outputs.noop_message }} + tools_reported: ${{ steps.missing_tool.outputs.tools_reported }} + total_count: ${{ steps.missing_tool.outputs.total_count }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@f990bbb7eb83981a203d4b5eccdc24f677e950c7 # v0.77.5 + with: + destination: /opt/gh-aw/actions + - name: Download agent output artifact + continue-on-error: true + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent-output + path: /tmp/gh-aw/safeoutputs/ + - name: Setup agent output environment variable + run: | + mkdir -p /tmp/gh-aw/safeoutputs/ + find "/tmp/gh-aw/safeoutputs/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/safeoutputs/agent_output.json" >> "$GITHUB_ENV" + - name: Process No-Op Messages + id: noop + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_NOOP_MAX: 1 + GH_AW_WORKFLOW_NAME: "Upgrade Python Library" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/noop.cjs'); + await main(); + - name: Record Missing Tool + id: missing_tool + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Upgrade Python Library" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/missing_tool.cjs'); + await main(); + - name: Handle Agent Failure + id: handle_agent_failure + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Upgrade Python Library" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_WORKFLOW_ID: "upgrade-pylib" + GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.agent.outputs.secret_verification_result }} + GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/handle_agent_failure.cjs'); + await main(); + - name: Handle No-Op Message + id: handle_noop_message + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Upgrade Python Library" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_NOOP_MESSAGE: ${{ steps.noop.outputs.noop_message }} + GH_AW_NOOP_REPORT_AS_ISSUE: "true" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/handle_noop_message.cjs'); + await main(); + - name: Handle Create Pull Request Error + id: handle_create_pr_error + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Upgrade Python Library" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/handle_create_pr_error.cjs'); + await main(); + - name: Update reaction comment with completion status + id: conclusion + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_COMMENT_ID: ${{ needs.activation.outputs.comment_id }} + GH_AW_COMMENT_REPO: ${{ needs.activation.outputs.comment_repo }} + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_WORKFLOW_NAME: "Upgrade Python Library" + GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }} + GH_AW_DETECTION_CONCLUSION: ${{ needs.detection.result }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/notify_comment_error.cjs'); + await main(); + + detection: + needs: agent + if: needs.agent.outputs.output_types != '' || needs.agent.outputs.has_patch == 'true' + runs-on: ubuntu-latest + permissions: {} + concurrency: + group: "gh-aw-copilot-${{ github.workflow }}" + timeout-minutes: 10 + outputs: + success: ${{ steps.parse_results.outputs.success }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@f990bbb7eb83981a203d4b5eccdc24f677e950c7 # v0.77.5 + with: + destination: /opt/gh-aw/actions + - name: Download agent artifacts + continue-on-error: true + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent-artifacts + path: /tmp/gh-aw/threat-detection/ + - name: Download agent output artifact + continue-on-error: true + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent-output + path: /tmp/gh-aw/threat-detection/ + - name: Echo agent output types + env: + AGENT_OUTPUT_TYPES: ${{ needs.agent.outputs.output_types }} + run: | + echo "Agent output-types: $AGENT_OUTPUT_TYPES" + - name: Setup threat detection + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + WORKFLOW_NAME: "Upgrade Python Library" + WORKFLOW_DESCRIPTION: "Pick an out-of-sync Python library from the todo list and upgrade it\nby running `scripts/update_lib quick`, then open a pull request." + HAS_PATCH: ${{ needs.agent.outputs.has_patch }} + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/setup_threat_detection.cjs'); + await main(); + - name: Ensure threat-detection directory and log + run: | + mkdir -p /tmp/gh-aw/threat-detection + touch /tmp/gh-aw/threat-detection/detection.log + - name: Validate COPILOT_GITHUB_TOKEN secret + id: validate-secret + run: /opt/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + - name: Install GitHub Copilot CLI + run: /opt/gh-aw/actions/install_copilot_cli.sh 0.0.409 + - name: Execute GitHub Copilot CLI + id: agentic_execution + # Copilot CLI tool arguments (sorted): + # --allow-tool shell(cat) + # --allow-tool shell(grep) + # --allow-tool shell(head) + # --allow-tool shell(jq) + # --allow-tool shell(ls) + # --allow-tool shell(tail) + # --allow-tool shell(wc) + timeout-minutes: 20 + run: | + set -o pipefail + COPILOT_CLI_INSTRUCTION="$(cat /tmp/gh-aw/aw-prompts/prompt.txt)" + mkdir -p /tmp/ + mkdir -p /tmp/gh-aw/ + mkdir -p /tmp/gh-aw/agent/ + mkdir -p /tmp/gh-aw/sandbox/agent/logs/ + copilot --add-dir /tmp/ --add-dir /tmp/gh-aw/ --add-dir /tmp/gh-aw/agent/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-tool 'shell(cat)' --allow-tool 'shell(grep)' --allow-tool 'shell(head)' --allow-tool 'shell(jq)' --allow-tool 'shell(ls)' --allow-tool 'shell(tail)' --allow-tool 'shell(wc)' --share /tmp/gh-aw/sandbox/agent/logs/conversation.md --prompt "$COPILOT_CLI_INSTRUCTION"${GH_AW_MODEL_DETECTION_COPILOT:+ --model "$GH_AW_MODEL_DETECTION_COPILOT"} 2>&1 | tee /tmp/gh-aw/threat-detection/detection.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + GH_AW_MODEL_DETECTION_COPILOT: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }} + GITHUB_WORKSPACE: ${{ github.workspace }} + XDG_CONFIG_HOME: /home/runner + - name: Parse threat detection results + id: parse_results + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + with: + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/parse_threat_detection_results.cjs'); + await main(); + - name: Upload threat detection log + if: always() + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: threat-detection.log + path: /tmp/gh-aw/threat-detection/detection.log + if-no-files-found: ignore + + safe_outputs: + needs: + - activation + - agent + - detection + if: ((!cancelled()) && (needs.agent.result != 'skipped')) && (needs.detection.outputs.success == 'true') + runs-on: ubuntu-slim + permissions: + contents: write + issues: write + pull-requests: write + timeout-minutes: 15 + env: + GH_AW_ENGINE_ID: "copilot" + GH_AW_WORKFLOW_ID: "upgrade-pylib" + GH_AW_WORKFLOW_NAME: "Upgrade Python Library" + outputs: + create_discussion_error_count: ${{ steps.process_safe_outputs.outputs.create_discussion_error_count }} + create_discussion_errors: ${{ steps.process_safe_outputs.outputs.create_discussion_errors }} + process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }} + process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }} + steps: + - name: Setup Scripts + uses: github/gh-aw/actions/setup@f990bbb7eb83981a203d4b5eccdc24f677e950c7 # v0.77.5 + with: + destination: /opt/gh-aw/actions + - name: Download agent output artifact + continue-on-error: true + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent-output + path: /tmp/gh-aw/safeoutputs/ + - name: Setup agent output environment variable + run: | + mkdir -p /tmp/gh-aw/safeoutputs/ + find "/tmp/gh-aw/safeoutputs/" -type f -print + echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/safeoutputs/agent_output.json" >> "$GITHUB_ENV" + - name: Download patch artifact + continue-on-error: true + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: agent-artifacts + path: /tmp/gh-aw/ + - name: Checkout repository + if: ((!cancelled()) && (needs.agent.result != 'skipped')) && (contains(needs.agent.outputs.output_types, 'create_pull_request')) + uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + token: ${{ github.token }} + persist-credentials: false + fetch-depth: 1 + - name: Configure Git credentials + if: ((!cancelled()) && (needs.agent.result != 'skipped')) && (contains(needs.agent.outputs.output_types, 'create_pull_request')) + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + GIT_TOKEN: ${{ github.token }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${GIT_TOKEN}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Process Safe Outputs + id: process_safe_outputs + uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + env: + GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }} + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_pull_request\":{\"base_branch\":\"${{ github.ref_name }}\",\"draft\":false,\"expires\":30,\"labels\":[\"pylib-sync\"],\"max\":1,\"max_patch_size\":1024,\"title_prefix\":\"Update \"},\"missing_data\":{},\"missing_tool\":{}}" + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/opt/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/opt/gh-aw/actions/safe_output_handler_manager.cjs'); + await main(); + diff --git a/.github/workflows/upgrade-pylib.md b/.github/workflows/upgrade-pylib.md new file mode 100644 index 00000000000..ac71f3d7244 --- /dev/null +++ b/.github/workflows/upgrade-pylib.md @@ -0,0 +1,134 @@ +--- +description: | + Pick an out-of-sync Python library from the todo list and upgrade it + by running `scripts/update_lib quick`, then open a pull request. + +on: + workflow_dispatch: + inputs: + name: + description: "Module name to upgrade (leave empty to auto-pick)" + required: false + type: string + +timeout-minutes: 45 + +permissions: + contents: read + issues: read + pull-requests: read + +network: + allowed: + - defaults + - rust + - python + +engine: copilot + +runtimes: + python: + version: "3.14" + +tools: + bash: + - ":*" + edit: + github: + toolsets: [repos, issues, pull_requests] + read-only: true + +safe-outputs: + create-pull-request: + title-prefix: "Update " + labels: [pylib-sync] + draft: false + expires: 30 + +cache: + key: cpython-lib-${{ env.PYTHON_VERSION }} + path: cpython + restore-keys: + - cpython-lib- + +env: + PYTHON_VERSION: "v3.14.6" + ISSUE_ID: "6839" +--- + +# Upgrade Python Library + +You are an automated maintenance agent for RustPython, a Python 3 interpreter written in Rust. Your task is to upgrade one out-of-sync Python standard library module from CPython. + +## Step 1: Set up the environment + +The CPython source may already be cached. Check if the `cpython` directory exists and has the correct version: + +```bash +if [ -d "cpython/Lib" ]; then + echo "CPython cache hit, skipping clone" +else + git clone --depth 1 --branch "$PYTHON_VERSION" https://github.com/python/cpython.git cpython +fi +``` + +## Step 2: Determine module name + +Run this script to determine the module name: + +```bash +MODULE_NAME="${{ github.event.inputs.name }}" +if [ -z "$MODULE_NAME" ]; then + echo "No module specified, running todo to find one..." + python3 scripts/update_lib todo + echo "Pick one module from the list above that is marked [ ], has no unmet deps, and has a small Δ number." + echo "Do NOT pick: opcode, datetime, random, hashlib, tokenize, pdb, _pyrepl, concurrent, asyncio, multiprocessing, ctypes, idlelib, tkinter, shutil, tarfile, email, unittest" +else + echo "Module specified by user: $MODULE_NAME" +fi +``` + +If the script printed "Module specified by user: ...", use that exact name. If it printed the todo list, pick one suitable module from it. + +## Step 3: Run the upgrade + +Run the quick upgrade command. This will copy the library from CPython, migrate test files preserving RustPython markers, auto-mark test failures, and create a git commit: + +```bash +python3 scripts/update_lib quick +``` + +This takes a while because it builds RustPython (`cargo build --release`) and runs tests to determine which ones pass or fail. + +If the command fails, report the error and stop. Do not try to fix Rust code or modify test files manually. + +## Step 4: Verify the result + +After the script succeeds, check what changed: + +```bash +git log -1 --stat +git diff HEAD~1 --stat +``` + +Make sure the commit was created with the correct message format: `Update from `. + +## Step 5: Create the pull request + +Create a pull request. Reference issue #${{ env.ISSUE_ID }} in the body but do **NOT** use keywords that auto-close issues (Fix, Close, Resolve). + +Use this format for the PR body: + +``` +## Summary + +Upgrade `` from CPython $PYTHON_VERSION. + +Part of #$ISSUE_ID + +## Changes + +- Updated `Lib/` from CPython +- Migrated test files preserving RustPython markers +- Auto-marked test failures with `@expectedFailure` +``` diff --git a/.github/zizmor.yml b/.github/zizmor.yml new file mode 100644 index 00000000000..f22f76b70d8 --- /dev/null +++ b/.github/zizmor.yml @@ -0,0 +1,14 @@ +rules: + unpinned-uses: + config: + policies: + # dtolnay/rust-toolchain is a trusted action that uses lightweight branch + # refs (@stable, @nightly, etc.) by design. Pinning to a hash would break + # the intended usage pattern. + # We can remove this once https://github.com/dtolnay/rust-toolchain/issues/180 is resolved + dtolnay/rust-toolchain: any + # dtolnay/rust-toolchain handles component installation, target addition, and + # override configuration beyond what a bare `rustup` invocation provides. + # See: https://github.com/zizmorcore/zizmor/issues/1817 + superfluous-actions: + disable: true diff --git a/.gitignore b/.gitignore index 485272adfb3..b5887be53b5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,15 +2,14 @@ /*/target **/*.rs.bk **/*.bytecode -__pycache__ +__pycache__/ **/*.pytest_cache .*sw* .repl_history.txt -.vscode +.vscode/ wasm-pack.log .idea/ .envrc -.python-version flame-graph.html flame.txt @@ -21,3 +20,11 @@ flamescope.json extra_tests/snippets/resources extra_tests/not_impl.py + +Lib/_sysconfig_vars*.json +Lib/site-packages/* +!Lib/site-packages/README.txt +Lib/test/data/* +!Lib/test/data/README +cpython/ +.claude/scheduled_tasks.lock \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000000..39481edaa9f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,84 @@ +# NOTE: Reason for not using `prek.toml` is dependabot supports `pre-commit` as an ecosystem +# See: https://github.blog/changelog/2026-03-10-dependabot-now-supports-pre-commit-hooks/ + +fail_fast: false +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: check-merge-conflict + priority: 0 + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.15.16 + hooks: + - id: ruff-format + priority: 0 + + - id: ruff-check + args: [--select, I, --fix, --exit-non-zero-on-fix] + types_or: [python] + require_serial: true + priority: 1 + + - repo: local + hooks: + - id: redundant-test-patches + name: check redundant test patches + entry: scripts/check_redundant_patches.py + files: '^Lib/test/.*\.py$' + language: script + types: [python] + priority: 0 + + - repo: local + hooks: + - id: rustfmt + name: rustfmt + entry: rustfmt + language: system + types: [rust] + priority: 0 + + - id: generate-rs-opcode-metadata + name: generate rust opcode metadata + entry: python tools/opcode_metadata/generate_rs_opcode_metadata.py + files: '^(crates/compiler-core/src/bytecode/instruction\.rs|tools/opcode_metadata/*)$' + pass_filenames: false + language: system + require_serial: true + priority: 1 # so rustfmt runs first + stages: + - manual + + - id: generate-py-opcode-metadata + name: generate python opcode metadata + entry: python tools/opcode_metadata/generate_py_opcode_metadata.py + files: '^(crates/compiler-core/src/bytecode/instruction\.rs|tools/opcode_metadata/*)$' + pass_filenames: false + language: system + require_serial: true + priority: 1 # so rustfmt runs first + stages: + - manual + + - repo: https://github.com/streetsidesoftware/cspell-cli + rev: v10.0.0 + hooks: + - id: cspell + types: [rust] + additional_dependencies: + - '@cspell/dict-en_us' + - '@cspell/dict-cpp' + - '@cspell/dict-python' + - '@cspell/dict-rust' + - '@cspell/dict-win32' + - '@cspell/dict-shell' + priority: 0 + + - repo: https://github.com/rbubley/mirrors-prettier + rev: v3.8.3 + hooks: + - id: prettier + files: '^wasm/.*$' + priority: 0 diff --git a/.python-version b/.python-version new file mode 100644 index 00000000000..3f0a10fda70 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.14.6 diff --git a/.vscode/launch.json b/.vscode/launch.json index fa6f96c5fd8..6e00e14aff2 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -16,18 +16,6 @@ }, "cwd": "${workspaceFolder}" }, - { - "type": "lldb", - "request": "launch", - "name": "Debug executable 'rustpython' without SSL", - "preLaunchTask": "Build RustPython Debug without SSL", - "program": "target/debug/rustpython", - "args": [], - "env": { - "RUST_BACKTRACE": "1" - }, - "cwd": "${workspaceFolder}" - }, { "type": "lldb", "request": "launch", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 18a3d6010d1..50a52aaf8be 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -1,28 +1,12 @@ { "version": "2.0.0", "tasks": [ - { - "label": "Build RustPython Debug without SSL", - "type": "shell", - "command": "cargo", - "args": [ - "build", - ], - "problemMatcher": [ - "$rustc", - ], - "group": { - "kind": "build", - "isDefault": true, - }, - }, { "label": "Build RustPython Debug", "type": "shell", "command": "cargo", "args": [ "build", - "--features=ssl" ], "problemMatcher": [ "$rustc", diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 00000000000..c89b2a4d3a4 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,307 @@ +# GitHub Copilot Instructions for RustPython + +This document provides guidelines for working with GitHub Copilot when contributing to the RustPython project. + +## Project Overview + +RustPython is a Python 3 interpreter written in Rust, implementing Python 3.14.0+ compatibility. The project aims to provide: + +- A complete Python-3 environment entirely in Rust (not CPython bindings) +- A clean implementation without compatibility hacks +- Cross-platform support, including WebAssembly compilation +- The ability to embed Python scripting in Rust applications + +## Repository Structure + +- `src/` - Top-level code for the RustPython binary +- `vm/` - The Python virtual machine implementation + - `builtins/` - Python built-in types and functions + - `stdlib/` - Essential standard library modules implemented in Rust, required to run the Python core +- `compiler/` - Python compiler components + - `parser/` - Parser for converting Python source to AST + - `core/` - Bytecode representation in Rust structures + - `codegen/` - AST to bytecode compiler +- `Lib/` - CPython's standard library in Python (copied from CPython). **IMPORTANT**: Do not edit this directory directly; The only allowed operation is copying files from CPython. +- `derive/` - Rust macros for RustPython +- `common/` - Common utilities +- `extra_tests/` - Integration tests and snippets +- `stdlib/` - Non-essential Python standard library modules implemented in Rust (useful but not required for core functionality) +- `wasm/` - WebAssembly support +- `jit/` - Experimental JIT compiler implementation +- `pylib/` - Python standard library packaging (do not modify this directory directly - its contents are generated automatically) + +## AI Agent Rules + +**CRITICAL: Git Operations** +- NEVER create pull requests directly without explicit user permission +- NEVER push commits to remote without explicit user permission +- Always ask the user before performing any git operations that affect the remote repository +- Commits can be created locally when requested, but pushing and PR creation require explicit approval + +**CRITICAL: Pre-commit Checks** +- Before creating ANY commit, you MUST run `prek run --all-files` (or `pre-commit run --all-files`) AND the full test suite. Both must pass — do not commit if either fails. +- Test commands are documented in the [Testing](#testing) section below. At minimum run `cargo test --workspace --exclude rustpython_wasm --exclude rustpython-venvlauncher`; if the change touches `extra_tests/snippets/` run `pytest -v` there too, and if it touches `Lib/` or interpreter behavior, run the relevant `cargo run --release -- -m test ` modules. +- If a hook auto-fixes files (e.g. `ruff-format`, `rustfmt`), re-stage the fixes, re-run `prek` until it reports a clean pass, then re-run the tests, then commit. +- NEVER bypass these checks with `--no-verify`, `--no-gpg-sign`, or by skipping tests "because the change is small". If a hook or test fails, fix the underlying issue and create a new commit — do not amend or force the failing commit through. + +## Important Development Notes + +### Running Python Code + +When testing Python code, always use RustPython instead of the standard `python` command: + +```bash +# Use this instead of python script.py +cargo run -- script.py + +# For interactive REPL +cargo run + +# With specific features +cargo run --features jit + +# Release mode (recommended for better performance) +cargo run --release -- script.py +``` + +### Comparing with CPython + +When you need to compare behavior with CPython or run test suites: + +```bash +# Use python command to explicitly run CPython +python my_test_script.py + +# Run RustPython +cargo run -- my_test_script.py +``` + +### Working with the Lib Directory + +The `Lib/` directory contains Python standard library files copied from the CPython repository. Important notes: + +- These files should be edited very conservatively +- Modifications should be minimal and only to work around RustPython limitations +- Tests in `Lib/test` often use one of the following markers: + - Add a `# TODO: RUSTPYTHON` comment when modifications are made + - `unittest.skip("TODO: RustPython ")` + - `unittest.expectedFailure` with `# TODO: RUSTPYTHON ` comment + +#### Choosing the right marker + +When marking a test that fails on RustPython, prefer one of the following forms: + +```python +@unittest.expectedFailure # TODO: RUSTPYTHON; +# or +@unittest.expectedFailureIf(, "TODO: RUSTPYTHON; ") +``` + +If the test would crash the interpreter (segfault, Rust panic, abort, infinite loop), use `skip` instead so the rest of the suite can still run: + +```python +@unittest.skip("TODO: RUSTPYTHON; ") +# or +@unittest.skipIf(, "TODO: RUSTPYTHON; ") +``` + +**When to use which:** + +- **Prefer `expectedFailure` / `expectedFailureIf`** by default. The test body still runs, so if RustPython is later fixed, the unexpected pass surfaces immediately and the decorator can be removed. Use the conditional `*If` form when the failure is environment-specific (e.g., a platform or build flag). +- **Use `skip` / `skipIf` only when running the test would take down the test process** — segfaults, Rust panics, aborts, or hangs that block subsequent tests. Skipping keeps the suite usable; `expectedFailure` cannot help here, because the test body still executes. + +To find WIP entries that are partly modified and may need follow-up: + +```bash +grep -d recurse 'TODO: RUSTPYTHON' Lib/test/ +``` + +### Clean Build + +When you modify bytecode instructions, a full clean is required: + +```bash +rm -r target/debug/build/rustpython-* && find . | grep -E "\.pyc$" | xargs rm -r +``` + +### Testing + +```bash +# Run Rust unit tests +cargo test --workspace --exclude rustpython_wasm --exclude rustpython-venvlauncher + +# Run Python snippets tests (debug mode recommended for faster compilation) +cargo run -- extra_tests/snippets/builtin_bytes.py + +# Run all Python snippets tests with pytest +cd extra_tests +pytest -v + +# Run the Python test module (release mode recommended for better performance) +cargo run --release -- -m test ${TEST_MODULE} +cargo run --release -- -m test test_unicode # to test test_unicode.py + +# Run the Python test module with specific function +cargo run --release -- -m test test_unicode -k test_unicode_escape +``` + +**Note**: For `extra_tests/snippets` tests, use debug mode (`cargo run`) as compilation is faster. For `unittest` (`-m test`), use release mode (`cargo run --release`) for better runtime performance. + +### Determining What to Implement + +Run `./scripts/whats_left.py` to get a list of unimplemented methods, which is helpful when looking for contribution opportunities. + +## Coding Guidelines + +### Rust Code + +- Follow the default rustfmt code style (`cargo fmt` to format) +- **IMPORTANT**: Always run clippy to lint code (`cargo clippy`) before completing tasks. Fix any warnings or lints that are introduced by your changes +- Follow Rust best practices for error handling and memory management +- Use the macro system (`pyclass`, `pymodule`, `pyfunction`, etc.) when implementing Python functionality in Rust + +#### Comments + +- Do not delete or rewrite existing comments unless they are factually wrong or directly contradict the new code. +- Do not add decorative section separators (e.g. `// -----------`, `// ===`, `/* *** */`). Use `///` doc-comments or short `//` comments only when they add value. +- Do not put `///` doc comments on items annotated with `#[pyattr]`, `#[pyclass]`, or `#[pyfunction]`. The derive macros pull authoritative docstrings from CPython via the `rustpython-doc` crate; a Rust doc comment overrides that source, and on `#[pyattr]` it is silently dropped. + +#### Avoid Duplicate Code in Branches + +When branches differ only in a value but share common logic, extract the differing value first, then call the common logic once. + +**Bad:** +```rust +let result = if condition { + let msg = format!("message A: {x}"); + some_function(msg, shared_arg) +} else { + let msg = format!("message B"); + some_function(msg, shared_arg) +}; +``` + +**Good:** +```rust +let msg = if condition { + format!("message A: {x}") +} else { + format!("message B") +}; +let result = some_function(msg, shared_arg); +``` + +### Python Code + +- **IMPORTANT**: In most cases, Python code should not be edited. Bug fixes should be made through Rust code modifications only +- Follow PEP 8 style for custom Python code +- Use ruff for linting Python code +- Minimize modifications to CPython standard library files + +## Integration Between Rust and Python + +The project provides several mechanisms for integration: + +- `pymodule` macro for creating Python modules in Rust +- `pyclass` macro for implementing Python classes in Rust +- `pyfunction` macro for exposing Rust functions to Python +- `PyObjectRef` and other types for working with Python objects in Rust + +## Common Patterns + +### Implementing a Python Module in Rust + +```rust +#[pymodule] +mod mymodule { + use rustpython_vm::prelude::*; + + #[pyfunction] + fn my_function(value: i32) -> i32 { + value * 2 + } + + #[pyattr] + #[pyclass(name = "MyClass")] + #[derive(Debug, PyPayload)] + struct MyClass { + value: usize, + } + + #[pyclass] + impl MyClass { + #[pymethod] + fn get_value(&self) -> usize { + self.value + } + } +} +``` + +### Adding a Python Module to the Interpreter + +```rust +vm.add_native_module( + "my_module_name".to_owned(), + Box::new(my_module::make_module), +); +``` + +## Building for Different Targets + +### WebAssembly + +```bash +# Build for WASM +cargo build --target wasm32-wasip1 --no-default-features --features freeze-stdlib,stdlib --release +``` + +### JIT Support + +```bash +# Enable JIT support +cargo run --features jit +``` + +### Linux Build and Debug on macOS + +See the "Testing on Linux from macOS" section in [DEVELOPMENT.md](DEVELOPMENT.md#testing-on-linux-from-macos). + +### Building venvlauncher (Windows) + +See DEVELOPMENT.md "CPython Version Upgrade Checklist" section. + +**IMPORTANT**: All 4 venvlauncher binaries use the same source code. Do NOT add multiple `[[bin]]` entries to Cargo.toml. Build once and copy with different names. + +## Test Code Modification Rules + +**CRITICAL: Test code modification restrictions** +- NEVER comment out or delete any test code lines except for removing `@unittest.expectedFailure` decorators and upper TODO comments +- NEVER modify test assertions, test logic, or test data +- When a test cannot pass due to missing language features, keep it as expectedFailure and document the reason +- The only acceptable modifications to test files are: + 1. Removing `@unittest.expectedFailure` decorators and the upper TODO comments when tests actually pass + 2. Adding `@unittest.expectedFailure` decorators when tests cannot be fixed + +**Examples of FORBIDDEN modifications:** +- Commenting out test lines +- Changing test assertions +- Modifying test data or expected results +- Removing test logic + +**Correct approach when tests fail due to unsupported syntax:** +- Keep the test as `@unittest.expectedFailure` +- Document that it requires PEP 695 support +- Focus on tests that can be fixed through Rust code changes only + +## CI Workflows + +If you modify any file under `.github/workflows/`, the change must pass a [zizmor](https://docs.zizmor.sh/) scan in CI. + +## Documentation + +- Check the [architecture document](/architecture/architecture.md) for a high-level overview +- Read the [development guide](/DEVELOPMENT.md) for detailed setup instructions +- Generate documentation with `cargo doc --no-deps --all` +- Online documentation is available at [docs.rs/rustpython](https://docs.rs/rustpython/) +- [How to update test files](https://github.com/RustPython/RustPython/wiki/How-to-update-test-files#checkout-cpython-source-code-initial-setup) — guide for syncing test cases from upstream CPython into the `Lib/` directory diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..58954486eaf --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,262 @@ +# Contributing to RustPython + +Contributions are more than welcome, and in many cases we are happy to guide +contributors through PRs or on [**Discord**](https://discord.gg/vru8NypEhv). + +## Finding ways to help + +We label issues that would be good for a first time contributor as [`good first issue`](https://github.com/RustPython/RustPython/issues?q=label%3A%22good+first+issue%22+is%3Aissue+is%3Aopen+). +Also checkout the [issue tracker](https://github.com/RustPython/RustPython/issues) for all open issues. + +You can enhance CPython compatibility by increasing our unittest coverage, you can see [This pinned issue](https://github.com/RustPython/RustPython/issues/6839) to see which libs and tests need be updated to our current supported python version. + +Another approach is to checkout the source code: builtin functions and object +methods are often the simplest and easiest way to contribute. + +You can also simply run `python -I scripts/whats_left.py` to assist in finding any unimplemented method. + +## Use of AI + +We **require all use of AI in contributions to follow our +[AI Policy](https://github.com/RustPython/.github/blob/main/AI_POLICY.md)**. + +If your contribution does not follow the policy, it will be closed. + +## RustPython Development Guide and Tips + +RustPython attracts developers with interest and experience in Rust, Python, +or WebAssembly. Whether you are familiar with Rust, Python, or +WebAssembly, the goal of this Development Guide is to give you the basics to +get set up for developing RustPython and contributing to this project. + +The contents of the Development Guide include: + +- [Setting up a development environment](#setting-up-a-development-environment) +- [Code style](#code-style) +- [Testing](#testing) +- [Profiling](#profiling) +- [Code organization](#code-organization) +- [Understanding internals](#understanding-internals) +- [Questions](#questions) + +## Setting up a development environment + +RustPython requires the following: + +- Rust latest stable version (e.g 1.92.0 as of Jan 7 2026) + - To check Rust version: `rustc --version` + - If you have `rustup` on your system, enter to update to the latest + stable version: `rustup update stable` + - If you do not have Rust installed, use [rustup](https://rustup.rs/) to + do so. +- CPython version 3.14 or higher + - CPython can be installed by your operating system's package manager, + from the [Python website](https://www.python.org/downloads/), or + using a third-party distribution, such as + [Anaconda](https://www.anaconda.com/distribution/). +- [macOS] In case of libffi-sys compilation error, make sure autoconf, automake, + libtool are installed + - To install with [Homebrew](https://brew.sh), enter + `brew install autoconf automake libtool` +- [Optional] The Python package, `pytest`, is used for testing Python code + snippets. To install, enter `python3 -m pip install pytest`. + +## Code style + +The Rust code style used is the default +[rustfmt](https://github.com/rust-lang/rustfmt) codestyle. Please format your +code accordingly, or run `cargo fmt` to autoformat it. We also use +[clippy](https://github.com/rust-lang/rust-clippy) to lint Rust code, which +you can check yourself with `cargo clippy`. + +Custom Python code (i.e. code not copied from CPython's standard library) should +follow the [PEP 8](https://www.python.org/dev/peps/pep-0008/) style. We also use +[ruff](https://beta.ruff.rs/docs/) to check Python code style. + +In addition to language specific tools, [cspell](https://github.com/streetsidesoftware/cspell), +a code spell checker, is used in order to ensure correct spellings for code. + +## Testing + +To test RustPython's functionality, a collection of Python snippets is located +in the `extra_tests/snippets` directory and can be run using `pytest`: + +```shell +$ cd extra_tests +$ pytest -v +``` + +Rust unit tests can be run with `cargo`: + +```shell +$ cargo test --workspace --exclude rustpython_wasm --exclude rustpython-venvlauncher +``` + +Python unit tests can be run by compiling RustPython and running the test module: + +```shell +$ cargo run --release -- -m test +``` + +There are a few test options that are especially useful: + +- `-j ` enables parallel testing (which is a lot faster), where `` is the +number of threads to be used, ideally the same as number of cores on your CPU. +If you don't know, `-j 4` or `-j 8` are good options. +- `-v` enables verbose mode, adding additional information about the tests being +run. +- `` specifies a single test to run instead of running all tests. + +For example, to run all tests in parallel: + +```shell +$ cargo run --release -- -m test -j 4 +``` + +To run only `test_cmath` (located at `Lib/test/test_cmath`) verbosely: + +```shell +$ cargo run --release -- -m test test_cmath -v +``` + +### Testing on Linux from macOS + +You can test RustPython on Linux from macOS using Apple's `container` CLI. + +**Setup (one-time):** + +```shell +# Install container CLI +$ brew install container + +# Disable Rosetta requirement for arm64-only builds +$ defaults write com.apple.container.defaults build.rosetta -bool false + +# Build the development image +$ container build --arch arm64 -t rustpython-dev -f .devcontainer/Dockerfile . +``` + +**Running tests:** + +```shell +# Start a persistent container in background (8GB memory, 4 CPUs for compilation) +$ container run -d --name rustpython-test -m 8G -c 4 \ + --mount type=bind,source=$(pwd),target=/workspace \ + -w /workspace rustpython-dev sleep infinity + +# Run tests inside the container +$ container exec rustpython-test sh -c "cargo run --release -- -m test test_ensurepip" + +# Run any command +$ container exec rustpython-test sh -c "cargo test --workspace" + +# Stop and remove the container when done +$ container rm -f rustpython-test +``` + +## Profiling + +To profile RustPython, build it in `release` mode with the `flame-it` feature. +This will generate a file `flamescope.json`, which can be viewed at +https://speedscope.app. + +```shell +$ cargo run --release --features flame-it script.py +$ cat flamescope.json +{} +``` + +You can specify another file name other than the default by using the +`--output-file` option to specify a file name (or `stdout` if you specify `-`). +The `--output-format` option determines the format of the output file. +The speedscope json format (default), text, or raw html can be passed. There +exists a raw html viewer which is currently broken, and we welcome a PR to fix it. + +## Code organization + +Understanding a new codebase takes time. Here's a brief view of the +repository's structure: + +- `crates/compiler/src`: python compilation to bytecode + - `crates/compiler-core/src`: python bytecode representation in rust structures +- `crates/derive/src` and `crates/derive-impl/src`: Rust language extensions and macros specific to rustpython +- `Lib`: Carefully selected / copied files from CPython sourcecode. This is + the python side of the standard library. + - `test`: CPython test suite +- `crates/vm/src`: python virtual machine + - `builtins`: Builtin functions and types + - `stdlib`: Standard library parts implemented in rust. +- `src`: using the other subcrates to bring rustpython to life. +- `crates/wasm`: Binary crate and resources for WebAssembly build +- `extra_tests`: extra integration test snippets as a supplement to `Lib/test`. + Add new RustPython-only regression tests here; do not place new tests under `Lib/test`. + +## Understanding Internals + +The RustPython workspace includes the `rustpython` top-level crate. The `Cargo.toml` +file in the root of the repo provide configuration of the crate and the +implementation is found in the `src` directory (specifically, `src/lib.rs`). + +The top-level `rustpython` binary depends on several lower-level crates including: + +- `ruff_python_parser` and `ruff_python_ast` (external dependencies from the Ruff project) +- `rustpython-compiler` (implementation in `crates/compiler/src`) +- `rustpython-vm` (implementation in `crates/vm/src`) + +Together, these crates provide the functions of a programming language and +enable a line of code to go through a series of steps: + +- parse the line of source code into tokens +- determine if the tokens are valid syntax +- create an Abstract Syntax Tree (AST) +- compile the AST into bytecode +- execute the bytecode in the virtual machine (VM). + +### Parser and AST + +RustPython uses the Ruff project's parser and AST implementation: + +- Parser: `ruff_python_parser` is used to convert Python source code into tokens + and parse them into an Abstract Syntax Tree (AST) +- AST: `ruff_python_ast` provides the Rust types and expressions represented by + the AST nodes +- These are external dependencies maintained by the Ruff project +- For more information, visit the [Ruff GitHub repository](https://github.com/astral-sh/ruff) + +### rustpython-compiler + +The `rustpython-compiler` crate's purpose is to transform the AST (Abstract Syntax +Tree) to bytecode. The implementation of the compiler is found in the +`crates/compiler/src` directory. The compiler implements Python's symbol table, +ast->bytecode compiler, and bytecode optimizer in Rust. + +Implementation of bytecode structure in Rust is found in the `crates/compiler-core/src` +directory. `crates/compiler-core/src/bytecode.rs` contains the representation of +instructions and operations in Rust. Further information about Python's +bytecode instructions can be found in the +[Python documentation](https://docs.python.org/3/library/dis.html#bytecodes). + +### rustpython-vm + +The `rustpython-vm` crate has the important job of running the virtual machine that +executes Python's instructions. The `crates/vm/src` directory contains code to +implement the read and evaluation loop that fetches and dispatches +instructions. This directory also contains the implementation of the +Python Standard Library modules in Rust (`crates/vm/src/stdlib`). In Python +everything can be represented as an object. The `crates/vm/src/builtins` directory holds +the Rust code used to represent different Python objects and their methods. The +core implementation of what a Python object is can be found in +`crates/vm/src/object/core.rs`. + +### Code generation + +There are some code generations involved in building RustPython: + +- some part of the AST code is generated from `vm/src/stdlib/ast/gen.rs` to `compiler/ast/src/ast_gen.rs`. +- the `__doc__` attributes are generated by the + [__doc__](https://github.com/RustPython/__doc__) project which is then included as the `rustpython-doc` crate. + +## Questions + +Have you tried these steps and have a question, please chat with us on +[Discord](https://discord.gg/vru8NypEhv). diff --git a/Cargo.lock b/Cargo.lock index 70395fb1d5a..60080587885 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,17 +2,11 @@ # It is not intended for manual editing. version = 4 -[[package]] -name = "Inflector" -version = "0.11.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe438c63458706e03479442743baae6c88256498e6431708f6dfc520a26515d3" - [[package]] name = "adler2" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "adler32" @@ -21,32 +15,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] -name = "ahash" -version = "0.8.11" +name = "aes" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +checksum = "66bd29a732b644c0431c6140f370d097879203d79b80c94a6747ba0872adaef8" dependencies = [ - "cfg-if", - "getrandom", - "once_cell", - "version_check", - "zerocopy", + "cipher", + "cpubits", + "cpufeatures 0.3.0", ] [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] [[package]] -name = "android-tzdata" -version = "0.1.1" +name = "alloca" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" +dependencies = [ + "cc", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "android_system_properties" @@ -58,19 +59,66 @@ dependencies = [ ] [[package]] -name = "ansi_term" -version = "0.12.1" +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstream" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ - "winapi", + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.60.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", ] [[package]] name = "anyhow" -version = "1.0.89" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "approx" @@ -82,10 +130,19 @@ dependencies = [ ] [[package]] -name = "arrayvec" -version = "0.7.6" +name = "ar_archive_writer" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" +dependencies = [ + "object", +] + +[[package]] +name = "arbitrary" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" [[package]] name = "ascii" @@ -94,36 +151,177 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" [[package]] -name = "atomic" +name = "asn1-rs" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56624a96882bb8c26d61312ae18cb45868e5a9992ea73c58e45c3101e56a1e60" +dependencies = [ + "asn1-rs-derive", + "asn1-rs-impl", + "displaydoc", + "nom", + "num-traits", + "rusticata-macros", + "thiserror", + "time", +] + +[[package]] +name = "asn1-rs-derive" version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994" +checksum = "3109e49b1e4909e9db6515a30c633684d68cdeaa252f215214cb4fa1a5bfee2c" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "asn1-rs-impl" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "atomic" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340" dependencies = [ "bytemuck", ] [[package]] -name = "atty" -version = "0.2.14" +name = "attribute-derive" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +checksum = "05832cdddc8f2650cc2cc187cc2e952b8c133a48eb055f35211f61ee81502d77" dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", + "attribute-derive-macro", + "derive-where", + "manyhow", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "attribute-derive-macro" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a7cdbbd4bd005c5d3e2e9c885e6fa575db4f4a3572335b974d8db853b6beb61" +dependencies = [ + "collection_literals", + "interpolator", + "manyhow", + "proc-macro-utils", + "proc-macro2", + "quote", + "quote-use", + "syn", ] [[package]] name = "autocfg" -version = "1.3.0" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "aws-lc-fips-sys" +version = "0.13.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d619165468401dec3caa3366ebffbcb83f2f31883e5b3932f8e2dec2ddc568" +dependencies = [ + "bindgen 0.72.1", + "cc", + "cmake", + "dunce", + "fs_extra", + "regex", +] + +[[package]] +name = "aws-lc-rs" +version = "1.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ec6fb3fe69024a75fa7e1bfb48aa6cf59706a101658ea01bfd33b2b248a038f" +dependencies = [ + "aws-lc-fips-sys", + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "f50037ee5e1e41e7b8f9d161680a725bd1626cb6f8c7e901f91f942850852fe7" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] [[package]] name = "base64" -version = "0.13.1" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64ct" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" + +[[package]] +name = "bindgen" +version = "0.71.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +dependencies = [ + "bitflags 2.13.0", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] + +[[package]] +name = "bindgen" +version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" +checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" +dependencies = [ + "bitflags 2.13.0", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", +] [[package]] name = "bitflags" @@ -133,17 +331,29 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8" + +[[package]] +name = "bitflagset" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64b6ee310aa7af14142c8c9121775774ff601ae055ed98ba7fac96098bcde1b9" +dependencies = [ + "num-integer", + "num-traits", + "radium", + "ref-cast", +] [[package]] name = "blake2" -version = "0.10.6" +version = "0.11.0-rc.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +checksum = "061f1a09225e328e1ffbb378d2d49923c0ca5fee19fb5ac1cc9c1e9d52b93690" dependencies = [ - "digest", + "digest 0.11.3", ] [[package]] @@ -155,11 +365,29 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "block-padding" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "710f1dd022ef4e93f8a438b4ba958de7f64308434fa6a87104481645cc30068b" +dependencies = [ + "hybrid-array", +] + [[package]] name = "bstr" -version = "1.10.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" dependencies = [ "memchr", "regex-automata", @@ -168,78 +396,84 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +dependencies = [ + "allocator-api2", +] [[package]] name = "bytemuck" -version = "1.18.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" [[package]] -name = "byteorder" -version = "1.5.0" +name = "bytes" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "bzip2" -version = "0.4.4" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" dependencies = [ - "bzip2-sys", - "libc", + "libbz2-rs-sys", ] [[package]] -name = "bzip2-sys" -version = "0.1.11+1.0.8" +name = "cast" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" -dependencies = [ - "cc", - "libc", - "pkg-config", -] +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] -name = "caseless" -version = "0.2.2" +name = "castaway" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6fd507454086c8edfd769ca6ada439193cdb209c7681712ef6275cccbfe5d8" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" dependencies = [ - "unicode-normalization", + "rustversion", ] [[package]] -name = "cast" -version = "0.3.0" +name = "cbc" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" +checksum = "ce2dc9ee5f88d11e0beb842c88b33c8a5cf0d1329c4b19494af42b07dbfe8896" +dependencies = [ + "cipher", +] [[package]] name = "cc" -version = "1.1.21" +version = "1.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07b1695e2c7e8fc85310cde85aeaab7e3097f593c91d209d3f9df76c928100f0" +checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", "shlex", ] [[package]] -name = "cfg-if" -version = "1.0.0" +name = "cexpr" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] [[package]] -name = "cfg_aliases" -version = "0.1.1" +name = "cfg-if" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "cfg_aliases" @@ -247,155 +481,381 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "rand_core 0.10.1", +] + [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-targets 0.52.6", + "windows-link", ] [[package]] -name = "clap" -version = "2.34.0" +name = "ciborium" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ - "ansi_term", - "atty", - "bitflags 1.3.2", - "strsim", - "textwrap 0.11.0", - "unicode-width", - "vec_map", + "ciborium-io", + "ciborium-ll", + "serde", ] [[package]] -name = "clipboard-win" -version = "5.4.0" +name = "ciborium-io" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15efe7a882b08f34e38556b14f2fb3daa98769d06c7f0c1b076dfd0d983bc892" -dependencies = [ - "error-code", -] +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] -name = "console" -version = "0.15.8" +name = "ciborium-ll" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ - "encode_unicode", - "lazy_static 1.5.0", - "libc", - "windows-sys 0.52.0", + "ciborium-io", + "half", ] [[package]] -name = "console_error_panic_hook" -version = "0.1.7" +name = "cipher" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +checksum = "e8cf2a2c93cd704877c0858356ed03480ff301ee950b43f1cbe4573b088bfa6c" dependencies = [ - "cfg-if", - "wasm-bindgen", + "block-buffer 0.12.0", + "crypto-common 0.2.2", + "inout", ] [[package]] -name = "core-foundation" -version = "0.9.4" +name = "clang-sys" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ - "core-foundation-sys", + "glob", "libc", + "libloading 0.8.9", ] [[package]] -name = "core-foundation-sys" -version = "0.8.7" +name = "clap" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", +] [[package]] -name = "cpufeatures" -version = "0.2.14" +name = "clap_builder" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ - "libc", + "anstyle", + "clap_lex", ] [[package]] -name = "cranelift" -version = "0.88.2" +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "clipboard-win" +version = "5.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea1b0c164043c16a8ece6813eef609ac2262a32a0bb0f5ed6eecf5d7bfb79ba8" +checksum = "bde03770d3df201d4fb868f2c9c59e66a3e4e2bd06692a0fe701e7103c7e84d4" dependencies = [ - "cranelift-codegen", - "cranelift-frontend", + "error-code", ] [[package]] -name = "cranelift-bforest" -version = "0.88.2" +name = "cmake" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52056f6d0584484b57fa6c1a65c1fcb15f3780d8b6a758426d9e3084169b2ddd" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" dependencies = [ - "cranelift-entity", + "cc", ] [[package]] -name = "cranelift-codegen" -version = "0.88.2" +name = "cmov" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18fed94c8770dc25d01154c3ffa64ed0b3ba9d583736f305fed7beebe5d9cf74" -dependencies = [ - "arrayvec", - "bumpalo", - "cranelift-bforest", - "cranelift-codegen-meta", - "cranelift-codegen-shared", - "cranelift-entity", - "cranelift-isle", - "log", +checksum = "3f88a43d011fc4a6876cb7344703e297c71dda42494fee094d5f7c76bf13f746" + +[[package]] +name = "collection_literals" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2550f75b8cfac212855f6b1885455df8eaee8fe8e246b647d69146142e016084" + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "static_assertions", +] + +[[package]] +name = "console" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" +dependencies = [ + "encode_unicode", + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + +[[package]] +name = "constant_time_eq" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a1ec0cfec728a79a5109075543131387f911cb4d07716436d7ae20533657a96" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "core-models" +version = "0.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "657f625ff361906f779745d08375ae3cc9fef87a35fba5f22874cf773010daf4" +dependencies = [ + "hax-lib", + "pastey", + "rand 0.9.4", +] + +[[package]] +name = "cpubits" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15b85f9c39137c3a891689859392b1bd49812121d0d61c9caf00d46ed5ce06ae" + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "cranelift" +version = "0.132.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5c702984722ad27d12c532df8467890f922bf5b1932286906dd2bb6779982c" +dependencies = [ + "cranelift-codegen", + "cranelift-frontend", + "cranelift-module", +] + +[[package]] +name = "cranelift-assembler-x64" +version = "0.132.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77c4ebb31662e2051dcc49b7342d222405a99e951720756cc4b93315972abd67" +dependencies = [ + "cranelift-assembler-x64-meta", +] + +[[package]] +name = "cranelift-assembler-x64-meta" +version = "0.132.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "106dfc2ec96ec1c3a8a250602e936712e00a381df032f7a8ad175c8f768c03bb" +dependencies = [ + "cranelift-srcgen", +] + +[[package]] +name = "cranelift-bforest" +version = "0.132.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5694aa8a2eb2571a15b3feee38d16ccaf2712200e7b5c9ae0479069bdfb46949" +dependencies = [ + "cranelift-entity", + "wasmtime-internal-core", +] + +[[package]] +name = "cranelift-bitset" +version = "0.132.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93ab349d30a5fad9699440ee7ccb435374e8a8735dcca26696a4245bcefcc47e" +dependencies = [ + "wasmtime-internal-core", +] + +[[package]] +name = "cranelift-codegen" +version = "0.132.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e95970bdb51d145c828a114a1084cb8b63e65569a51600ad398cb49fa78b062" +dependencies = [ + "bumpalo", + "cranelift-assembler-x64", + "cranelift-bforest", + "cranelift-bitset", + "cranelift-codegen-meta", + "cranelift-codegen-shared", + "cranelift-control", + "cranelift-entity", + "cranelift-isle", + "gimli", + "hashbrown 0.17.0", + "libm", + "log", "regalloc2", + "rustc-hash", + "serde", "smallvec", "target-lexicon", + "wasmtime-internal-core", ] [[package]] name = "cranelift-codegen-meta" -version = "0.88.2" +version = "0.132.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c451b81faf237d11c7e4f3165eeb6bac61112762c5cfe7b4c0fb7241474358f" +checksum = "0e8414b8ecc81f89f8a3f2c5cbc785b9ed690200cd6f9d780e96a92f88879704" dependencies = [ + "cranelift-assembler-x64-meta", "cranelift-codegen-shared", + "cranelift-srcgen", + "heck", ] [[package]] name = "cranelift-codegen-shared" -version = "0.88.2" +version = "0.132.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "631c4e5db42e6a0f9e7a68f18f7faba3692862114870c7c598aee7e0e5677e59" + +[[package]] +name = "cranelift-control" +version = "0.132.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c940133198426d26128f08be2b40b0bd117b84771fd36798969c4d712d81fc" +checksum = "09c6e92c825abfbb739a4beaa5db3988f98a96a68d6ea656f562098efc142976" +dependencies = [ + "arbitrary", +] [[package]] name = "cranelift-entity" -version = "0.88.2" +version = "0.132.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87a0f1b2fdc18776956370cf8d9b009ded3f855350c480c1c52142510961f352" +checksum = "55e57cd185782abada9ab2606bfe88d0abc0d42d83a7d432dcf69991e17fe76e" +dependencies = [ + "cranelift-bitset", + "wasmtime-internal-core", +] [[package]] name = "cranelift-frontend" -version = "0.88.2" +version = "0.132.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34897538b36b216cc8dd324e73263596d51b8cf610da6498322838b2546baf8a" +checksum = "8a0f17e48d15e29552e2f264d302c31a661a830196d879bbdaf4d7b2bf2f7011" dependencies = [ "cranelift-codegen", "log", @@ -405,79 +865,88 @@ dependencies = [ [[package]] name = "cranelift-isle" -version = "0.88.2" +version = "0.132.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b2629a569fae540f16a76b70afcc87ad7decb38dc28fa6c648ac73b51e78470" +checksum = "407b80b46934c9dce9a6581f0e80d079b7a69e11372fb03d7dce4c7ba3fee4e3" [[package]] name = "cranelift-jit" -version = "0.88.2" +version = "0.132.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "625be33ce54cf906c408f5ad9d08caa6e2a09e52d05fd0bd1bd95b132bfbba73" +checksum = "ea72887b62db0e7c387f8c0b7af600772fd9190c6f7205fd2182b0ce10159c80" dependencies = [ "anyhow", "cranelift-codegen", + "cranelift-control", "cranelift-entity", "cranelift-module", "cranelift-native", "libc", "log", + "memmap2 0.2.3", "region", "target-lexicon", - "windows-sys 0.36.1", + "wasmtime-internal-jit-icache-coherence", + "windows-sys 0.61.2", ] [[package]] name = "cranelift-module" -version = "0.88.2" +version = "0.132.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "883f8d42e07fd6b283941688f6c41a9e3b97fbf2b4ddcfb2756e675b86dc5edb" +checksum = "4bd3b5369ba0f409b9b218e2356a71285c6e2815e187329a59db09228fd927c4" dependencies = [ "anyhow", "cranelift-codegen", + "cranelift-control", ] [[package]] name = "cranelift-native" -version = "0.88.2" +version = "0.132.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20937dab4e14d3e225c5adfc9c7106bafd4ac669bdb43027b911ff794c6fb318" +checksum = "3a40e056e421d9a6c757983f2184f765ae1c28a51555d3877e98afc97ce5705b" dependencies = [ "cranelift-codegen", "libc", "target-lexicon", ] +[[package]] +name = "cranelift-srcgen" +version = "0.132.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cdbadda21e49798825a1ec795dab30bcb03235891f662b5ccf23fa45b39682f" + [[package]] name = "crc32fast" -version = "1.4.2" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ "cfg-if", ] [[package]] name = "criterion" -version = "0.3.6" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" +checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" dependencies = [ - "atty", + "alloca", + "anes", "cast", + "ciborium", "clap", "criterion-plot", - "csv", - "itertools 0.10.5", - "lazy_static 1.5.0", + "itertools 0.13.0", "num-traits", "oorandom", + "page_size", "plotters", "rayon", "regex", "serde", - "serde_cbor", - "serde_derive", "serde_json", "tinytemplate", "walkdir", @@ -485,19 +954,19 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.4.5" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" +checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" dependencies = [ "cast", - "itertools 0.10.5", + "itertools 0.13.0", ] [[package]] name = "crossbeam-deque" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", @@ -514,66 +983,126 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" -version = "0.2.2" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", ] [[package]] -name = "csv" -version = "1.3.0" +name = "crypto-common" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +checksum = "ce6e4c961d6cd6c9a86db418387425e8bdeaf05b3c8bc1411e6dca4c252f1453" dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", + "hybrid-array", ] [[package]] name = "csv-core" -version = "0.1.11" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" dependencies = [ "memchr", ] [[package]] -name = "derive_more" -version = "1.0.0" +name = "ctutils" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" +checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e" dependencies = [ - "derive_more-impl", + "cmov", ] [[package]] -name = "derive_more-impl" -version = "1.0.0" +name = "data-encoding" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8" + +[[package]] +name = "der" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb" +dependencies = [ + "const-oid 0.9.6", + "der_derive", + "flagset", + "pem-rfc7468 0.7.0", + "zeroize", +] + +[[package]] +name = "der" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fd89660b2dc699704064e59e9dba0147b903e85319429e131620d022be411b" +dependencies = [ + "const-oid 0.10.2", + "pem-rfc7468 1.0.0", + "zeroize", +] + +[[package]] +name = "der-parser" +version = "10.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07da5016415d5a3c4dd39b11ed26f915f52fc4e0dc197d87908bc916e51bc1a6" +dependencies = [ + "asn1-rs", + "displaydoc", + "nom", + "num-bigint", + "num-traits", + "rusticata-macros", +] + +[[package]] +name = "der_derive" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" +checksum = "8034092389675178f570469e6c3b0465d3d30b4505c294a6550db47f3c17ad18" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", - "unicode-xid", + "syn", +] + +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "derive-where" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d08b3a0bcc0d079199cd476b2cae8435016ec11d1c0986c6901c5ac223041534" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -582,90 +1111,130 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", - "crypto-common", - "subtle", + "block-buffer 0.10.4", + "crypto-common 0.1.7", ] [[package]] -name = "dirs-next" -version = "2.0.0" +name = "digest" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" dependencies = [ - "cfg-if", - "dirs-sys-next", + "block-buffer 0.12.0", + "const-oid 0.10.2", + "crypto-common 0.2.2", + "ctutils", ] [[package]] -name = "dirs-sys-next" -version = "0.1.2" +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" dependencies = [ "libc", + "option-ext", "redox_users", - "winapi", + "windows-sys 0.60.2", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] name = "dns-lookup" -version = "2.0.4" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5766087c2235fec47fafa4cfecc81e494ee679d0fd4a59887ea0919bfb0e4fc" +checksum = "6e39034cee21a2f5bbb66ba0e3689819c4bb5d00382a282006e802a7ffa6c41d" dependencies = [ "cfg-if", "libc", "socket2", - "windows-sys 0.48.0", + "windows-sys 0.60.2", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "dyn-clone" -version = "1.0.17" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "encode_unicode" -version = "0.3.6" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "endian-type" -version = "0.1.2" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "869b0adbda23651a9c5c0c3d270aac9fcb52e8622a8f2b17e57802d7791962f2" + +[[package]] +name = "env_filter" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" +checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef" +dependencies = [ + "log", + "regex", +] [[package]] name = "env_logger" -version = "0.9.3" +version = "0.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" +checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a" dependencies = [ - "atty", + "anstream", + "anstyle", + "env_filter", + "jiff", "log", - "termcolor", ] [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.9" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", "windows-sys 0.52.0", @@ -673,9 +1242,9 @@ dependencies = [ [[package]] name = "error-code" -version = "3.3.1" +version = "3.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5d9305ccc6942a704f4335694ecd3de2ea531b114ac2d51f5f843750787a92f" +checksum = "dea2df4cf52843e0452895c455a1a2cfbb842a1e7329671acf418fdc53ed4c59" [[package]] name = "exitcode" @@ -684,15 +1253,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193" [[package]] -name = "fd-lock" -version = "4.0.2" +name = "fastrand" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e5768da2206272c81ef0b5e951a41862938a6070da63bcea197899942d3b947" -dependencies = [ - "cfg-if", - "rustix", - "windows-sys 0.52.0", -] +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "flagset" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7ac824320a75a52197e8f2d787f6a38b6718bb6897a35142d749af3c0e8f4fe" [[package]] name = "flame" @@ -709,13 +1285,13 @@ dependencies = [ [[package]] name = "flamer" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36b732da54fd4ea34452f2431cf464ac7be94ca4b339c9cd3d3d12eb06fe7aab" +checksum = "7693d9dd1ec1c54f52195dfe255b627f7cec7da33b679cd56de949e662b3db10" dependencies = [ "flame", "quote", - "syn 1.0.109", + "syn", ] [[package]] @@ -732,13 +1308,12 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.33" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ - "crc32fast", - "libz-sys", "miniz_oxide", + "zlib-rs", ] [[package]] @@ -747,6 +1322,18 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "foreign-types" version = "0.3.2" @@ -763,12 +1350,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] -name = "fxhash" -version = "0.2.1" +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + +[[package]] +name = "futures-core" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" + +[[package]] +name = "futures-task" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" + +[[package]] +name = "futures-util" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ - "byteorder", + "futures-core", + "futures-task", + "pin-project-lite", + "slab", ] [[package]] @@ -781,89 +1389,202 @@ dependencies = [ "version_check", ] +[[package]] +name = "get-size-derive2" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b6d1e2f75c16bfbcd0f95d84f99858a6e2f885c2287d1f5c3a96e8444a34b4" +dependencies = [ + "attribute-derive", + "quote", + "syn", +] + +[[package]] +name = "get-size2" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49cf31a6d70300cf81461098f7797571362387ef4bf85d32ac47eaa59b3a5a1a" +dependencies = [ + "compact_str", + "get-size-derive2", + "hashbrown 0.16.1", + "ordermap", + "smallvec", +] + [[package]] name = "gethostname" -version = "0.2.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1ebd34e35c46e00bb73e81363248d627782724609fe1b6396f553f68fe3862e" +checksum = "1bd49230192a3797a9a4d6abe9b3eed6f7fa4c8a8a4947977c6f80025f92cbd8" dependencies = [ - "libc", - "winapi", + "rustix", + "windows-link", ] [[package]] name = "getopts" -version = "0.2.21" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" dependencies = [ "unicode-width", ] [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", - "js-sys", "libc", "wasi", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi 5.3.0", + "wasip2", +] + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "r-efi 6.0.0", + "rand_core 0.10.1", + "wasip2", + "wasip3", "wasm-bindgen", ] +[[package]] +name = "gimli" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf7f043f89559805f8c7cacc432749b2fa0d0a0a9ee46ce47164ed5ba7f126c" +dependencies = [ + "fnv", + "hashbrown 0.16.1", + "indexmap", + "stable_deref_trait", +] + [[package]] name = "glob" -version = "0.3.1" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] -name = "half" -version = "1.8.3" +name = "graviola" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" +checksum = "4387e0458389da24c6fe732531e65595c7c4a32b027f98f4789e512e28224465" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", +] [[package]] name = "half" -version = "2.4.1" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "cfg-if", "crunchy", + "zerocopy", ] [[package]] name = "hashbrown" -version = "0.14.5" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "ahash", + "foldhash 0.1.5", ] [[package]] -name = "heck" -version = "0.5.0" +name = "hashbrown" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "foldhash 0.2.0", +] [[package]] -name = "hermit-abi" -version = "0.1.19" +name = "hashbrown" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" dependencies = [ - "libc", + "foldhash 0.2.0", +] + +[[package]] +name = "hax-lib" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "543f93241d32b3f00569201bfce9d7a93c92c6421b23c77864ac929dc947b9fc" +dependencies = [ + "hax-lib-macros", + "num-bigint", + "num-traits", +] + +[[package]] +name = "hax-lib-macros" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8755751e760b11021765bb04cb4a6c4e24742688d9f3aa14c2079638f537b0f" +dependencies = [ + "hax-lib-macros-types", + "proc-macro-error2", + "proc-macro2", + "quote", + "syn", ] +[[package]] +name = "hax-lib-macros-types" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f177c9ae8ea456e2f71ff3c1ea47bf4464f772a05133fcbba56cd5ba169035a2" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "serde_json", + "uuid", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" -version = "0.3.9" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" [[package]] name = "hex" @@ -877,25 +1598,44 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" +[[package]] +name = "hmac" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f" +dependencies = [ + "digest 0.11.3", +] + [[package]] name = "home" -version = "0.5.9" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.2", +] + +[[package]] +name = "hybrid-array" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" +dependencies = [ + "typenum", ] [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", "windows-core", ] @@ -909,157 +1649,471 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_casemap" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "070f98b5b82798fcb93654bf96ed9f40064fc44c86f51a09ea711092cd5cc5be" +dependencies = [ + "icu_casemap_data", + "icu_collections", + "icu_locale_core", + "icu_properties", + "icu_provider", + "potential_utf", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_casemap_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "846b0857ca091204be3c874bc93daaf89d4777e8d2d20b0d3ffe8f671d98014b" + +[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "serde", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5a396343c7208121dc86e35623d3dfe19814a7613cfd14964994cdc9c9a2e26" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_locale_data", + "icu_provider", + "potential_utf", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "serde", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locale_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5fdcc9ac77c6d74ff5cf6e65ef3181d6af32003b16fce3a77fb451d2f695993" + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "serde", + "stable_deref_trait", + "writeable", + "yoke", + "zerofrom", + "zerotrie", + "zerovec", +] + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "indexmap" -version = "2.5.0" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.17.0", + "serde", + "serde_core", ] [[package]] -name = "indoc" -version = "2.0.5" +name = "inout" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +checksum = "4250ce6452e92010fdf7268ccc5d14faa80bb12fc741938534c58f16804e03c7" +dependencies = [ + "block-padding", + "hybrid-array", +] [[package]] name = "insta" -version = "1.40.0" +version = "1.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6593a41c7a73841868772495db7dc1e8ecab43bb5c0b6da2059246c4b506ab60" +checksum = "86f0f8fee8c926415c58d6ae43a08523a26faccb2323f5e6b644fe7dd4ef6b82" dependencies = [ "console", - "lazy_static 1.5.0", - "linked-hash-map", + "once_cell", "similar", + "tempfile", ] +[[package]] +name = "interpolator" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71dd52191aae121e8611f1e8dc3e324dd0dd1dee1e6dd91d10ee07a3cfb4d9d8" + [[package]] name = "is-macro" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2069faacbe981460232f880d26bf3c7634e322d49053aa48c27e3ae642f728f1" +checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" dependencies = [ - "Inflector", + "heck", "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" -version = "0.10.5" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" dependencies = [ "either", ] [[package]] name = "itertools" -version = "0.11.0" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "jiff" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f00b5dbd620d61dfdcb6007c9c1f6054ebd75319f163d886a9055cec1155073d" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", +] + +[[package]] +name = "jiff-static" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e000de030ff8022ea1da3f466fbb0f3a809f5e51ed31f6dd931c35181ad8e6d7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "jni" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efd9a482cf3a427f00d6b35f14332adc7902ce91efb778580e180ff90fa3498" +dependencies = [ + "cfg-if", + "combine", + "jni-macros", + "jni-sys", + "log", + "simd_cesu8", + "thiserror", + "walkdir", + "windows-link", +] + +[[package]] +name = "jni-macros" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00109accc170f0bdb141fed3e393c565b6f5e072365c3bd58f5b062591560a3" +dependencies = [ + "proc-macro2", + "quote", + "rustc_version", + "simd_cesu8", + "syn", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.97" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf" +dependencies = [ + "cfg-if", + "futures-util", + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "junction" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "160f2eade097f30263b548aae5deb12ad349c909baa710fa24b92c9090b2e006" +dependencies = [ + "scopeguard", + "windows-sys 0.60.2", +] + +[[package]] +name = "keccak" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e24a010dd405bd7ed803e5253182815b41bf2e6a80cc3bfc066658e03a198aa" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", +] + +[[package]] +name = "lazy_static" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "lexical-parse-float" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56" dependencies = [ - "either", + "lexical-parse-integer", + "lexical-util", ] [[package]] -name = "itoa" -version = "1.0.11" +name = "lexical-parse-integer" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34" +dependencies = [ + "lexical-util", +] [[package]] -name = "js-sys" -version = "0.3.70" +name = "lexical-util" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" -dependencies = [ - "wasm-bindgen", -] +checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17" [[package]] -name = "junction" -version = "1.2.0" +name = "lexopt" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72bbdfd737a243da3dfc1f99ee8d6e166480f17ab4ac84d7c34aacd73fc7bd16" -dependencies = [ - "scopeguard", - "windows-sys 0.52.0", -] +checksum = "803ec87c9cfb29b9d2633f20cba1f488db3fd53f2158b1024cbefb47ba05d413" [[package]] -name = "keccak" -version = "0.1.5" +name = "libbz2-rs-sys" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc2af9a1119c51f12a14607e783cb977bde58bc069ff0c3da1095e635d70654" -dependencies = [ - "cpufeatures", -] +checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f" [[package]] -name = "lalrpop-util" -version = "0.20.2" +name = "libc" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] -name = "lazy_static" -version = "0.2.11" +name = "libcrux-intrinsics" +version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" +checksum = "b1b5db005ff8001e026b73a6842ee81bbef8ec5ff0e1915a67ae65fd2a9fafa5" +dependencies = [ + "core-models", + "hax-lib", +] [[package]] -name = "lazy_static" -version = "1.5.0" +name = "libcrux-ml-kem" +version = "0.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +checksum = "a14ab3e477de9df6ee1273a114018ff62c4996ca9220070c4e5cb1743f94a67d" +dependencies = [ + "hax-lib", + "libcrux-intrinsics", + "libcrux-platform", + "libcrux-secrets", + "libcrux-sha3", + "libcrux-traits", +] [[package]] -name = "lexical-parse-float" -version = "0.8.5" +name = "libcrux-platform" +version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +checksum = "1d9e21d7ed31a92ac539bd69a8c970b183ee883872d2d19ce27036e24cb8ecc4" dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", + "libc", ] [[package]] -name = "lexical-parse-integer" -version = "0.8.6" +name = "libcrux-secrets" +version = "0.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +checksum = "1ce650f3041b44ba40d4263852347d007cd2cd9d1cc856a6f6c8b2e10c3fd40b" dependencies = [ - "lexical-util", - "static_assertions", + "hax-lib", ] [[package]] -name = "lexical-util" -version = "0.8.5" +name = "libcrux-sha3" +version = "0.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +checksum = "b1ae0b7d0e1cc4793a609fd0ff2ca3b3a3fabae523770c619a3d4bc86417b0d7" dependencies = [ - "static_assertions", + "hax-lib", + "libcrux-intrinsics", + "libcrux-platform", + "libcrux-traits", ] [[package]] -name = "libc" -version = "0.2.159" +name = "libcrux-traits" +version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "812e4fa89f3f5e34b47f928b22b1b78395a0d4ec23b1f583db635f128159d65f" +dependencies = [ + "libcrux-secrets", + "rand 0.9.4", +] [[package]] name = "libffi" -version = "3.2.0" +version = "5.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce826c243048e3d5cec441799724de52e2d42f820468431fc3fceee2341871e2" +checksum = "0498fe5655f857803e156523e644dcdcdc3b3c7edda42ea2afdae2e09b2db87b" dependencies = [ "libc", "libffi-sys", @@ -1067,34 +2121,53 @@ dependencies = [ [[package]] name = "libffi-sys" -version = "2.3.0" +version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f36115160c57e8529781b4183c2bb51fdc1f6d6d1ed345591d84be7703befb3c" +checksum = "71d4f1d4ce15091955144350b75db16a96d4a63728500122706fb4d29a26afbb" dependencies = [ "cc", ] +[[package]] +name = "libloading" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" +dependencies = [ + "cfg-if", + "windows-link", +] + +[[package]] +name = "libloading" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" +dependencies = [ + "cfg-if", + "windows-link", +] + [[package]] name = "libm" -version = "0.2.11" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.3" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" dependencies = [ - "bitflags 2.6.0", "libc", ] [[package]] name = "libsqlite3-sys" -version = "0.28.0" +version = "0.38.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c10584274047cb335c23d3e61bcef8e323adae7c5c8c760540f73610177fc3f" +checksum = "f6c19a05435c21ac299d71b6a9c13db3e3f47c520517d58990a462a1397a61db" dependencies = [ "cc", "pkg-config", @@ -1102,103 +2175,89 @@ dependencies = [ ] [[package]] -name = "libz-sys" -version = "1.1.20" +name = "libz-rs-sys" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2d16453e800a8cf6dd2fc3eb4bc99b786a9b90c663b8559a5b1a041bf89e472" +checksum = "f1116a951fd9d5110720bb2ae66f72ce5d7b10bd8aa8744924677157089ce13f" dependencies = [ - "cc", - "pkg-config", - "vcpkg", + "zlib-rs", ] [[package]] -name = "linked-hash-map" -version = "0.5.6" +name = "linux-raw-sys" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] -name = "linux-raw-sys" -version = "0.4.14" +name = "litemap" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" [[package]] name = "lock_api" -version = "0.4.12" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] [[package]] name = "log" -version = "0.4.22" +version = "0.4.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" [[package]] name = "lz4_flex" -version = "0.11.3" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +checksum = "7ef0d4ed8669f8f8826eb00dc878084aa8f253506c4fd5e8f58f5bce72ddb97e" dependencies = [ "twox-hash", ] [[package]] name = "mac_address" -version = "1.1.7" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8836fae9d0d4be2c8b4efcdd79e828a2faa058a90d005abf42f91cac5493a08e" +checksum = "c0aeb26bf5e836cc1c341c8106051b573f1766dfa05aa87f0b98be5e51b02303" dependencies = [ - "nix 0.28.0", + "nix 0.29.0", "winapi", ] [[package]] -name = "mach" -version = "0.3.2" +name = "mach2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +checksum = "d640282b302c0bb0a2a8e0233ead9035e3bed871f0b7e81fe4a1ec829765db44" dependencies = [ "libc", ] -[[package]] -name = "malachite" -version = "0.4.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fbdf9cb251732db30a7200ebb6ae5d22fe8e11397364416617d2c2cf0c51cb5" -dependencies = [ - "malachite-base", - "malachite-nz", - "malachite-q", -] - [[package]] name = "malachite-base" -version = "0.4.22" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea0ed76adf7defc1a92240b5c36d5368cfe9251640dcce5bd2d0b7c1fd87aeb" +checksum = "a8b6f86fdbb1eb9955946be91775239dfcb0acdb1a51bb07d5fc9b8c854f5ccd" dependencies = [ - "hashbrown", - "itertools 0.11.0", + "hashbrown 0.16.1", + "itertools 0.14.0", "libm", "ryu", ] [[package]] name = "malachite-bigint" -version = "0.2.3" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d149aaa2965d70381709d9df4c7ee1fc0de1c614a4efc2ee356f5e43d68749f8" +checksum = "67fcd6e504ffc67db2b3c6d5e90e08054646e2b04f42115a5460bf1c1e37d3bc" dependencies = [ - "derive_more", - "malachite", + "malachite-base", + "malachite-nz", "num-integer", "num-traits", "paste", @@ -1206,59 +2265,80 @@ dependencies = [ [[package]] name = "malachite-nz" -version = "0.4.22" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34a79feebb2bc9aa7762047c8e5495269a367da6b5a90a99882a0aeeac1841f7" +checksum = "0197a2f5cfee19d59178e282985c6ca79a9233e26a2adcf40acb693896aa09f6" dependencies = [ - "itertools 0.11.0", + "itertools 0.14.0", "libm", "malachite-base", + "wide", ] [[package]] name = "malachite-q" -version = "0.4.22" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f235d5747b1256b47620f5640c2a17a88c7569eebdf27cd9cb130e1a619191" +checksum = "be2add95162aede090c48f0ee51bea7d328847ce3180aa44588111f846cc116b" dependencies = [ - "itertools 0.11.0", + "itertools 0.14.0", "malachite-base", "malachite-nz", ] [[package]] -name = "maplit" -version = "1.0.2" +name = "manyhow" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" +checksum = "b33efb3ca6d3b07393750d4030418d594ab1139cee518f0dc88db70fec873587" +dependencies = [ + "manyhow-macros", + "proc-macro2", + "quote", + "syn", +] [[package]] -name = "matches" -version = "0.1.10" +name = "manyhow-macros" +version = "0.11.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" +checksum = "46fce34d199b78b6e6073abf984c9cf5fd3e9330145a93ee0738a7443e371495" +dependencies = [ + "proc-macro-utils", + "proc-macro2", + "quote", +] [[package]] name = "md-5" -version = "0.10.6" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +checksum = "69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" dependencies = [ "cfg-if", - "digest", + "digest 0.11.3", ] [[package]] name = "memchr" -version = "2.7.4" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" + +[[package]] +name = "memmap2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "723e3ebdcdc5c023db1df315364573789f8857c11b631a2fdfad7c00f5c046b4" +dependencies = [ + "libc", +] [[package]] name = "memmap2" -version = "0.5.10" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ "libc", ] @@ -1272,22 +2352,29 @@ dependencies = [ "autocfg", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] name = "mt19937" -version = "2.0.1" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12ca7f22ed370d5991a9caec16a83187e865bc8a532f889670337d5a5689e3a1" +checksum = "25b4ab1a6f4b7820876af86b84adf545d53a52f59c5374856225aad9562d903e" dependencies = [ - "rand_core", + "rand_core 0.10.1", ] [[package]] @@ -1301,30 +2388,50 @@ dependencies = [ [[package]] name = "nix" -version = "0.28.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.13.0", "cfg-if", - "cfg_aliases 0.1.1", + "cfg_aliases", "libc", "memoffset", ] [[package]] name = "nix" -version = "0.29.0" +version = "0.31.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +checksum = "cf20d2fde8ff38632c426f1165ed7436270b44f199fc55284c38276f9db47c3d" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.13.0", "cfg-if", - "cfg_aliases 0.2.1", + "cfg_aliases", "libc", "memoffset", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -1334,6 +2441,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-conv" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" + [[package]] name = "num-integer" version = "0.1.46" @@ -1354,57 +2467,81 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ - "hermit-abi 0.3.9", + "hermit-abi", "libc", ] [[package]] name = "num_enum" -version = "0.7.3" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179" +checksum = "5d0bca838442ec211fa11de3a8b0e0e8f3a4522575b5c4c06ed722e005036f26" dependencies = [ "num_enum_derive", + "rustversion", ] [[package]] name = "num_enum_derive" -version = "0.7.3" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" +checksum = "680998035259dcfcafe653688bf2aa6d3e2dc05e98be6ab46afb089dc84f1df8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", +] + +[[package]] +name = "object" +version = "0.37.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +dependencies = [ + "memchr", +] + +[[package]] +name = "oid-registry" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12f40cff3dde1b6087cc5d5f5d4d65712f34016a03ed60e9c08dcc392736b5b7" +dependencies = [ + "asn1-rs", ] [[package]] name = "once_cell" -version = "1.19.0" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "oorandom" -version = "11.1.4" +version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "openssl" -version = "0.10.66" +version = "0.10.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" +checksum = "77823a27f0babb03091cb9ed9ef80af3b39dbc82f97e8fa530374b7dafd87a45" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.13.0", "cfg-if", "foreign-types", "libc", - "once_cell", "openssl-macros", "openssl-sys", ] @@ -1417,29 +2554,29 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] name = "openssl-probe" -version = "0.1.5" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "openssl-src" -version = "300.3.2+3.3.2" +version = "300.6.0+3.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a211a18d945ef7e648cc6e0058f4c548ee46aab922ea203e0d30e966ea23647b" +checksum = "a8e8cbfd3a4a8c8f089147fd7aaa33cf8c7450c4d09f8f80698a0cf093abeff4" dependencies = [ "cc", ] [[package]] name = "openssl-sys" -version = "0.9.103" +version = "0.9.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" +checksum = "b47e7e6bb2c38cd930d25a23b40fa52e068c10e85f3e03a7f5ba5aaca5713695" dependencies = [ "cc", "libc", @@ -1448,17 +2585,32 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "optional" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "978aa494585d3ca4ad74929863093e87cac9790d81fe7aba2b3dc2890643a0fc" +[[package]] +name = "ordermap" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7476a5b122ff1fce7208e7ee9dccd0a516e835f5b8b19b8f3c98a34cf757c1" +dependencies = [ + "indexmap", +] + [[package]] name = "page_size" -version = "0.4.2" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eebde548fbbf1ea81a99b128872779c437752fb99f217c45245e1a61dcd9edcd" +checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" dependencies = [ "libc", "winapi", @@ -1466,9 +2618,9 @@ dependencies = [ [[package]] name = "parking_lot" -version = "0.12.3" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -1476,15 +2628,14 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +version = "0.9.12" +source = "git+https://github.com/youknowone/parking_lot?branch=rustpython#4392edbe879acc9c0dd94eda53d2205d3ab912c9" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.5", + "redox_syscall 0.5.18", "smallvec", - "windows-targets 0.52.6", + "windows-link", ] [[package]] @@ -1493,49 +2644,160 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pastey" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ee67f1008b1ba2321834326597b8e186293b049a023cdef258527550b9935b4" + +[[package]] +name = "pbkdf2" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112d82ceb8c5bf524d9af484d4e4970c9fd5a0cc15ba14ad93dccd28873b0629" +dependencies = [ + "digest 0.11.3", + "hmac", +] + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "pem-rfc7468" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6305423e0e7738146434843d1694d621cce767262b2a86910beab705e4493d9" +dependencies = [ + "base64ct", +] + +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared 0.11.3", +] + [[package]] name = "phf" -version = "0.11.2" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_macros", + "phf_shared 0.13.1", + "serde", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared 0.11.3", + "rand 0.8.6", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" +dependencies = [ + "fastrand", + "phf_shared 0.13.1", +] + +[[package]] +name = "phf_macros" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" +dependencies = [ + "phf_generator 0.13.1", + "phf_shared 0.13.1", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ - "phf_shared", + "siphasher", ] [[package]] -name = "phf_codegen" -version = "0.11.2" +name = "phf_shared" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" dependencies = [ - "phf_generator", - "phf_shared", + "siphasher", ] [[package]] -name = "phf_generator" -version = "0.11.2" +name = "pin-project-lite" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "pkcs5" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +checksum = "279a91971a1d8eb1260a30938eae3be9cb67b472dffecb222fbbbe2fd2dc1453" dependencies = [ - "phf_shared", - "rand", + "aes", + "cbc", + "der 0.8.0", + "pbkdf2", + "rand_core 0.10.1", + "scrypt", + "sha2", + "spki 0.8.0", ] [[package]] -name = "phf_shared" -version = "0.11.2" +name = "pkcs8" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +checksum = "451913da69c775a56034ea8d9003d27ee8948e12443eae7c038ba100a4f21cb7" dependencies = [ - "siphasher", + "der 0.8.0", + "pkcs5", + "rand_core 0.10.1", + "spki 0.8.0", ] [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" [[package]] name = "plotters" @@ -1573,72 +2835,154 @@ checksum = "52a40bc70c2c58040d2d8b167ba9a5ff59fc9dab7ad44771cfde3dcfde7a09c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] name = "portable-atomic" -version = "1.8.0" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "portable-atomic-util" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618" +dependencies = [ + "portable-atomic", +] + +[[package]] +name = "potential_utf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "serde_core", + "writeable", + "zerovec", +] + +[[package]] +name = "powerfmt" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d30538d42559de6b034bc76fd6dd4c38961b1ee5c6c56e3808c50128fdbc22ce" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro-error-attr2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro-error2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" +dependencies = [ + "proc-macro-error-attr2", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "proc-macro-utils" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eeaf08a13de400bc215877b5bdc088f241b12eb42f0a548d3390dc1c56bb7071" +dependencies = [ + "proc-macro2", + "quote", + "smallvec", +] + [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] [[package]] -name = "puruspe" -version = "0.2.5" +name = "psm" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645dbe486e346d9b5de3ef16ede18c26e6c70ad97418f4874b8b1889d6e761ea" +dependencies = [ + "ar_archive_writer", + "cc", +] + +[[package]] +name = "pymath" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3804877ffeba468c806c2ad9057bbbae92e4b2c410c2f108baaa0042f241fa4c" +checksum = "bc10e50b7a1f2cc3887e983721cb51fc7574be0066c84bff3ef9e5c096e8d6d5" +dependencies = [ + "libc", + "libm", + "malachite-bigint", + "num-complex", + "num-integer", + "num-traits", +] [[package]] name = "pyo3" -version = "0.22.4" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00e89ce2565d6044ca31a3eb79a334c3a79a841120a98f64eea9f579564cb691" +checksum = "cd274650b21d4bfc26a0a47587962c1edb425f69287324355cd040c3ea66071c" dependencies = [ - "cfg-if", - "indoc", "libc", - "memoffset", "once_cell", "portable-atomic", "pyo3-build-config", "pyo3-ffi", "pyo3-macros", - "unindent", ] [[package]] name = "pyo3-build-config" -version = "0.22.4" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8afbaf3abd7325e08f35ffb8deb5892046fcb2608b703db6a583a5ba4cea01e" +checksum = "c5e2a7d2f0d013342f295c048ad19237add5154a55b1c5a254c0ec93d4109078" dependencies = [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.22.4" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec15a5ba277339d04763f4c23d85987a5b08cbb494860be141e6a10a8eb88022" +checksum = "ca85c467da1bbc8d866eea5deff9cf29ea5f7785054a17da36e65bda9c05845b" dependencies = [ "libc", "pyo3-build-config", @@ -1646,49 +2990,85 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.4" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e0f01b5364bcfbb686a52fc4181d412b708a68ed20c330db9fc8d2c2bf5a43" +checksum = "9ac53762fd065daa3194dd09337a38bd793a188100fd1a9304c4ab312d901771" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.77", + "syn", ] [[package]] name = "pyo3-macros-backend" -version = "0.22.4" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a09b550200e1e5ed9176976d0060cbc2ea82dc8515da07885e7b8153a85caacb" +checksum = "4ca3a1557399783172dc5bf39cfca835157732532cba56b71d2292161e53b362" dependencies = [ "heck", "proc-macro2", - "pyo3-build-config", "quote", - "syn 2.0.77", + "syn", ] [[package]] name = "quote" -version = "1.0.37" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "quote-use" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9619db1197b497a36178cfc736dc96b271fe918875fbf1344c436a7e93d0321e" +dependencies = [ + "quote", + "quote-use-macros", +] + +[[package]] +name = "quote-use-macros" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "82ebfb7faafadc06a7ab141a6f67bcfb24cb8beb158c6fe933f2f035afa99f35" dependencies = [ + "proc-macro-utils", "proc-macro2", + "quote", + "syn", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "radium" -version = "0.7.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +checksum = "1775bc532a9bfde46e26eba441ca1171b91608d14a3bae71fea371f18a00cffe" +dependencies = [ + "cfg-if", +] [[package]] name = "radix_trie" -version = "0.2.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +checksum = "3b4431027dcd37fc2a73ef740b5f233aa805897935b8bce0195e41bbf9a3289a" dependencies = [ "endian-type", "nibble_vec", @@ -1696,13 +3076,34 @@ dependencies = [ [[package]] name = "rand" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" +dependencies = [ + "chacha20", + "getrandom 0.4.2", + "rand_core 0.10.1", ] [[package]] @@ -1712,7 +3113,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", ] [[package]] @@ -1721,14 +3132,38 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.17", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_core" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" + +[[package]] +name = "rapidhash" +version = "4.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e48930979c155e2f33aa36ab3119b5ee81332beb6482199a8ecd6029b80b59" +dependencies = [ + "rustversion", ] [[package]] name = "rayon" -version = "1.10.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" dependencies = [ "either", "rayon-core", @@ -1736,9 +3171,9 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.12.1" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" dependencies = [ "crossbeam-deque", "crossbeam-utils", @@ -1752,41 +3187,63 @@ checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" [[package]] name = "redox_syscall" -version = "0.5.5" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62871f2d65009c0256aed1b9cfeeb8ac272833c404e13d53d400cd0dad7a2ac0" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.13.0", ] [[package]] name = "redox_users" -version = "0.4.6" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ - "getrandom", + "getrandom 0.2.17", "libredox", "thiserror", ] +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "regalloc2" -version = "0.3.2" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d43a209257d978ef079f3d446331d0f1794f5e0fc19b306a199983857833a779" +checksum = "de2c52737737f8609e94f975dee22854a2d5c125772d4b1cf292120f4d45c186" dependencies = [ - "fxhash", + "allocator-api2", + "bumpalo", + "hashbrown 0.17.0", "log", - "slice-group-by", + "rustc-hash", "smallvec", ] [[package]] name = "regex" -version = "1.10.6" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", @@ -1796,9 +3253,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", @@ -1807,48 +3264,62 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "region" -version = "2.2.0" +version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877e54ea2adcd70d80e9179344c97f93ef0dffd6b03e1f4529e6e83ab2fa9ae0" +checksum = "e6b6ebd13bc009aef9cd476c1310d49ac354d36e240cf1bd753290f3dc7199a7" dependencies = [ "bitflags 1.3.2", "libc", - "mach", - "winapi", + "mach2", + "windows-sys 0.52.0", ] [[package]] name = "result-like" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abf7172fef6a7d056b5c26bf6c826570267562d51697f4982ff3ba4aec68a9df" +checksum = "bffa194499266bd8a1ac7da6ac7355aa0f81ffa1a5db2baaf20dd13854fd6f4e" dependencies = [ "result-like-derive", ] [[package]] name = "result-like-derive" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8d6574c02e894d66370cfc681e5d68fedbc9a548fb55b30a96b3f0ae22d0fe5" +checksum = "01d3b03471c9700a3a6bd166550daaa6124cb4a146ea139fb028e4edaa8f4277" dependencies = [ "pmutil", "proc-macro2", "quote", - "syn 2.0.77", + "syn", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", ] [[package]] name = "rustc-hash" -version = "1.1.0" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustc_version" @@ -1859,79 +3330,191 @@ dependencies = [ "semver", ] +[[package]] +name = "rusticata-macros" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632" +dependencies = [ + "nom", +] + [[package]] name = "rustix" -version = "0.38.37" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags 2.13.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls" +version = "0.23.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" +dependencies = [ + "aws-lc-rs", + "once_cell", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-graviola" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "323c712e50c59ceb2ba9ad4d79dcfd3e0046a082d61efa87fcdf8f59af04473c" +dependencies = [ + "graviola", + "libcrux-ml-kem", + "rustls", +] + +[[package]] +name = "rustls-native-certs" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-platform-verifier" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d1e2536ce4f35f4846aa13bff16bd0ff40157cdb14cc056c7b14ba41233ba0" +dependencies = [ + "core-foundation 0.10.1", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls", + "rustls-native-certs", + "rustls-platform-verifier-android", + "rustls-webpki", + "security-framework", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + +[[package]] +name = "rustls-webpki" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ - "bitflags 2.6.0", - "errno", - "libc", - "linux-raw-sys", - "windows-sys 0.52.0", + "aws-lc-rs", + "ring", + "rustls-pki-types", + "untrusted", ] [[package]] name = "rustpython" -version = "0.4.0" +version = "0.5.0" dependencies = [ - "cfg-if", - "clap", "criterion", - "dirs-next", + "dirs", "env_logger", "flame", "flamescope", + "lexopt", "libc", "log", "pyo3", + "rustls", + "rustls-graviola", + "rustpython-capi", "rustpython-compiler", - "rustpython-parser", "rustpython-pylib", + "rustpython-ruff_python_parser", "rustpython-stdlib", "rustpython-vm", "rustyline", + "winresource", ] [[package]] -name = "rustpython-ast" -version = "0.4.0" -source = "git+https://github.com/RustPython/Parser.git?rev=d2f137b372ec08ce4a243564a80f8f9153c45a23#d2f137b372ec08ce4a243564a80f8f9153c45a23" +name = "rustpython-capi" +version = "0.5.0" dependencies = [ - "is-macro", - "malachite-bigint", - "rustpython-literal", - "rustpython-parser-core", - "static_assertions", + "bitflags 2.13.0", + "itertools 0.14.0", + "num-complex", + "pyo3", + "rustpython-pylib", + "rustpython-stdlib", + "rustpython-vm", ] [[package]] name = "rustpython-codegen" -version = "0.4.0" +version = "0.5.0" dependencies = [ - "ahash", - "bitflags 2.6.0", + "bitflags 2.13.0", "indexmap", - "insta", - "itertools 0.11.0", + "itertools 0.14.0", "log", + "malachite-bigint", + "memchr", "num-complex", "num-traits", - "rustpython-ast", + "rapidhash", "rustpython-compiler-core", - "rustpython-parser", - "rustpython-parser-core", + "rustpython-literal", + "rustpython-ruff_python_ast", + "rustpython-ruff_python_parser", + "rustpython-ruff_text_size", + "rustpython-wtf8", + "thiserror", + "unicode_names2 2.0.0", ] [[package]] name = "rustpython-common" -version = "0.4.0" +version = "0.5.0" dependencies = [ "ascii", - "bitflags 2.6.0", - "cfg-if", - "itertools 0.11.0", + "bitflags 2.13.0", + "getrandom 0.4.2", + "itertools 0.14.0", "libc", "lock_api", "malachite-base", @@ -1939,88 +3522,107 @@ dependencies = [ "malachite-q", "num-complex", "num-traits", - "once_cell", "parking_lot", "radium", - "rand", - "rustpython-format", + "rustpython-literal", + "rustpython-wtf8", "siphasher", - "volatile", - "widestring", - "windows-sys 0.52.0", + "unicode_names2 2.0.0", ] [[package]] name = "rustpython-compiler" -version = "0.4.0" +version = "0.5.0" dependencies = [ "rustpython-codegen", "rustpython-compiler-core", - "rustpython-parser", + "rustpython-ruff_python_ast", + "rustpython-ruff_python_parser", + "rustpython-ruff_source_file", + "rustpython-ruff_text_size", + "thiserror", ] [[package]] name = "rustpython-compiler-core" -version = "0.4.0" +version = "0.5.0" dependencies = [ - "bitflags 2.6.0", - "itertools 0.11.0", + "bitflags 2.13.0", + "bitflagset", + "itertools 0.14.0", "lz4_flex", "malachite-bigint", "num-complex", - "rustpython-parser-core", - "serde", + "num-traits", + "rustpython-ruff_source_file", + "rustpython-wtf8", +] + +[[package]] +name = "rustpython-compiler-source" +version = "0.4.1+deprecated" +dependencies = [ + "rustpython-ruff_source_file", + "rustpython-ruff_text_size", ] [[package]] name = "rustpython-derive" -version = "0.4.0" +version = "0.5.0" dependencies = [ "rustpython-compiler", "rustpython-derive-impl", - "syn 1.0.109", + "syn", ] [[package]] name = "rustpython-derive-impl" -version = "0.4.0" +version = "0.5.0" dependencies = [ - "itertools 0.11.0", - "maplit", - "once_cell", + "itertools 0.14.0", "proc-macro2", "quote", "rustpython-compiler-core", "rustpython-doc", - "rustpython-parser-core", - "syn 1.0.109", + "syn", "syn-ext", - "textwrap 0.15.2", + "textwrap", ] [[package]] name = "rustpython-doc" -version = "0.3.0" -source = "git+https://github.com/RustPython/__doc__?tag=0.3.0#8b62ce5d796d68a091969c9fa5406276cb483f79" +version = "0.5.0" dependencies = [ - "once_cell", + "phf 0.13.1", ] [[package]] -name = "rustpython-format" -version = "0.4.0" -source = "git+https://github.com/RustPython/Parser.git?rev=d2f137b372ec08ce4a243564a80f8f9153c45a23#d2f137b372ec08ce4a243564a80f8f9153c45a23" +name = "rustpython-host_env" +version = "0.5.0" dependencies = [ - "bitflags 2.6.0", - "itertools 0.11.0", - "malachite-bigint", + "bitflags 2.13.0", + "getrandom 0.4.2", + "junction", + "libc", + "libffi", + "libloading 0.9.0", + "memmap2 0.9.10", + "nix 0.31.3", "num-traits", - "rustpython-literal", + "num_cpus", + "parking_lot", + "paste", + "rustix", + "rustpython-wtf8", + "schannel", + "termios", + "widestring", + "windows-sys 0.61.2", ] [[package]] name = "rustpython-jit" -version = "0.4.0" +version = "0.5.0" dependencies = [ "approx", "cranelift", @@ -2030,247 +3632,288 @@ dependencies = [ "num-traits", "rustpython-compiler-core", "rustpython-derive", + "rustpython-wtf8", "thiserror", ] [[package]] name = "rustpython-literal" -version = "0.4.0" -source = "git+https://github.com/RustPython/Parser.git?rev=d2f137b372ec08ce4a243564a80f8f9153c45a23#d2f137b372ec08ce4a243564a80f8f9153c45a23" +version = "0.5.0" dependencies = [ "hexf-parse", + "icu_properties", "is-macro", "lexical-parse-float", "num-traits", - "unic-ucd-category", + "rand 0.10.1", + "rustpython-wtf8", ] [[package]] -name = "rustpython-parser" -version = "0.4.0" -source = "git+https://github.com/RustPython/Parser.git?rev=d2f137b372ec08ce4a243564a80f8f9153c45a23#d2f137b372ec08ce4a243564a80f8f9153c45a23" +name = "rustpython-pylib" +version = "0.5.0" dependencies = [ - "anyhow", + "glob", + "rustpython-compiler-core", + "rustpython-derive", +] + +[[package]] +name = "rustpython-ruff_python_ast" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f021ff72cabf5e2cd6d8ec8813d376a8445a228dc610ab56c27bd9054cda70d4" +dependencies = [ + "aho-corasick", + "bitflags 2.13.0", + "compact_str", + "get-size2", "is-macro", - "itertools 0.11.0", - "lalrpop-util", - "log", - "malachite-bigint", - "num-traits", - "phf", - "phf_codegen", + "memchr", "rustc-hash", - "rustpython-ast", - "rustpython-parser-core", - "tiny-keccak", - "unic-emoji-char", - "unic-ucd-ident", - "unicode_names2", + "rustpython-ruff_python_trivia", + "rustpython-ruff_source_file", + "rustpython-ruff_text_size", + "thiserror", ] [[package]] -name = "rustpython-parser-core" -version = "0.4.0" -source = "git+https://github.com/RustPython/Parser.git?rev=d2f137b372ec08ce4a243564a80f8f9153c45a23#d2f137b372ec08ce4a243564a80f8f9153c45a23" +name = "rustpython-ruff_python_parser" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01e6ee78bd9671fb5766664b2695fe1f2a92a961f4d9101646c570d8acdb1e0b" dependencies = [ - "is-macro", + "bitflags 2.13.0", + "bstr", + "compact_str", + "get-size2", "memchr", - "rustpython-parser-vendored", + "rustc-hash", + "rustpython-ruff_python_ast", + "rustpython-ruff_python_trivia", + "rustpython-ruff_text_size", + "static_assertions", + "unicode-ident", + "unicode-normalization", + "unicode_names2 1.3.0", ] [[package]] -name = "rustpython-parser-vendored" -version = "0.4.0" -source = "git+https://github.com/RustPython/Parser.git?rev=d2f137b372ec08ce4a243564a80f8f9153c45a23#d2f137b372ec08ce4a243564a80f8f9153c45a23" +name = "rustpython-ruff_python_trivia" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79e7cfd1056f3a02ff0d2d0e4474286ca963260782f878b7b81c1dd87432e682" +dependencies = [ + "itertools 0.14.0", + "rustpython-ruff_source_file", + "rustpython-ruff_text_size", + "unicode-ident", +] + +[[package]] +name = "rustpython-ruff_source_file" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "948107aad62ddb12a11fc7bf68a49e52a0b0a3737d415a2505e54f5a9edac737" dependencies = [ "memchr", - "once_cell", + "rustpython-ruff_text_size", ] [[package]] -name = "rustpython-pylib" -version = "0.4.0" +name = "rustpython-ruff_text_size" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8291ee0f5a779e54ccd4e0151a0c426f8b49a123f99b5b6545db17ccdd4277aa" dependencies = [ - "glob", - "rustpython-compiler-core", - "rustpython-derive", + "get-size2", ] [[package]] name = "rustpython-sre_engine" -version = "0.4.0" +version = "0.5.0" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.13.0", + "criterion", + "icu_properties", "num_enum", "optional", + "rustpython-wtf8", ] [[package]] name = "rustpython-stdlib" -version = "0.4.0" +version = "0.5.0" dependencies = [ "adler32", - "ahash", "ascii", "base64", "blake2", "bzip2", - "cfg-if", + "chrono", + "constant_time_eq", "crc32fast", "crossbeam-utils", "csv-core", - "digest", + "der 0.8.0", + "digest 0.11.3", "dns-lookup", "dyn-clone", + "flame", "flate2", "foreign-types-shared", "gethostname", "hex", + "hmac", + "icu_normalizer", + "icu_properties", "indexmap", - "itertools 0.11.0", - "junction", + "insta", + "itertools 0.14.0", "libc", "libsqlite3-sys", - "libz-sys", + "libz-rs-sys", "mac_address", "malachite-bigint", "md-5", "memchr", - "memmap2", "mt19937", - "nix 0.29.0", "num-complex", - "num-integer", "num-traits", "num_enum", - "once_cell", + "oid-registry", "openssl", "openssl-probe", "openssl-sys", - "page_size", "parking_lot", "paste", - "puruspe", - "rand", - "rand_core", - "rustix", + "pbkdf2", + "pem-rfc7468 1.0.0", + "phf 0.13.1", + "pkcs8", + "pymath", + "rand 0.10.1", + "rapidhash", + "rustls", + "rustls-native-certs", + "rustls-pemfile", + "rustls-platform-verifier", "rustpython-common", "rustpython-derive", + "rustpython-host_env", + "rustpython-pylib", + "rustpython-ruff_python_ast", + "rustpython-ruff_python_parser", + "rustpython-ruff_source_file", + "rustpython-ruff_text_size", "rustpython-vm", - "schannel", - "sha-1", + "sha1 0.11.0", "sha2", "sha3", + "shake", "socket2", "system-configuration", - "termios", - "thread_local", - "ucd", - "unic-char-property", - "unic-normal", - "unic-ucd-age", - "unic-ucd-bidi", - "unic-ucd-category", - "unic-ucd-ident", - "unicode-casing", - "unicode_names2", + "tcl-sys", + "tk-sys", + "unicode_names2 2.0.0", "uuid", + "webpki-roots", "widestring", - "winapi", - "windows-sys 0.52.0", - "xml-rs", + "x509-cert", + "x509-parser", + "xml", + "xz", + "xz-sys", ] +[[package]] +name = "rustpython-venvlauncher" +version = "0.5.0" + [[package]] name = "rustpython-vm" -version = "0.4.0" +version = "0.5.0" dependencies = [ - "ahash", "ascii", - "bitflags 2.6.0", + "bitflags 2.13.0", "bstr", - "caseless", - "cfg-if", "chrono", + "constant_time_eq", "crossbeam-utils", "exitcode", "flame", "flamer", - "getrandom", "glob", - "half 2.4.1", + "half", "hex", + "icu_casemap", + "icu_locale", + "icu_properties", "indexmap", "is-macro", - "itertools 0.11.0", - "junction", + "itertools 0.14.0", "libc", "log", "malachite-bigint", "memchr", - "memoffset", - "nix 0.29.0", "num-complex", "num-integer", "num-traits", - "num_cpus", "num_enum", - "once_cell", "optional", "parking_lot", "paste", - "rand", + "psm", + "rapidhash", "result-like", - "rustc_version", - "rustix", - "rustpython-ast", "rustpython-codegen", "rustpython-common", "rustpython-compiler", "rustpython-compiler-core", "rustpython-derive", - "rustpython-format", + "rustpython-host_env", "rustpython-jit", "rustpython-literal", - "rustpython-parser", - "rustpython-parser-core", + "rustpython-ruff_python_ast", + "rustpython-ruff_python_parser", + "rustpython-ruff_text_size", "rustpython-sre_engine", "rustyline", - "schannel", - "serde", + "scopeguard", + "serde_core", "static_assertions", "strum", "strum_macros", "thiserror", - "thread_local", "timsort", - "uname", - "unic-ucd-bidi", - "unic-ucd-category", - "unic-ucd-ident", - "unicode-casing", - "unicode_names2", "wasm-bindgen", "which", "widestring", - "windows", - "windows-sys 0.52.0", - "winreg", + "writeable", +] + +[[package]] +name = "rustpython-wtf8" +version = "0.5.0" +dependencies = [ + "ascii", + "bstr", + "itertools 0.14.0", + "memchr", ] [[package]] name = "rustpython_wasm" -version = "0.4.0" +version = "0.5.0" dependencies = [ "console_error_panic_hook", "js-sys", "rustpython-common", - "rustpython-parser", "rustpython-pylib", "rustpython-stdlib", "rustpython-vm", - "serde", "serde-wasm-bindgen", + "serde_core", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", @@ -2278,37 +3921,55 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "rustyline" -version = "14.0.0" +version = "18.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7803e8936da37efd9b6d4478277f4b2b9bb5cdb37a113e8d63222e58da647e63" +checksum = "4a990b25f351b25139ddc7f21ee3f6f56f86d6846b74ac8fad3a719a287cd4a0" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.13.0", "cfg-if", "clipboard-win", - "fd-lock", "home", "libc", "log", "memchr", - "nix 0.28.0", + "nix 0.31.3", "radix_trie", "unicode-segmentation", "unicode-width", "utf8parse", - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + +[[package]] +name = "safe_arch" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f7caad094bd561859bcd467734a720c3c1f5d1f338995351fefe2190c45efed" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "salsa20" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "2f874456e72520ff1375a06c588eaf074b0f01f9e9e1aada45bd9b7954a6e42c" +dependencies = [ + "cfg-if", + "cipher", +] [[package]] name = "same-file" @@ -2321,11 +3982,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.24" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9aaafd5a2b6e3d657ff009d82fbd630b6bd54dd4eb06f21693925cdf80f9b8b" +checksum = "91c1b7e4904c873ef0710c1f407dde2e6287de2bebc1bbbf7d430bb7cbffd939" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2334,167 +3995,298 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scrypt" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d87af57419b594aa23fa95f09f0e06d80d84ba01c26148c43844cad6ff4485f0" +dependencies = [ + "cfg-if", + "pbkdf2", + "salsa20", + "sha2", +] + +[[package]] +name = "security-framework" +version = "3.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" +dependencies = [ + "bitflags 2.13.0", + "core-foundation 0.10.1", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2691df843ecc5d231c0b14ece2acc3efb62c0a398c7e1d875f3983ce020e3" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "semver" -version = "1.0.23" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" [[package]] name = "serde" -version = "1.0.210" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ + "serde_core", "serde_derive", ] [[package]] name = "serde-wasm-bindgen" -version = "0.3.1" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "618365e8e586c22123d692b72a7d791d5ee697817b65a218cdf12a98870af0f7" +checksum = "8302e169f0eddcc139c70f139d19d6467353af16f9fce27e8c30158036a1e16b" dependencies = [ - "fnv", "js-sys", "serde", "wasm-bindgen", ] [[package]] -name = "serde_cbor" -version = "0.11.2" +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ - "half 1.8.3", - "serde", + "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "serde_spanned" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6662b5879511e06e8999a8a235d848113e942c9124f211511b16466ee2995f26" +dependencies = [ + "serde_core", +] + +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures 0.2.17", + "digest 0.10.7", +] + +[[package]] +name = "sha1" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aacc4cc499359472b4abe1bf11d0b12e688af9a805fa5e3016f9a386dc2d0214" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", +] + +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", +] + +[[package]] +name = "sha3" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc9bad02c26382724b2d2692c6f179285e4b54eeecd7968f52a50059c3c11759" +dependencies = [ + "digest 0.11.3", + "keccak", + "sponge-cursor", +] + +[[package]] +name = "shake" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09057cb2149ad4cbd2da1e26b351f9a4c354219421229c69c3063e6f61947c4a" +dependencies = [ + "digest 0.11.3", + "keccak", + "sponge-cursor", +] + +[[package]] +name = "shared-build" +version = "0.2.0" +source = "git+https://github.com/arihant2math/tkinter.git?tag=v0.2.0#198fc35b1f18f4eda401f97a641908f321b1403a" +dependencies = [ + "bindgen 0.71.1", ] [[package]] -name = "sha-1" -version = "0.10.1" +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "signature" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c" +checksum = "77549399552de45a898a580c1b41d445bf730df867cc44e6c0233bbc4b8329de" dependencies = [ - "cfg-if", - "cpufeatures", - "digest", + "rand_core 0.6.4", ] [[package]] -name = "sha2" -version = "0.10.8" +name = "simd-adler32" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] -name = "sha3" -version = "0.10.8" +name = "simd_cesu8" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" +checksum = "94f90157bb87cddf702797c5dadfa0be7d266cdf49e22da2fcaa32eff75b2c33" dependencies = [ - "digest", - "keccak", + "rustc_version", + "simdutf8", ] [[package]] -name = "shlex" -version = "1.3.0" +name = "simdutf8" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "similar" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" [[package]] name = "siphasher" -version = "0.3.11" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" +checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649" [[package]] -name = "slice-group-by" -version = "0.3.1" +name = "slab" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" -version = "1.13.2" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" -version = "0.5.7" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.60.2", ] [[package]] -name = "static_assertions" -version = "1.1.0" +name = "spki" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" +dependencies = [ + "base64ct", + "der 0.7.10", +] [[package]] -name = "strsim" +name = "spki" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +checksum = "1d9efca8738c78ee9484207732f728b1ef517bbb1833d6fc0879ca898a522f6f" +dependencies = [ + "base64ct", + "der 0.8.0", +] + +[[package]] +name = "sponge-cursor" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a0219bd7d979d58245a4f41f695e1ac9f8befdffadd7f61f1bae9e39abc6620" + +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "strum" -version = "0.26.3" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +checksum = "9628de9b8791db39ceda2b119bbe13134770b56c138ec1d3af810d045c04f9bd" [[package]] name = "strum_macros" -version = "0.26.4" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +checksum = "ab85eea0270ee17587ed4156089e10b9e6880ee688791d45a905f5b1ca36f664" dependencies = [ "heck", "proc-macro2", "quote", - "rustversion", - "syn 2.0.77", + "syn", ] [[package]] @@ -2505,9 +4297,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "1.0.109" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -2515,41 +4307,43 @@ dependencies = [ ] [[package]] -name = "syn" -version = "2.0.77" +name = "syn-ext" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" +checksum = "b126de4ef6c2a628a68609dd00733766c3b015894698a438ebdf374933fc31d1" dependencies = [ "proc-macro2", "quote", - "unicode-ident", + "syn", ] [[package]] -name = "syn-ext" -version = "0.4.0" +name = "synstructure" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b86cb2b68c5b3c078cac02588bc23f3c04bb828c5d3aedd17980876ec6a7be6" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ - "syn 1.0.109", + "proc-macro2", + "quote", + "syn", ] [[package]] name = "system-configuration" -version = "0.5.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" dependencies = [ - "bitflags 1.3.2", - "core-foundation", + "bitflags 2.13.0", + "core-foundation 0.9.4", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", @@ -2557,61 +4351,65 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.16" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" +checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" [[package]] -name = "termcolor" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +name = "tcl-sys" +version = "0.2.0" +source = "git+https://github.com/arihant2math/tkinter.git?tag=v0.2.0#198fc35b1f18f4eda401f97a641908f321b1403a" dependencies = [ - "winapi-util", + "pkg-config", + "shared-build", ] [[package]] -name = "termios" -version = "0.3.3" +name = "tempfile" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "411c5bf740737c7918b8b1fe232dca4dc9f8e754b8ad5e20966814001ed0ac6b" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ - "libc", + "fastrand", + "getrandom 0.3.4", + "once_cell", + "rustix", + "windows-sys 0.52.0", ] [[package]] -name = "textwrap" -version = "0.11.0" +name = "termios" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +checksum = "411c5bf740737c7918b8b1fe232dca4dc9f8e754b8ad5e20966814001ed0ac6b" dependencies = [ - "unicode-width", + "libc", ] [[package]] name = "textwrap" -version = "0.15.2" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7b3e525a49ec206798b40326a44121291b530c963cfb01018f63e135bac543d" +checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" [[package]] name = "thiserror" -version = "1.0.64" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.64" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] [[package]] @@ -2626,13 +4424,34 @@ dependencies = [ ] [[package]] -name = "thread_local" -version = "1.1.8" +name = "time" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ - "cfg-if", - "once_cell", + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", ] [[package]] @@ -2642,12 +4461,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "639ce8ef6d2ba56be0383a94dd13b92138d58de44c62618303bb798fa92bdc00" [[package]] -name = "tiny-keccak" -version = "2.0.2" +name = "tinystr" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ - "crunchy", + "displaydoc", + "serde_core", + "zerovec", ] [[package]] @@ -2662,9 +4483,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.8.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -2676,184 +4497,112 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] -name = "twox-hash" -version = "1.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" -dependencies = [ - "cfg-if", - "static_assertions", -] - -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - -[[package]] -name = "ucd" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe4fa6e588762366f1eb4991ce59ad1b93651d0b769dfb4e4d1c5c4b943d1159" - -[[package]] -name = "uname" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b72f89f0ca32e4db1c04e2a72f5345d59796d4866a1ee0609084569f73683dc8" -dependencies = [ - "libc", -] - -[[package]] -name = "unic-char-property" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" -dependencies = [ - "unic-char-range", -] - -[[package]] -name = "unic-char-range" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" - -[[package]] -name = "unic-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" - -[[package]] -name = "unic-emoji-char" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-normal" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f09d64d33589a94628bc2aeb037f35c2e25f3f049c7348b5aa5580b48e6bba62" +name = "tk-sys" +version = "0.2.0" +source = "git+https://github.com/arihant2math/tkinter.git?tag=v0.2.0#198fc35b1f18f4eda401f97a641908f321b1403a" dependencies = [ - "unic-ucd-normal", + "pkg-config", + "shared-build", ] [[package]] -name = "unic-ucd-age" -version = "0.9.0" +name = "tls_codec" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8cfdfe71af46b871dc6af2c24fcd360e2f3392ee4c5111877f2947f311671c" +checksum = "0de2e01245e2bb89d6f05801c564fa27624dbd7b1846859876c7dad82e90bf6b" dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", + "tls_codec_derive", + "zeroize", ] [[package]] -name = "unic-ucd-bidi" -version = "0.9.0" +name = "tls_codec_derive" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1d568b51222484e1f8209ce48caa6b430bf352962b877d592c29ab31fb53d8c" +checksum = "2d2e76690929402faae40aebdda620a2c0e25dd6d3b9afe48867dfd95991f4bd" dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "unic-ucd-category" -version = "0.9.0" +name = "toml" +version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8d4591f5fcfe1bd4453baaf803c40e1b1e69ff8455c47620440b46efef91c0" +checksum = "81f3d15e84cbcd896376e6730314d59fb5a87f31e4b038454184435cd57defee" dependencies = [ - "matches", - "unic-char-property", - "unic-char-range", - "unic-ucd-version", + "indexmap", + "serde_core", + "serde_spanned", + "toml_datetime", + "toml_parser", + "toml_writer", + "winnow", ] [[package]] -name = "unic-ucd-hangul" -version = "0.9.0" +name = "toml_datetime" +version = "1.1.1+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb1dc690e19010e1523edb9713224cba5ef55b54894fe33424439ec9a40c0054" +checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7" dependencies = [ - "unic-ucd-version", + "serde_core", ] [[package]] -name = "unic-ucd-ident" -version = "0.9.0" +name = "toml_parser" +version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987" +checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", + "winnow", ] [[package]] -name = "unic-ucd-normal" -version = "0.9.0" +name = "toml_writer" +version = "1.1.1+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86aed873b8202d22b13859dda5fe7c001d271412c31d411fd9b827e030569410" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-hangul", - "unic-ucd-version", -] +checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db" [[package]] -name = "unic-ucd-version" -version = "0.9.0" +name = "twox-hash" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" -dependencies = [ - "unic-common", -] +checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" [[package]] -name = "unicode-casing" -version = "0.1.0" +name = "typenum" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "623f59e6af2a98bdafeb93fa277ac8e1e40440973001ca15cf4ae1541cd16d56" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-normalization" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" dependencies = [ "tinyvec", ] [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-width" -version = "0.1.14" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "unicode-xid" @@ -2867,8 +4616,18 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd" dependencies = [ - "phf", - "unicode_names2_generator", + "phf 0.11.3", + "unicode_names2_generator 1.3.0", +] + +[[package]] +name = "unicode_names2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d189085656ca1203291e965444e7f6a2723fbdd1dd9f34f8482e79bafd8338a0" +dependencies = [ + "phf 0.11.3", + "unicode_names2_generator 2.0.0", ] [[package]] @@ -2880,14 +4639,36 @@ dependencies = [ "getopts", "log", "phf_codegen", - "rand", + "rand 0.8.6", ] [[package]] -name = "unindent" -version = "0.2.3" +name = "unicode_names2_generator" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1262662dc96937c71115228ce2e1d30f41db71a7a45d3459e98783ef94052214" +dependencies = [ + "phf_codegen", + "rand 0.8.6", +] + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" [[package]] name = "utf8parse" @@ -2897,13 +4678,14 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.10.0" +version = "1.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" +checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7" dependencies = [ "atomic", - "getrandom", - "rand", + "getrandom 0.4.2", + "js-sys", + "wasm-bindgen", ] [[package]] @@ -2912,24 +4694,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" -[[package]] -name = "volatile" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8e76fae08f03f96e166d2dfda232190638c10e0383841252416f9cfe2ae60e6" - [[package]] name = "walkdir" version = "2.5.0" @@ -2942,103 +4712,191 @@ dependencies = [ [[package]] name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] [[package]] name = "wasm-bindgen" -version = "0.2.99" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af934872acec734c2d80e6617bbb5ff4f12b052dd8e6332b0817bce889516084" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", ] [[package]] -name = "wasm-bindgen-backend" -version = "0.2.99" +name = "wasm-bindgen-macro-support" +version = "0.2.120" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" +checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41" dependencies = [ "bumpalo", - "log", "proc-macro2", "quote", - "syn 2.0.77", + "syn", "wasm-bindgen-shared", ] [[package]] -name = "wasm-bindgen-futures" -version = "0.4.43" +name = "wasm-bindgen-shared" +version = "0.2.120" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.13.0", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "wasmtime-internal-core" +version = "45.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "110bf85122cd451d3b9ff67f8911d428ec9b729208abe950a0333c3244660e88" +dependencies = [ + "hashbrown 0.17.0", + "libm", +] + +[[package]] +name = "wasmtime-internal-jit-icache-coherence" +version = "45.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" +checksum = "aa6818a4864719772680694f4e4649a8600bb5efcf71111ebaf7419b266463e8" dependencies = [ "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", + "libc", + "wasmtime-internal-core", + "windows-sys 0.61.2", ] [[package]] -name = "wasm-bindgen-macro" -version = "0.2.99" +name = "web-sys" +version = "0.3.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +checksum = "2eadbac71025cd7b0834f20d1fe8472e8495821b4e9801eb0a60bd1f19827602" dependencies = [ - "quote", - "wasm-bindgen-macro-support", + "js-sys", + "wasm-bindgen", ] [[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.99" +name = "webpki-root-certs" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +checksum = "f31141ce3fc3e300ae89b78c0dd67f9708061d1d2eda54b8209346fd6be9a92c" dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.77", - "wasm-bindgen-backend", - "wasm-bindgen-shared", + "rustls-pki-types", ] [[package]] -name = "wasm-bindgen-shared" -version = "0.2.99" +name = "webpki-roots" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] [[package]] -name = "web-sys" -version = "0.3.70" +name = "which" +version = "8.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" +checksum = "c789537cf2f7f55be8e6192f92e464174ee55f91af622777f7f1ceb0dbccd03e" dependencies = [ - "js-sys", - "wasm-bindgen", + "libc", ] [[package]] -name = "which" -version = "6.0.3" +name = "wide" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ee928febd44d98f2f459a4a79bd4d928591333a494a10a868418ac1b39cf1f" +checksum = "c9479f84a757f819cfab37295955906479181395de83add28f74975fde083141" dependencies = [ - "either", - "home", - "rustix", - "winsafe", + "bytemuck", + "safe_arch", ] [[package]] name = "widestring" -version = "1.1.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7219d36b6eac893fa81e84ebe06485e7dcbb616177469b142df14f1f4deb1311" +checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" [[package]] name = "winapi" @@ -3058,11 +4916,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.52.0", ] [[package]] @@ -3072,44 +4930,62 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "windows" -version = "0.52.0" +name = "windows-core" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ - "windows-core", - "windows-targets 0.52.6", + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", ] [[package]] -name = "windows-core" -version = "0.52.0" +name = "windows-implement" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ - "windows-targets 0.52.6", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "windows-sys" -version = "0.36.1" +name = "windows-interface" +version = "0.59.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-result" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows_aarch64_msvc 0.36.1", - "windows_i686_gnu 0.36.1", - "windows_i686_msvc 0.36.1", - "windows_x86_64_gnu 0.36.1", - "windows_x86_64_msvc 0.36.1", + "windows-link", ] [[package]] -name = "windows-sys" -version = "0.48.0" +name = "windows-strings" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-targets 0.48.5", + "windows-link", ] [[package]] @@ -3123,26 +4999,20 @@ dependencies = [ [[package]] name = "windows-sys" -version = "0.59.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.52.6", + "windows-targets 0.53.5", ] [[package]] -name = "windows-targets" -version = "0.48.5" +name = "windows-sys" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-link", ] [[package]] @@ -3154,7 +5024,7 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", @@ -3162,10 +5032,21 @@ dependencies = [ ] [[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" +name = "windows-targets" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] [[package]] name = "windows_aarch64_gnullvm" @@ -3174,16 +5055,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] -name = "windows_aarch64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" +name = "windows_aarch64_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -3192,22 +5067,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] -name = "windows_i686_gnu" -version = "0.36.1" +name = "windows_aarch64_msvc" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" -version = "0.48.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnu" -version = "0.52.6" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" [[package]] name = "windows_i686_gnullvm" @@ -3216,110 +5091,375 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] -name = "windows_i686_msvc" -version = "0.36.1" +name = "windows_i686_gnullvm" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" -version = "0.48.5" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_i686_msvc" -version = "0.52.6" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" -version = "0.36.1" +version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnu" -version = "0.48.5" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] -name = "windows_x86_64_gnu" +name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_gnullvm" -version = "0.48.5" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] -name = "windows_x86_64_gnullvm" +name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "windows_x86_64_msvc" -version = "0.36.1" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" +name = "winnow" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0" [[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" +name = "winresource" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +checksum = "0986a8b1d586b7d3e4fe3d9ea39fb451ae22869dcea4aa109d287a374d866087" +dependencies = [ + "toml", + "version_check", +] [[package]] -name = "winreg" -version = "0.52.0" +name = "wit-bindgen" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" dependencies = [ - "cfg-if", - "windows-sys 0.48.0", + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.13.0", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + +[[package]] +name = "x509-cert" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1301e935010a701ae5f8655edc0ad17c44bad3ac5ce8c39185f75453b720ae94" +dependencies = [ + "const-oid 0.9.6", + "der 0.7.10", + "sha1 0.10.6", + "signature", + "spki 0.7.3", + "tls_codec", +] + +[[package]] +name = "x509-parser" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d43b0f71ce057da06bc0851b23ee24f3f86190b07203dd8f567d0b706a185202" +dependencies = [ + "asn1-rs", + "data-encoding", + "der-parser", + "lazy_static 1.5.0", + "nom", + "oid-registry", + "rusticata-macros", + "thiserror", + "time", +] + +[[package]] +name = "xml" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "636f85e5ca6488e96401b61eb7de54f4e44755c988af0f52cf90230c312a1a89" + +[[package]] +name = "xz" +version = "0.4.6-rc.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7af8851c229f574c5e4f4e9b6d90ca06c43974a563f7f42de4b88d651bf8f19c" +dependencies = [ + "xz-core", +] + +[[package]] +name = "xz-core" +version = "0.1.0-rc.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4d12cf9c122b2523cce22d6ab3083f4eb56d1221e5d30ddcc70fbaac553589" +dependencies = [ + "libc", + "memchr", + "windows-sys 0.61.2", +] + +[[package]] +name = "xz-sys" +version = "0.4.6-rc.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "114e3a14e5bb2d46513305741650595041b7e7e1677aa5c9715a09b7a8da08fd" +dependencies = [ + "libc", + "xz-core", ] [[package]] -name = "winsafe" -version = "0.0.19" +name = "yoke" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] [[package]] -name = "xml-rs" -version = "0.8.22" +name = "yoke-derive" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af4e2e2f7cba5a093896c1e150fbfe177d1883e7448200efb81d40b9d339ef26" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] [[package]] name = "zerocopy" -version = "0.7.35" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ - "byteorder", "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.35" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85a5b4158499876c763cb03bc4e49185d3cccbabb15b33c627f7884f43db852e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "serde", + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", - "syn 2.0.77", + "syn", ] + +[[package]] +name = "zlib-rs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be3d40e40a133f9c916ee3f9f4fa2d9d63435b5fbe1bfc6d9dae0aa0ada1513" + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml index f1391d000ea..677de28ef1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,36 +10,47 @@ repository.workspace = true license.workspace = true [features] -default = ["threading", "stdlib", "zlib", "importlib"] +capi = ["dep:rustpython-capi", "threading"] +default = ["threading", "stdlib", "stdio", "importlib", "ssl-rustls-aws-lc", "host_env"] +host_env = ["rustpython-vm/host_env", "rustpython-stdlib?/host_env"] importlib = ["rustpython-vm/importlib"] encodings = ["rustpython-vm/encodings"] +stdio = ["rustpython-vm/stdio"] stdlib = ["rustpython-stdlib", "rustpython-pylib", "encodings"] -flame-it = ["rustpython-vm/flame-it", "flame", "flamescope"] +flame-it = ["rustpython-vm/flame-it", "rustpython-stdlib/flame-it", "flame", "flamescope"] freeze-stdlib = ["stdlib", "rustpython-vm/freeze-stdlib", "rustpython-pylib?/freeze-stdlib"] jit = ["rustpython-vm/jit"] threading = ["rustpython-vm/threading", "rustpython-stdlib/threading"] -zlib = ["stdlib", "rustpython-stdlib/zlib"] -bz2 = ["stdlib", "rustpython-stdlib/bz2"] sqlite = ["rustpython-stdlib/sqlite"] -ssl = ["rustpython-stdlib/ssl"] -ssl-vendor = ["ssl", "rustpython-stdlib/ssl-vendor"] +ssl = ["host_env"] +ssl-rustls = ["ssl", "rustpython-stdlib/ssl-rustls"] +ssl-rustls-aws-lc = ["ssl-rustls", "dep:rustls", "rustls/aws_lc_rs"] +ssl-rustls-aws-lc-fips = ["ssl-rustls-aws-lc", "rustls/fips"] +ssl-openssl = ["ssl", "rustpython-stdlib/ssl-openssl"] +ssl-openssl-vendor = ["ssl-openssl", "rustpython-stdlib/ssl-openssl-vendor"] +tkinter = ["rustpython-stdlib/tkinter"] + +[build-dependencies] +winresource = "0.1" [dependencies] +rustpython-capi = { workspace = true, optional = true } rustpython-compiler = { workspace = true } rustpython-pylib = { workspace = true, optional = true } rustpython-stdlib = { workspace = true, optional = true, features = ["compiler"] } -rustpython-vm = { workspace = true, features = ["compiler"] } -rustpython-parser = { workspace = true } +rustpython-vm = { workspace = true, features = ["compiler", "gc"] } -cfg-if = { workspace = true } log = { workspace = true } flame = { workspace = true, optional = true } -clap = "2.34" -dirs = { package = "dirs-next", version = "2.0.0" } -env_logger = { version = "0.9.0", default-features = false, features = ["atty", "termcolor"] } +lexopt = "0.3" +dirs = "6" +env_logger = "0.11" flamescope = { version = "0.1.2", optional = true } +rustls = { workspace = true, optional = true } +rustls-graviola = { workspace = true, optional = true } + [target.'cfg(windows)'.dependencies] libc = { workspace = true } @@ -47,8 +58,10 @@ libc = { workspace = true } rustyline = { workspace = true } [dev-dependencies] -criterion = { version = "0.3.5", features = ["html_reports"] } -pyo3 = { version = "0.22", features = ["auto-initialize"] } +criterion = { workspace = true } +pyo3 = { workspace = true, features = ["auto-initialize"] } +rustpython-stdlib = { workspace = true } +ruff_python_parser = { workspace = true } [[bench]] name = "execution" @@ -62,6 +75,17 @@ harness = false name = "rustpython" path = "src/main.rs" +[[example]] +name = "custom_tls_providers" +path = "examples/custom_tls_providers.rs" +required-features = [ + "rustls-graviola", + "rustls/ring", + "rustpython-pylib/freeze-stdlib", + "rustpython-stdlib/ssl-rustls", + "rustpython-vm/freeze-stdlib", +] + [profile.dev.package."*"] opt-level = 3 @@ -70,6 +94,21 @@ opt-level = 3 # https://github.com/rust-lang/rust/issues/92869 # lto = "thin" +# Some crates don't change as much but benefit more from +# more expensive optimization passes, so we selectively +# decrease codegen-units in some cases. +[profile.release.package.rustpython-doc] +codegen-units = 1 + +[profile.release.package.rustpython-literal] +codegen-units = 1 + +[profile.release.package.rustpython-common] +codegen-units = 1 + +[profile.release.package.rustpython-wtf8] +codegen-units = 1 + [profile.bench] lto = "thin" codegen-units = 1 @@ -78,120 +117,322 @@ opt-level = 3 [profile.release] lto = "thin" +[profile.wasm-release] +inherits = "release" +opt-level = "s" +lto = true +codegen-units = 1 +strip = true +panic = "abort" + [patch.crates-io] +parking_lot_core = { git = "https://github.com/youknowone/parking_lot", branch = "rustpython" } # REDOX START, Uncomment when you want to compile/check with redoxer # REDOX END -# Used only on Windows to build the vcpkg dependencies -[package.metadata.vcpkg] -git = "https://github.com/microsoft/vcpkg" -# The revision of the vcpkg repository to use -# https://github.com/microsoft/vcpkg/tags -rev = "2024.02.14" +[package.metadata.packager] +product-name = "RustPython" +identifier = "com.rustpython.rustpython" +description = "An open source Python 3 interpreter written in Rust" +homepage = "https://rustpython.github.io/" +license_file = "LICENSE" +authors = ["RustPython Team"] +publisher = "RustPython Team" +resources = ["LICENSE", "README.md", "Lib"] +icons = ["32x32.png"] + +[package.metadata.packager.nsis] +installer_mode = "both" +template = "installer-config/installer.nsi" + +[package.metadata.packager.wix] +template = "installer-config/installer.wxs" -[package.metadata.vcpkg.target] -x86_64-pc-windows-msvc = { triplet = "x64-windows-static-md", dev-dependencies = ["openssl" ] } [workspace] resolver = "2" members = [ - "compiler", "compiler/core", "compiler/codegen", - ".", "common", "derive", "jit", "vm", "vm/sre_engine", "pylib", "stdlib", "derive-impl", - "wasm/lib", + ".", + "crates/*", ] +exclude = ["pymath"] [workspace.package] -version = "0.4.0" +version = "0.5.0" authors = ["RustPython Team"] -edition = "2021" -rust-version = "1.83.0" +edition = "2024" +rust-version = "1.95.0" repository = "https://github.com/RustPython/RustPython" license = "MIT" [workspace.dependencies] -rustpython-compiler-core = { path = "compiler/core", version = "0.4.0" } -rustpython-compiler = { path = "compiler", version = "0.4.0" } -rustpython-codegen = { path = "compiler/codegen", version = "0.4.0" } -rustpython-common = { path = "common", version = "0.4.0" } -rustpython-derive = { path = "derive", version = "0.4.0" } -rustpython-derive-impl = { path = "derive-impl", version = "0.4.0" } -rustpython-jit = { path = "jit", version = "0.4.0" } -rustpython-vm = { path = "vm", default-features = false, version = "0.4.0" } -rustpython-pylib = { path = "pylib", version = "0.4.0" } -rustpython-stdlib = { path = "stdlib", default-features = false, version = "0.4.0" } -rustpython-sre_engine = { path = "vm/sre_engine", version = "0.4.0" } -rustpython-doc = { git = "https://github.com/RustPython/__doc__", tag = "0.3.0", version = "0.3.0" } - -# rustpython-literal = { version = "0.4.0" } -# rustpython-parser-core = { version = "0.4.0" } -# rustpython-parser = { version = "0.4.0" } -# rustpython-ast = { version = "0.4.0" } -# rustpython-format= { version = "0.4.0" } -rustpython-literal = { git = "https://github.com/RustPython/Parser.git", version = "0.4.0", rev = "d2f137b372ec08ce4a243564a80f8f9153c45a23" } -rustpython-parser-core = { git = "https://github.com/RustPython/Parser.git", version = "0.4.0", rev = "d2f137b372ec08ce4a243564a80f8f9153c45a23" } -rustpython-parser = { git = "https://github.com/RustPython/Parser.git", version = "0.4.0", rev = "d2f137b372ec08ce4a243564a80f8f9153c45a23" } -rustpython-ast = { git = "https://github.com/RustPython/Parser.git", version = "0.4.0", rev = "d2f137b372ec08ce4a243564a80f8f9153c45a23" } -rustpython-format = { git = "https://github.com/RustPython/Parser.git", version = "0.4.0", rev = "d2f137b372ec08ce4a243564a80f8f9153c45a23" } -# rustpython-literal = { path = "../RustPython-parser/literal" } -# rustpython-parser-core = { path = "../RustPython-parser/core" } -# rustpython-parser = { path = "../RustPython-parser/parser" } -# rustpython-ast = { path = "../RustPython-parser/ast" } -# rustpython-format = { path = "../RustPython-parser/format" } - -ahash = "0.8.11" -ascii = "1.0" -bitflags = "2.4.1" +rustpython-capi = { path = "crates/capi", version = "0.5.0" } +rustpython-compiler-core = { path = "crates/compiler-core", version = "0.5.0" } +rustpython-compiler = { path = "crates/compiler", version = "0.5.0" } +rustpython-codegen = { path = "crates/codegen", version = "0.5.0" } +rustpython-common = { path = "crates/common", version = "0.5.0" } +rustpython-host_env = { path = "crates/host_env", version = "0.5.0" } +rustpython-derive = { path = "crates/derive", version = "0.5.0" } +rustpython-derive-impl = { path = "crates/derive-impl", version = "0.5.0" } +rustpython-jit = { path = "crates/jit", version = "0.5.0" } +rustpython-literal = { path = "crates/literal", version = "0.5.0" } +rustpython-vm = { path = "crates/vm", default-features = false, version = "0.5.0" } +rustpython-pylib = { path = "crates/pylib", version = "0.5.0" } +rustpython-stdlib = { path = "crates/stdlib", default-features = false, version = "0.5.0" } +rustpython-sre_engine = { path = "crates/sre_engine", version = "0.5.0" } +rustpython-wtf8 = { path = "crates/wtf8", version = "0.5.0" } +rustpython-doc = { path = "crates/doc", version = "0.5.0" } + +# Use RustPython-packaged Ruff crates from the published fork while keeping +# existing crate names in the codebase. +ruff_python_parser = { package = "rustpython-ruff_python_parser", version = "0.15.8" } +ruff_python_ast = { package = "rustpython-ruff_python_ast", version = "0.15.8" } +ruff_text_size = { package = "rustpython-ruff_text_size", version = "0.15.8" } +ruff_source_file = { package = "rustpython-ruff_source_file", version = "0.15.8" } +# To update ruff crates, comment out the above lines and uncomment the following lines to pull directly from the Ruff repository at the specified commit hash. +# Ruff tag 0.15.8 is based on commit c2a8815842f9dc5d24ec19385eae0f1a7188b0d9 +# at the time of this capture. We use the commit hash to ensure reproducible builds. +# ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", rev = "c2a8815842f9dc5d24ec19385eae0f1a7188b0d9" } +# ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", rev = "c2a8815842f9dc5d24ec19385eae0f1a7188b0d9" } +# ruff_text_size = { git = "https://github.com/astral-sh/ruff.git", rev = "c2a8815842f9dc5d24ec19385eae0f1a7188b0d9" } +# ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", rev = "c2a8815842f9dc5d24ec19385eae0f1a7188b0d9" } + +der = { version = "0.8", features = ["alloc", "oid", "pem", "zeroize"] } +phf = { version = "0.13.1", default-features = false, features = ["macros"]} +adler32 = "1.2.0" +approx = "0.5.1" +ascii = "1.1" +base64 = "0.22" +blake2 = "0.11.0-rc.6" +bitflags = "2.11.0" +bitflagset = "0.0.3" bstr = "1" -cfg-if = "1.0" -chrono = "0.4.37" -crossbeam-utils = "0.8.19" +bzip2 = "0.6" +chrono = { version = "0.4.44", default-features = false, features = ["clock", "std"] } +console_error_panic_hook = "0.1" +constant_time_eq = "0.5" +cranelift = "0.132.0" +cranelift-jit = "0.132.0" +cranelift-module = "0.132.0" +crc32fast = "1.3.2" +criterion = { version = "0.8", features = ["html_reports"] } +crossbeam-utils = "0.8.21" +csv-core = "0.1.11" +digest = "0.11" +dns-lookup = "3.0" +dyn-clone = "1.0.10" +exitcode = "1.1.2" flame = "0.2.2" -getrandom = "0.2.12" +flamer = "0.5" +flate2 = { version = "1.1.9", default-features = false } +# Bump only when the openssl crate bumps it +foreign-types-shared = "0.1" +gethostname = "1.0.2" +getrandom = { version = "0.4", features = ["std"] } glob = "0.3" +half = "2" hex = "0.4.3" -indexmap = { version = "2.2.6", features = ["std"] } -insta = "1.38.0" -itertools = "0.11.0" -is-macro = "0.3.0" -junction = "1.0.0" -libc = "0.2.153" -log = "0.4.16" -nix = { version = "0.29", features = ["fs", "user", "process", "term", "time", "signal", "ioctl", "socket", "sched", "zerocopy", "dir", "hostname", "net", "poll"] } -malachite-bigint = "0.2.3" -malachite-q = "0.4.22" -malachite-base = "0.4.22" -memchr = "2.7.2" -num-complex = "0.4.0" -num-integer = "0.1.44" +hexf-parse = "0.2.1" +hmac = "0.13" +indexmap = { version = "2.14.0", features = ["std"] } +insta = "1.47" +itertools = "0.14.0" +is-macro = "0.3.7" +js-sys = "0.3" +junction = "2.0.0" +lexical-parse-float = "1.0.6" +libc = "0.2.186" +libffi = "5" +libloading = "0.9" +xz = "0.4.6-rc.0" +xz-sys = "0.4.6-rc.0" +libsqlite3-sys = "0.38" +libz-rs-sys = "0.6" +lock_api = "0.4" +log = "0.4.30" +lz4_flex = "0.13" +nix = { version = "0.31", features = ["fs", "user", "process", "term", "time", "signal", "ioctl", "socket", "sched", "zerocopy", "dir", "hostname", "net", "poll"] } +mac_address = "1.1.3" +malachite-bigint = "0.9.1" +malachite-q = "0.9.1" +malachite-base = "0.9.1" +md-5 = "0.11" +memchr = "2.8.1" +memmap2 = "0.9.10" +mt19937 = "3.3" +num-complex = "0.4.6" +num-integer = "0.1.46" num-traits = "0.2" +num_cpus = "1.17.0" num_enum = { version = "0.7", default-features = false } -once_cell = "1.19.0" -parking_lot = "0.12.1" -paste = "1.0.7" -rand = "0.8.5" -rustix = { version = "0.38", features = ["event"] } -rustyline = "14.0.0" -serde = { version = "1.0.133", default-features = false } -schannel = "0.1.22" +oid-registry = "0.8" +openssl = "0.10.80" +openssl-sys = "0.9.110" +openssl-probe = "0.2.1" +optional = "0.5" +parking_lot = "0.12.3" +paste = "1.0.15" +pbkdf2 = "0.13" +pem-rfc7468 = "1.0" +pkcs8 = "0.11" +proc-macro2 = "1.0.105" +psm = "0.1" +pymath = { version = "0.2.0", features = ["mul_add", "malachite-bigint", "complex"] } +pyo3 = "0.29" +quote = "1.0.45" +radium = "1.1.1" +rand = "0.10" +rapidhash = "4.4.1" +result-like = "0.5.0" +rustix = { version = "1.1", features = ["event", "fs", "param", "system"] } +rustls = { version = "0.23.39", default-features = false } +rustls-graviola = "0.3" +rustls-native-certs = "0.8" +rustls-pemfile = "2.2" +rustls-platform-verifier = "0.7" +rustyline = "18" +serde = { package = "serde_core", version = "1.0.225", default-features = false, features = ["alloc"] } +schannel = "0.1.29" +scopeguard = "1" +serde-wasm-bindgen = "0.6.5" +sha1 = "0.11" +sha2 = "0.11" +sha3 = "0.12" +shake = "0.1" +siphasher = "1" +socket2 = "0.6.4" static_assertions = "1.1" -strum = "0.26" -strum_macros = "0.26" -syn = "1.0.109" -thiserror = "1.0" -thread_local = "1.1.4" -unicode_names2 = "1.2.0" -widestring = "1.1.0" -windows-sys = "0.52.0" -wasm-bindgen = "0.2.92" +strum = "0.28" +strum_macros = "0.28" +syn = "2" +syn-ext = "0.5.0" +system-configuration = "0.7.0" +tcl-sys = { git = "https://github.com/arihant2math/tkinter.git", tag = "v0.2.0" } +textwrap = { version = "0.16.2", default-features = false } +termios = "0.3.3" +thiserror = "2.0" +timsort = "0.1.2" +tk-sys = { git = "https://github.com/arihant2math/tkinter.git", tag = "v0.2.0" } +icu_casemap = "2" +icu_locale = "2" +icu_properties = "2" +icu_normalizer = "2" +uuid = "1.23.2" +unicode_names2 = "2.0.0" +widestring = "1.2.0" +windows-sys = "0.61.2" +wasm-bindgen = "0.2.106" +wasm-bindgen-futures = "0.4" +web-sys = "0.3" +webpki-roots = "1.0" +which = "8" +x509-cert = "0.2.5" +x509-parser = "0.18" +xml = "1.3" +writeable = "0.6" # Lints [workspace.lints.rust] unsafe_code = "allow" +unsafe_op_in_unsafe_fn = "deny" +elided_lifetimes_in_paths = "warn" +unreachable_pub = "warn" [workspace.lints.clippy] -perf = "warn" -style = "warn" -complexity = "warn" -suspicious = "warn" -correctness = "warn" +correctness = { level = "warn", priority = -2 } +suspicious = { level = "warn", priority = -2 } +perf = { level = "warn", priority = -2 } +style = { level = "warn", priority = -2 } +complexity = { level = "warn", priority = -2 } +# pedantic = { level = "warn", priority = -2 } # TODO: Enable this + +missing_errors_doc = "allow" # Too many errors. No auto-fix available +missing_panics_doc = "allow" # Too many errors. No auto-fix available +match_same_arms = "allow" # Not always more readable +if_not_else = "allow" # Not always more readable +single_match_else = "allow" +similar_names = "allow" + +# restriction lints +alloc_instead_of_core = "warn" +cfg_not_test = "warn" +redundant_test_prefix = "warn" +std_instead_of_alloc = "warn" +std_instead_of_core = "warn" +tests_outside_test_module = "warn" + +# nursery lints to enforce gradually +collection_is_never_read = "warn" +debug_assert_with_mut_call = "warn" +derive_partial_eq_without_eq = "warn" +imprecise_flops = "warn" +iter_on_empty_collections = "warn" +iter_on_single_items = "warn" +literal_string_with_formatting_args = "warn" +needless_collect = "warn" +or_fun_call = "warn" +redundant_clone = "warn" +search_is_some = "warn" +significant_drop_in_scrutinee = "warn" +single_option_map = "warn" +trait_duplication_in_bounds = "warn" +type_repetition_in_bounds = "warn" +unnecessary_struct_initialization = "warn" +unused_peekable = "warn" +unused_rounding = "warn" +use_self = "warn" +useless_let_if_seq = "warn" + +# pedantic lints to enforce gradually +bool_to_int_with_if = "warn" +checked_conversions = "warn" +cloned_instead_of_copied = "warn" +collapsible_else_if = "warn" +comparison_chain = "warn" +copy_iterator = "warn" +doc_link_with_quotes = "warn" +duration_suboptimal_units = "warn" +elidable_lifetime_names = "warn" +enum_glob_use = "warn" +explicit_deref_methods = "warn" +explicit_into_iter_loop = "warn" +explicit_iter_loop = "warn" +filter_map_next = "warn" +flat_map_option = "warn" +format_collect = "warn" +from_iter_instead_of_collect = "warn" +inconsistent_struct_constructor = "warn" +index_refutable_slice = "warn" +inefficient_to_string = "warn" +ip_constant = "warn" +iter_filter_is_ok = "warn" +iter_filter_is_some = "warn" +large_futures = "warn" +large_types_passed_by_value = "warn" +manual_instant_elapsed = "warn" +manual_is_variant_and = "warn" +map_unwrap_or = "warn" +match_bool = "warn" +mismatching_type_param_order = "warn" +must_use_candidate = "warn" +mut_mut = "warn" +needless_bitwise_bool = "warn" +needless_for_each = "warn" +non_std_lazy_statics = "warn" +option_as_ref_cloned = "warn" +ptr_offset_by_literal = "warn" +range_minus_one = "warn" +range_plus_one = "warn" +redundant_else = "warn" +ref_option = "warn" +return_self_not_must_use = "warn" +same_functions_in_if_condition = "warn" +single_char_pattern = "warn" +trivially_copy_pass_by_ref = "warn" +unchecked_time_subtraction = "warn" +uninlined_format_args = "warn" +unnecessary_box_returns = "warn" +unnecessary_join = "warn" +unnecessary_wraps = "warn" +unnested_or_patterns = "warn" diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md deleted file mode 100644 index 7c79a011bab..00000000000 --- a/DEVELOPMENT.md +++ /dev/null @@ -1,208 +0,0 @@ -# RustPython Development Guide and Tips - -RustPython attracts developers with interest and experience in Rust, Python, -or WebAssembly. Whether you are familiar with Rust, Python, or -WebAssembly, the goal of this Development Guide is to give you the basics to -get set up for developing RustPython and contributing to this project. - -The contents of the Development Guide include: - -- [Setting up a development environment](#setting-up-a-development-environment) -- [Code style](#code-style) -- [Testing](#testing) -- [Profiling](#profiling) -- [Code organization](#code-organization) -- [Understanding internals](#understanding-internals) -- [Questions](#questions) - -## Setting up a development environment - -RustPython requires the following: - -- Rust latest stable version (e.g 1.69.0 as of Apr 20 2023) - - To check Rust version: `rustc --version` - - If you have `rustup` on your system, enter to update to the latest - stable version: `rustup update stable` - - If you do not have Rust installed, use [rustup](https://rustup.rs/) to - do so. -- CPython version 3.12 or higher - - CPython can be installed by your operating system's package manager, - from the [Python website](https://www.python.org/downloads/), or - using a third-party distribution, such as - [Anaconda](https://www.anaconda.com/distribution/). -- [macOS] In case of libffi-sys compilation error, make sure autoconf, automake, - libtool are installed - - To install with [Homebrew](https://brew.sh), enter - `brew install autoconf automake libtool` -- [Optional] The Python package, `pytest`, is used for testing Python code - snippets. To install, enter `python3 -m pip install pytest`. - -## Code style - -The Rust code style used is the default -[rustfmt](https://github.com/rust-lang/rustfmt) codestyle. Please format your -code accordingly, or run `cargo fmt` to autoformat it. We also use -[clippy](https://github.com/rust-lang/rust-clippy) to lint Rust code, which -you can check yourself with `cargo clippy`. - -Custom Python code (i.e. code not copied from CPython's standard library) should -follow the [PEP 8](https://www.python.org/dev/peps/pep-0008/) style. We also use -[ruff](https://beta.ruff.rs/docs/) to check Python code style. - -In addition to language specific tools, [cspell](https://github.com/streetsidesoftware/cspell), -a code spell checker, is used in order to ensure correct spellings for code. - -## Testing - -To test RustPython's functionality, a collection of Python snippets is located -in the `extra_tests/snippets` directory and can be run using `pytest`: - -```shell -$ cd extra_tests -$ pytest -v -``` - -Rust unit tests can be run with `cargo`: - -```shell -$ cargo test --workspace --exclude rustpython_wasm -``` - -Python unit tests can be run by compiling RustPython and running the test module: - -```shell -$ cargo run --release -- -m test -``` - -There are a few test options that are especially useful: - -- `-j ` enables parallel testing (which is a lot faster), where `` is the -number of threads to be used, ideally the same as number of cores on your CPU. -If you don't know, `-j 4` or `-j 8` are good options. -- `-v` enables verbose mode, adding additional information about the tests being -run. -- `` specifies a single test to run instead of running all tests. - -For example, to run all tests in parallel: - -```shell -$ cargo run --release -- -m test -j 4 -``` - -To run only `test_cmath` (located at `Lib/test/test_cmath`) verbosely: - -```shell -$ cargo run --release -- -m test test_cmath -v -``` - -## Profiling - -To profile RustPython, build it in `release` mode with the `flame-it` feature. -This will generate a file `flamescope.json`, which can be viewed at -https://speedscope.app. - -```shell -$ cargo run --release --features flame-it script.py -$ cat flamescope.json -{} -``` - -You can specify another file name other than the default by using the -`--output-file` option to specify a file name (or `stdout` if you specify `-`). -The `--output-format` option determines the format of the output file. -The speedscope json format (default), text, or raw html can be passed. There -exists a raw html viewer which is currently broken, and we welcome a PR to fix it. - -## Code organization - -Understanding a new codebase takes time. Here's a brief view of the -repository's structure: - -- `compiler/src`: python compilation to bytecode - - `core/src`: python bytecode representation in rust structures - - `parser/src`: python lexing, parsing and ast -- `derive/src`: Rust language extensions and macros specific to rustpython -- `Lib`: Carefully selected / copied files from CPython sourcecode. This is - the python side of the standard library. - - `test`: CPython test suite -- `vm/src`: python virtual machine - - `builtins`: Builtin functions and types - - `stdlib`: Standard library parts implemented in rust. -- `src`: using the other subcrates to bring rustpython to life. -- `wasm`: Binary crate and resources for WebAssembly build -- `extra_tests`: extra integration test snippets as a supplement to `Lib/test` - -## Understanding Internals - -The RustPython workspace includes the `rustpython` top-level crate. The `Cargo.toml` -file in the root of the repo provide configuration of the crate and the -implementation is found in the `src` directory (specifically, `src/lib.rs`). - -The top-level `rustpython` binary depends on several lower-level crates including: - -- `rustpython-parser` (implementation in `compiler/parser/src`) -- `rustpython-compiler` (implementation in `compiler/src`) -- `rustpython-vm` (implementation in `vm/src`) - -Together, these crates provide the functions of a programming language and -enable a line of code to go through a series of steps: - -- parse the line of source code into tokens -- determine if the tokens are valid syntax -- create an Abstract Syntax Tree (AST) -- compile the AST into bytecode -- execute the bytecode in the virtual machine (VM). - -### rustpython-parser - -This crate contains the lexer and parser to convert a line of code to -an Abstract Syntax Tree (AST): - -- Lexer: `compiler/parser/src/lexer.rs` converts Python source code into tokens -- Parser: `compiler/parser/src/parser.rs` takes the tokens generated by the lexer and parses - the tokens into an AST (Abstract Syntax Tree) where the nodes of the syntax - tree are Rust structs and enums. - - The Parser relies on `LALRPOP`, a Rust parser generator framework. The - LALRPOP definition of Python's grammar is in `compiler/parser/src/python.lalrpop`. - - More information on parsers and a tutorial can be found in the - [LALRPOP book](https://lalrpop.github.io/lalrpop/). -- AST: `compiler/ast/` implements in Rust the Python types and expressions - represented by the AST nodes. - -### rustpython-compiler - -The `rustpython-compiler` crate's purpose is to transform the AST (Abstract Syntax -Tree) to bytecode. The implementation of the compiler is found in the -`compiler/src` directory. The compiler implements Python's symbol table, -ast->bytecode compiler, and bytecode optimizer in Rust. - -Implementation of bytecode structure in Rust is found in the `compiler/core/src` -directory. `compiler/core/src/bytecode.rs` contains the representation of -instructions and operations in Rust. Further information about Python's -bytecode instructions can be found in the -[Python documentation](https://docs.python.org/3/library/dis.html#bytecodes). - -### rustpython-vm - -The `rustpython-vm` crate has the important job of running the virtual machine that -executes Python's instructions. The `vm/src` directory contains code to -implement the read and evaluation loop that fetches and dispatches -instructions. This directory also contains the implementation of the -Python Standard Library modules in Rust (`vm/src/stdlib`). In Python -everything can be represented as an object. The `vm/src/builtins` directory holds -the Rust code used to represent different Python objects and their methods. The -core implementation of what a Python object is can be found in -`vm/src/object/core.rs`. - -### Code generation - -There are some code generations involved in building RustPython: - -- some part of the AST code is generated from `vm/src/stdlib/ast/gen.rs` to `compiler/ast/src/ast_gen.rs`. -- the `__doc__` attributes are generated by the - [__doc__](https://github.com/RustPython/__doc__) project which is then included as the `rustpython-doc` crate. - -## Questions - -Have you tried these steps and have a question, please chat with us on -[Discord](https://discord.gg/vru8NypEhv). diff --git a/LICENSE b/LICENSE index 7213274e0f0..78442e7c9b2 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 RustPython Team +Copyright (c) 2026 RustPython Team Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Lib/_aix_support.py b/Lib/_aix_support.py new file mode 100644 index 00000000000..dadc75c2bf4 --- /dev/null +++ b/Lib/_aix_support.py @@ -0,0 +1,108 @@ +"""Shared AIX support functions.""" + +import sys +import sysconfig + + +# Taken from _osx_support _read_output function +def _read_cmd_output(commandstring, capture_stderr=False): + """Output from successful command execution or None""" + # Similar to os.popen(commandstring, "r").read(), + # but without actually using os.popen because that + # function is not usable during python bootstrap. + import os + import contextlib + fp = open("/tmp/_aix_support.%s"%( + os.getpid(),), "w+b") + + with contextlib.closing(fp) as fp: + if capture_stderr: + cmd = "%s >'%s' 2>&1" % (commandstring, fp.name) + else: + cmd = "%s 2>/dev/null >'%s'" % (commandstring, fp.name) + return fp.read() if not os.system(cmd) else None + + +def _aix_tag(vrtl, bd): + # type: (List[int], int) -> str + # Infer the ABI bitwidth from maxsize (assuming 64 bit as the default) + _sz = 32 if sys.maxsize == (2**31-1) else 64 + _bd = bd if bd != 0 else 9988 + # vrtl[version, release, technology_level] + return "aix-{:1x}{:1d}{:02d}-{:04d}-{}".format(vrtl[0], vrtl[1], vrtl[2], _bd, _sz) + + +# extract version, release and technology level from a VRMF string +def _aix_vrtl(vrmf): + # type: (str) -> List[int] + v, r, tl = vrmf.split(".")[:3] + return [int(v[-1]), int(r), int(tl)] + + +def _aix_bos_rte(): + # type: () -> Tuple[str, int] + """ + Return a Tuple[str, int] e.g., ['7.1.4.34', 1806] + The fileset bos.rte represents the current AIX run-time level. It's VRMF and + builddate reflect the current ABI levels of the runtime environment. + If no builddate is found give a value that will satisfy pep425 related queries + """ + # All AIX systems to have lslpp installed in this location + # subprocess may not be available during python bootstrap + try: + import subprocess + out = subprocess.check_output(["/usr/bin/lslpp", "-Lqc", "bos.rte"]) + except ImportError: + out = _read_cmd_output("/usr/bin/lslpp -Lqc bos.rte") + out = out.decode("utf-8") + out = out.strip().split(":") # type: ignore + _bd = int(out[-1]) if out[-1] != '' else 9988 + return (str(out[2]), _bd) + + +def aix_platform(): + # type: () -> str + """ + AIX filesets are identified by four decimal values: V.R.M.F. + V (version) and R (release) can be retrieved using ``uname`` + Since 2007, starting with AIX 5.3 TL7, the M value has been + included with the fileset bos.rte and represents the Technology + Level (TL) of AIX. The F (Fix) value also increases, but is not + relevant for comparing releases and binary compatibility. + For binary compatibility the so-called builddate is needed. + Again, the builddate of an AIX release is associated with bos.rte. + AIX ABI compatibility is described as guaranteed at: https://www.ibm.com/\ + support/knowledgecenter/en/ssw_aix_72/install/binary_compatability.html + + For pep425 purposes the AIX platform tag becomes: + "aix-{:1x}{:1d}{:02d}-{:04d}-{}".format(v, r, tl, builddate, bitsize) + e.g., "aix-6107-1415-32" for AIX 6.1 TL7 bd 1415, 32-bit + and, "aix-6107-1415-64" for AIX 6.1 TL7 bd 1415, 64-bit + """ + vrmf, bd = _aix_bos_rte() + return _aix_tag(_aix_vrtl(vrmf), bd) + + +# extract vrtl from the BUILD_GNU_TYPE as an int +def _aix_bgt(): + # type: () -> List[int] + gnu_type = sysconfig.get_config_var("BUILD_GNU_TYPE") + if not gnu_type: + raise ValueError("BUILD_GNU_TYPE is not defined") + return _aix_vrtl(vrmf=gnu_type) + + +def aix_buildtag(): + # type: () -> str + """ + Return the platform_tag of the system Python was built on. + """ + # AIX_BUILDDATE is defined by configure with: + # lslpp -Lcq bos.rte | awk -F: '{ print $NF }' + build_date = sysconfig.get_config_var("AIX_BUILDDATE") + try: + build_date = int(build_date) + except (ValueError, TypeError): + raise ValueError(f"AIX_BUILDDATE is not defined or invalid: " + f"{build_date!r}") + return _aix_tag(_aix_bgt(), build_date) diff --git a/Lib/_android_support.py b/Lib/_android_support.py new file mode 100644 index 00000000000..320dab52acd --- /dev/null +++ b/Lib/_android_support.py @@ -0,0 +1,192 @@ +import io +import sys +from threading import RLock +from time import sleep, time + +# The maximum length of a log message in bytes, including the level marker and +# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at +# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71. +# Messages longer than this will be truncated by logcat. This limit has already +# been reduced at least once in the history of Android (from 4076 to 4068 between +# API level 23 and 26), so leave some headroom. +MAX_BYTES_PER_WRITE = 4000 + +# UTF-8 uses a maximum of 4 bytes per character, so limiting text writes to this +# size ensures that we can always avoid exceeding MAX_BYTES_PER_WRITE. +# However, if the actual number of bytes per character is smaller than that, +# then we may still join multiple consecutive text writes into binary +# writes containing a larger number of characters. +MAX_CHARS_PER_WRITE = MAX_BYTES_PER_WRITE // 4 + + +# When embedded in an app on current versions of Android, there's no easy way to +# monitor the C-level stdout and stderr. The testbed comes with a .c file to +# redirect them to the system log using a pipe, but that wouldn't be convenient +# or appropriate for all apps. So we redirect at the Python level instead. +def init_streams(android_log_write, stdout_prio, stderr_prio): + if sys.executable: + return # Not embedded in an app. + + global logcat + logcat = Logcat(android_log_write) + sys.stdout = TextLogStream(stdout_prio, "python.stdout", sys.stdout) + sys.stderr = TextLogStream(stderr_prio, "python.stderr", sys.stderr) + + +class TextLogStream(io.TextIOWrapper): + def __init__(self, prio, tag, original=None, **kwargs): + # Respect the -u option. + if original: + kwargs.setdefault("write_through", original.write_through) + fileno = original.fileno() + else: + fileno = None + + # The default is surrogateescape for stdout and backslashreplace for + # stderr, but in the context of an Android log, readability is more + # important than reversibility. + kwargs.setdefault("encoding", "UTF-8") + kwargs.setdefault("errors", "backslashreplace") + + super().__init__(BinaryLogStream(prio, tag, fileno), **kwargs) + self._lock = RLock() + self._pending_bytes = [] + self._pending_bytes_count = 0 + + def __repr__(self): + return f"" + + def write(self, s): + if not isinstance(s, str): + raise TypeError( + f"write() argument must be str, not {type(s).__name__}") + + # In case `s` is a str subclass that writes itself to stdout or stderr + # when we call its methods, convert it to an actual str. + s = str.__str__(s) + + # We want to emit one log message per line wherever possible, so split + # the string into lines first. Note that "".splitlines() == [], so + # nothing will be logged for an empty string. + with self._lock: + for line in s.splitlines(keepends=True): + while line: + chunk = line[:MAX_CHARS_PER_WRITE] + line = line[MAX_CHARS_PER_WRITE:] + self._write_chunk(chunk) + + return len(s) + + # The size and behavior of TextIOWrapper's buffer is not part of its public + # API, so we handle buffering ourselves to avoid truncation. + def _write_chunk(self, s): + b = s.encode(self.encoding, self.errors) + if self._pending_bytes_count + len(b) > MAX_BYTES_PER_WRITE: + self.flush() + + self._pending_bytes.append(b) + self._pending_bytes_count += len(b) + if ( + self.write_through + or b.endswith(b"\n") + or self._pending_bytes_count > MAX_BYTES_PER_WRITE + ): + self.flush() + + def flush(self): + with self._lock: + self.buffer.write(b"".join(self._pending_bytes)) + self._pending_bytes.clear() + self._pending_bytes_count = 0 + + # Since this is a line-based logging system, line buffering cannot be turned + # off, i.e. a newline always causes a flush. + @property + def line_buffering(self): + return True + + +class BinaryLogStream(io.RawIOBase): + def __init__(self, prio, tag, fileno=None): + self.prio = prio + self.tag = tag + self._fileno = fileno + + def __repr__(self): + return f"" + + def writable(self): + return True + + def write(self, b): + if type(b) is not bytes: + try: + b = bytes(memoryview(b)) + except TypeError: + raise TypeError( + f"write() argument must be bytes-like, not {type(b).__name__}" + ) from None + + # Writing an empty string to the stream should have no effect. + if b: + logcat.write(self.prio, self.tag, b) + return len(b) + + # This is needed by the test suite --timeout option, which uses faulthandler. + def fileno(self): + if self._fileno is None: + raise io.UnsupportedOperation("fileno") + return self._fileno + + +# When a large volume of data is written to logcat at once, e.g. when a test +# module fails in --verbose3 mode, there's a risk of overflowing logcat's own +# buffer and losing messages. We avoid this by imposing a rate limit using the +# token bucket algorithm, based on a conservative estimate of how fast `adb +# logcat` can consume data. +MAX_BYTES_PER_SECOND = 1024 * 1024 + +# The logcat buffer size of a device can be determined by running `logcat -g`. +# We set the token bucket size to half of the buffer size of our current minimum +# API level, because other things on the system will be producing messages as +# well. +BUCKET_SIZE = 128 * 1024 + +# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39 +PER_MESSAGE_OVERHEAD = 28 + + +class Logcat: + def __init__(self, android_log_write): + self.android_log_write = android_log_write + self._lock = RLock() + self._bucket_level = 0 + self._prev_write_time = time() + + def write(self, prio, tag, message): + # Encode null bytes using "modified UTF-8" to avoid them truncating the + # message. + message = message.replace(b"\x00", b"\xc0\x80") + + # On API level 30 and higher, Logcat will strip any number of leading + # newlines. This is visible in all `logcat` modes, even --binary. Work + # around this by adding a leading space, which shouldn't make any + # difference to the log's usability. + if message.startswith(b"\n"): + message = b" " + message + + with self._lock: + now = time() + self._bucket_level += ( + (now - self._prev_write_time) * MAX_BYTES_PER_SECOND) + + # If the bucket level is still below zero, the clock must have gone + # backwards, so reset it to zero and continue. + self._bucket_level = max(0, min(self._bucket_level, BUCKET_SIZE)) + self._prev_write_time = now + + self._bucket_level -= PER_MESSAGE_OVERHEAD + len(tag) + len(message) + if self._bucket_level < 0: + sleep(-self._bucket_level / MAX_BYTES_PER_SECOND) + + self.android_log_write(prio, tag, message) diff --git a/Lib/_apple_support.py b/Lib/_apple_support.py new file mode 100644 index 00000000000..92febdcf587 --- /dev/null +++ b/Lib/_apple_support.py @@ -0,0 +1,66 @@ +import io +import sys + + +def init_streams(log_write, stdout_level, stderr_level): + # Redirect stdout and stderr to the Apple system log. This method is + # invoked by init_apple_streams() (initconfig.c) if config->use_system_logger + # is enabled. + sys.stdout = SystemLog(log_write, stdout_level, errors=sys.stderr.errors) + sys.stderr = SystemLog(log_write, stderr_level, errors=sys.stderr.errors) + + +class SystemLog(io.TextIOWrapper): + def __init__(self, log_write, level, **kwargs): + kwargs.setdefault("encoding", "UTF-8") + kwargs.setdefault("line_buffering", True) + super().__init__(LogStream(log_write, level), **kwargs) + + def __repr__(self): + return f"" + + def write(self, s): + if not isinstance(s, str): + raise TypeError( + f"write() argument must be str, not {type(s).__name__}") + + # In case `s` is a str subclass that writes itself to stdout or stderr + # when we call its methods, convert it to an actual str. + s = str.__str__(s) + + # We want to emit one log message per line, so split + # the string before sending it to the superclass. + for line in s.splitlines(keepends=True): + super().write(line) + + return len(s) + + +class LogStream(io.RawIOBase): + def __init__(self, log_write, level): + self.log_write = log_write + self.level = level + + def __repr__(self): + return f"" + + def writable(self): + return True + + def write(self, b): + if type(b) is not bytes: + try: + b = bytes(memoryview(b)) + except TypeError: + raise TypeError( + f"write() argument must be bytes-like, not {type(b).__name__}" + ) from None + + # Writing an empty string to the stream should have no effect. + if b: + # Encode null bytes using "modified UTF-8" to avoid truncating the + # message. This should not affect the return value, as the caller + # may be expecting it to match the length of the input. + self.log_write(self.level, b.replace(b"\x00", b"\xc0\x80")) + + return len(b) diff --git a/Lib/_ast_unparse.py b/Lib/_ast_unparse.py new file mode 100644 index 00000000000..1c8741b5a55 --- /dev/null +++ b/Lib/_ast_unparse.py @@ -0,0 +1,1161 @@ +# This module contains ``ast.unparse()``, defined here +# to improve the import time for the ``ast`` module. +import sys +from _ast import * +from ast import NodeVisitor +from contextlib import contextmanager, nullcontext +from enum import IntEnum, auto, _simple_enum + +# Large float and imaginary literals get turned into infinities in the AST. +# We unparse those infinities to INFSTR. +_INFSTR = "1e" + repr(sys.float_info.max_10_exp + 1) + +@_simple_enum(IntEnum) +class _Precedence: + """Precedence table that originated from python grammar.""" + + NAMED_EXPR = auto() # := + TUPLE = auto() # , + YIELD = auto() # 'yield', 'yield from' + TEST = auto() # 'if'-'else', 'lambda' + OR = auto() # 'or' + AND = auto() # 'and' + NOT = auto() # 'not' + CMP = auto() # '<', '>', '==', '>=', '<=', '!=', + # 'in', 'not in', 'is', 'is not' + EXPR = auto() + BOR = EXPR # '|' + BXOR = auto() # '^' + BAND = auto() # '&' + SHIFT = auto() # '<<', '>>' + ARITH = auto() # '+', '-' + TERM = auto() # '*', '@', '/', '%', '//' + FACTOR = auto() # unary '+', '-', '~' + POWER = auto() # '**' + AWAIT = auto() # 'await' + ATOM = auto() + + def next(self): + try: + return self.__class__(self + 1) + except ValueError: + return self + + +_SINGLE_QUOTES = ("'", '"') +_MULTI_QUOTES = ('"""', "'''") +_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES) + +class Unparser(NodeVisitor): + """Methods in this class recursively traverse an AST and + output source code for the abstract syntax; original formatting + is disregarded.""" + + def __init__(self): + self._source = [] + self._precedences = {} + self._type_ignores = {} + self._indent = 0 + self._in_try_star = False + self._in_interactive = False + + def interleave(self, inter, f, seq): + """Call f on each item in seq, calling inter() in between.""" + seq = iter(seq) + try: + f(next(seq)) + except StopIteration: + pass + else: + for x in seq: + inter() + f(x) + + def items_view(self, traverser, items): + """Traverse and separate the given *items* with a comma and append it to + the buffer. If *items* is a single item sequence, a trailing comma + will be added.""" + if len(items) == 1: + traverser(items[0]) + self.write(",") + else: + self.interleave(lambda: self.write(", "), traverser, items) + + def maybe_newline(self): + """Adds a newline if it isn't the start of generated source""" + if self._source: + self.write("\n") + + def maybe_semicolon(self): + """Adds a "; " delimiter if it isn't the start of generated source""" + if self._source: + self.write("; ") + + def fill(self, text="", *, allow_semicolon=True): + """Indent a piece of text and append it, according to the current + indentation level, or only delineate with semicolon if applicable""" + if self._in_interactive and not self._indent and allow_semicolon: + self.maybe_semicolon() + self.write(text) + else: + self.maybe_newline() + self.write(" " * self._indent + text) + + def write(self, *text): + """Add new source parts""" + self._source.extend(text) + + @contextmanager + def buffered(self, buffer = None): + if buffer is None: + buffer = [] + + original_source = self._source + self._source = buffer + yield buffer + self._source = original_source + + @contextmanager + def block(self, *, extra = None): + """A context manager for preparing the source for blocks. It adds + the character':', increases the indentation on enter and decreases + the indentation on exit. If *extra* is given, it will be directly + appended after the colon character. + """ + self.write(":") + if extra: + self.write(extra) + self._indent += 1 + yield + self._indent -= 1 + + @contextmanager + def delimit(self, start, end): + """A context manager for preparing the source for expressions. It adds + *start* to the buffer and enters, after exit it adds *end*.""" + + self.write(start) + yield + self.write(end) + + def delimit_if(self, start, end, condition): + if condition: + return self.delimit(start, end) + else: + return nullcontext() + + def require_parens(self, precedence, node): + """Shortcut to adding precedence related parens""" + return self.delimit_if("(", ")", self.get_precedence(node) > precedence) + + def get_precedence(self, node): + return self._precedences.get(node, _Precedence.TEST) + + def set_precedence(self, precedence, *nodes): + for node in nodes: + self._precedences[node] = precedence + + def get_raw_docstring(self, node): + """If a docstring node is found in the body of the *node* parameter, + return that docstring node, None otherwise. + + Logic mirrored from ``_PyAST_GetDocString``.""" + if not isinstance( + node, (AsyncFunctionDef, FunctionDef, ClassDef, Module) + ) or len(node.body) < 1: + return None + node = node.body[0] + if not isinstance(node, Expr): + return None + node = node.value + if isinstance(node, Constant) and isinstance(node.value, str): + return node + + def get_type_comment(self, node): + comment = self._type_ignores.get(node.lineno) or node.type_comment + if comment is not None: + return f" # type: {comment}" + + def traverse(self, node): + if isinstance(node, list): + for item in node: + self.traverse(item) + else: + super().visit(node) + + # Note: as visit() resets the output text, do NOT rely on + # NodeVisitor.generic_visit to handle any nodes (as it calls back in to + # the subclass visit() method, which resets self._source to an empty list) + def visit(self, node): + """Outputs a source code string that, if converted back to an ast + (using ast.parse) will generate an AST equivalent to *node*""" + self._source = [] + self.traverse(node) + return "".join(self._source) + + def _write_docstring_and_traverse_body(self, node): + if (docstring := self.get_raw_docstring(node)): + self._write_docstring(docstring) + self.traverse(node.body[1:]) + else: + self.traverse(node.body) + + def visit_Module(self, node): + self._type_ignores = { + ignore.lineno: f"ignore{ignore.tag}" + for ignore in node.type_ignores + } + try: + self._write_docstring_and_traverse_body(node) + finally: + self._type_ignores.clear() + + def visit_Interactive(self, node): + self._in_interactive = True + try: + self._write_docstring_and_traverse_body(node) + finally: + self._in_interactive = False + + def visit_FunctionType(self, node): + with self.delimit("(", ")"): + self.interleave( + lambda: self.write(", "), self.traverse, node.argtypes + ) + + self.write(" -> ") + self.traverse(node.returns) + + def visit_Expr(self, node): + self.fill() + self.set_precedence(_Precedence.YIELD, node.value) + self.traverse(node.value) + + def visit_NamedExpr(self, node): + with self.require_parens(_Precedence.NAMED_EXPR, node): + self.set_precedence(_Precedence.ATOM, node.target, node.value) + self.traverse(node.target) + self.write(" := ") + self.traverse(node.value) + + def visit_Import(self, node): + self.fill("import ") + self.interleave(lambda: self.write(", "), self.traverse, node.names) + + def visit_ImportFrom(self, node): + self.fill("from ") + self.write("." * (node.level or 0)) + if node.module: + self.write(node.module) + self.write(" import ") + self.interleave(lambda: self.write(", "), self.traverse, node.names) + + def visit_Assign(self, node): + self.fill() + for target in node.targets: + self.set_precedence(_Precedence.TUPLE, target) + self.traverse(target) + self.write(" = ") + self.traverse(node.value) + if type_comment := self.get_type_comment(node): + self.write(type_comment) + + def visit_AugAssign(self, node): + self.fill() + self.traverse(node.target) + self.write(" " + self.binop[node.op.__class__.__name__] + "= ") + self.traverse(node.value) + + def visit_AnnAssign(self, node): + self.fill() + with self.delimit_if("(", ")", not node.simple and isinstance(node.target, Name)): + self.traverse(node.target) + self.write(": ") + self.traverse(node.annotation) + if node.value: + self.write(" = ") + self.traverse(node.value) + + def visit_Return(self, node): + self.fill("return") + if node.value: + self.write(" ") + self.traverse(node.value) + + def visit_Pass(self, node): + self.fill("pass") + + def visit_Break(self, node): + self.fill("break") + + def visit_Continue(self, node): + self.fill("continue") + + def visit_Delete(self, node): + self.fill("del ") + self.interleave(lambda: self.write(", "), self.traverse, node.targets) + + def visit_Assert(self, node): + self.fill("assert ") + self.traverse(node.test) + if node.msg: + self.write(", ") + self.traverse(node.msg) + + def visit_Global(self, node): + self.fill("global ") + self.interleave(lambda: self.write(", "), self.write, node.names) + + def visit_Nonlocal(self, node): + self.fill("nonlocal ") + self.interleave(lambda: self.write(", "), self.write, node.names) + + def visit_Await(self, node): + with self.require_parens(_Precedence.AWAIT, node): + self.write("await") + if node.value: + self.write(" ") + self.set_precedence(_Precedence.ATOM, node.value) + self.traverse(node.value) + + def visit_Yield(self, node): + with self.require_parens(_Precedence.YIELD, node): + self.write("yield") + if node.value: + self.write(" ") + self.set_precedence(_Precedence.ATOM, node.value) + self.traverse(node.value) + + def visit_YieldFrom(self, node): + with self.require_parens(_Precedence.YIELD, node): + self.write("yield from ") + if not node.value: + raise ValueError("Node can't be used without a value attribute.") + self.set_precedence(_Precedence.ATOM, node.value) + self.traverse(node.value) + + def visit_Raise(self, node): + self.fill("raise") + if not node.exc: + if node.cause: + raise ValueError(f"Node can't use cause without an exception.") + return + self.write(" ") + self.traverse(node.exc) + if node.cause: + self.write(" from ") + self.traverse(node.cause) + + def do_visit_try(self, node): + self.fill("try", allow_semicolon=False) + with self.block(): + self.traverse(node.body) + for ex in node.handlers: + self.traverse(ex) + if node.orelse: + self.fill("else", allow_semicolon=False) + with self.block(): + self.traverse(node.orelse) + if node.finalbody: + self.fill("finally", allow_semicolon=False) + with self.block(): + self.traverse(node.finalbody) + + def visit_Try(self, node): + prev_in_try_star = self._in_try_star + try: + self._in_try_star = False + self.do_visit_try(node) + finally: + self._in_try_star = prev_in_try_star + + def visit_TryStar(self, node): + prev_in_try_star = self._in_try_star + try: + self._in_try_star = True + self.do_visit_try(node) + finally: + self._in_try_star = prev_in_try_star + + def visit_ExceptHandler(self, node): + self.fill("except*" if self._in_try_star else "except", allow_semicolon=False) + if node.type: + self.write(" ") + self.traverse(node.type) + if node.name: + self.write(" as ") + self.write(node.name) + with self.block(): + self.traverse(node.body) + + def visit_ClassDef(self, node): + self.maybe_newline() + for deco in node.decorator_list: + self.fill("@", allow_semicolon=False) + self.traverse(deco) + self.fill("class " + node.name, allow_semicolon=False) + if hasattr(node, "type_params"): + self._type_params_helper(node.type_params) + with self.delimit_if("(", ")", condition = node.bases or node.keywords): + comma = False + for e in node.bases: + if comma: + self.write(", ") + else: + comma = True + self.traverse(e) + for e in node.keywords: + if comma: + self.write(", ") + else: + comma = True + self.traverse(e) + + with self.block(): + self._write_docstring_and_traverse_body(node) + + def visit_FunctionDef(self, node): + self._function_helper(node, "def") + + def visit_AsyncFunctionDef(self, node): + self._function_helper(node, "async def") + + def _function_helper(self, node, fill_suffix): + self.maybe_newline() + for deco in node.decorator_list: + self.fill("@", allow_semicolon=False) + self.traverse(deco) + def_str = fill_suffix + " " + node.name + self.fill(def_str, allow_semicolon=False) + if hasattr(node, "type_params"): + self._type_params_helper(node.type_params) + with self.delimit("(", ")"): + self.traverse(node.args) + if node.returns: + self.write(" -> ") + self.traverse(node.returns) + with self.block(extra=self.get_type_comment(node)): + self._write_docstring_and_traverse_body(node) + + def _type_params_helper(self, type_params): + if type_params is not None and len(type_params) > 0: + with self.delimit("[", "]"): + self.interleave(lambda: self.write(", "), self.traverse, type_params) + + def visit_TypeVar(self, node): + self.write(node.name) + if node.bound: + self.write(": ") + self.traverse(node.bound) + if node.default_value: + self.write(" = ") + self.traverse(node.default_value) + + def visit_TypeVarTuple(self, node): + self.write("*" + node.name) + if node.default_value: + self.write(" = ") + self.traverse(node.default_value) + + def visit_ParamSpec(self, node): + self.write("**" + node.name) + if node.default_value: + self.write(" = ") + self.traverse(node.default_value) + + def visit_TypeAlias(self, node): + self.fill("type ") + self.traverse(node.name) + self._type_params_helper(node.type_params) + self.write(" = ") + self.traverse(node.value) + + def visit_For(self, node): + self._for_helper("for ", node) + + def visit_AsyncFor(self, node): + self._for_helper("async for ", node) + + def _for_helper(self, fill, node): + self.fill(fill, allow_semicolon=False) + self.set_precedence(_Precedence.TUPLE, node.target) + self.traverse(node.target) + self.write(" in ") + self.traverse(node.iter) + with self.block(extra=self.get_type_comment(node)): + self.traverse(node.body) + if node.orelse: + self.fill("else", allow_semicolon=False) + with self.block(): + self.traverse(node.orelse) + + def visit_If(self, node): + self.fill("if ", allow_semicolon=False) + self.traverse(node.test) + with self.block(): + self.traverse(node.body) + # collapse nested ifs into equivalent elifs. + while node.orelse and len(node.orelse) == 1 and isinstance(node.orelse[0], If): + node = node.orelse[0] + self.fill("elif ", allow_semicolon=False) + self.traverse(node.test) + with self.block(): + self.traverse(node.body) + # final else + if node.orelse: + self.fill("else", allow_semicolon=False) + with self.block(): + self.traverse(node.orelse) + + def visit_While(self, node): + self.fill("while ", allow_semicolon=False) + self.traverse(node.test) + with self.block(): + self.traverse(node.body) + if node.orelse: + self.fill("else", allow_semicolon=False) + with self.block(): + self.traverse(node.orelse) + + def visit_With(self, node): + self.fill("with ", allow_semicolon=False) + self.interleave(lambda: self.write(", "), self.traverse, node.items) + with self.block(extra=self.get_type_comment(node)): + self.traverse(node.body) + + def visit_AsyncWith(self, node): + self.fill("async with ", allow_semicolon=False) + self.interleave(lambda: self.write(", "), self.traverse, node.items) + with self.block(extra=self.get_type_comment(node)): + self.traverse(node.body) + + def _str_literal_helper( + self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False + ): + """Helper for writing string literals, minimizing escapes. + Returns the tuple (string literal to write, possible quote types). + """ + def escape_char(c): + # \n and \t are non-printable, but we only escape them if + # escape_special_whitespace is True + if not escape_special_whitespace and c in "\n\t": + return c + # Always escape backslashes and other non-printable characters + if c == "\\" or not c.isprintable(): + return c.encode("unicode_escape").decode("ascii") + return c + + escaped_string = "".join(map(escape_char, string)) + possible_quotes = quote_types + if "\n" in escaped_string: + possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES] + possible_quotes = [q for q in possible_quotes if q not in escaped_string] + if not possible_quotes: + # If there aren't any possible_quotes, fallback to using repr + # on the original string. Try to use a quote from quote_types, + # e.g., so that we use triple quotes for docstrings. + string = repr(string) + quote = next((q for q in quote_types if string[0] in q), string[0]) + return string[1:-1], [quote] + if escaped_string: + # Sort so that we prefer '''"''' over """\"""" + possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1]) + # If we're using triple quotes and we'd need to escape a final + # quote, escape it + if possible_quotes[0][0] == escaped_string[-1]: + assert len(possible_quotes[0]) == 3 + escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1] + return escaped_string, possible_quotes + + def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): + """Write string literal value with a best effort attempt to avoid backslashes.""" + string, quote_types = self._str_literal_helper(string, quote_types=quote_types) + quote_type = quote_types[0] + self.write(f"{quote_type}{string}{quote_type}") + + def _ftstring_helper(self, parts): + new_parts = [] + quote_types = list(_ALL_QUOTES) + fallback_to_repr = False + for value, is_constant in parts: + if is_constant: + value, new_quote_types = self._str_literal_helper( + value, + quote_types=quote_types, + escape_special_whitespace=True, + ) + if set(new_quote_types).isdisjoint(quote_types): + fallback_to_repr = True + break + quote_types = new_quote_types + else: + if "\n" in value: + quote_types = [q for q in quote_types if q in _MULTI_QUOTES] + assert quote_types + + new_quote_types = [q for q in quote_types if q not in value] + if new_quote_types: + quote_types = new_quote_types + new_parts.append(value) + + if fallback_to_repr: + # If we weren't able to find a quote type that works for all parts + # of the JoinedStr, fallback to using repr and triple single quotes. + quote_types = ["'''"] + new_parts.clear() + for value, is_constant in parts: + if is_constant: + value = repr('"' + value) # force repr to use single quotes + expected_prefix = "'\"" + assert value.startswith(expected_prefix), repr(value) + value = value[len(expected_prefix):-1] + new_parts.append(value) + + value = "".join(new_parts) + quote_type = quote_types[0] + self.write(f"{quote_type}{value}{quote_type}") + + def _write_ftstring(self, values, prefix): + self.write(prefix) + fstring_parts = [] + for value in values: + with self.buffered() as buffer: + self._write_ftstring_inner(value) + fstring_parts.append( + ("".join(buffer), isinstance(value, Constant)) + ) + self._ftstring_helper(fstring_parts) + + def visit_JoinedStr(self, node): + self._write_ftstring(node.values, "f") + + def visit_TemplateStr(self, node): + self._write_ftstring(node.values, "t") + + def _write_ftstring_inner(self, node, is_format_spec=False): + if isinstance(node, JoinedStr): + # for both the f-string itself, and format_spec + for value in node.values: + self._write_ftstring_inner(value, is_format_spec=is_format_spec) + elif isinstance(node, Constant) and isinstance(node.value, str): + value = node.value.replace("{", "{{").replace("}", "}}") + + if is_format_spec: + value = value.replace("\\", "\\\\") + value = value.replace("'", "\\'") + value = value.replace('"', '\\"') + value = value.replace("\n", "\\n") + self.write(value) + elif isinstance(node, FormattedValue): + self.visit_FormattedValue(node) + elif isinstance(node, Interpolation): + self.visit_Interpolation(node) + else: + raise ValueError(f"Unexpected node inside JoinedStr, {node!r}") + + def _unparse_interpolation_value(self, inner): + unparser = type(self)() + unparser.set_precedence(_Precedence.TEST.next(), inner) + return unparser.visit(inner) + + def _write_interpolation(self, node, use_str_attr=False): + with self.delimit("{", "}"): + if use_str_attr: + expr = node.str + else: + expr = self._unparse_interpolation_value(node.value) + if expr.startswith("{"): + # Separate pair of opening brackets as "{ {" + self.write(" ") + self.write(expr) + if node.conversion != -1: + self.write(f"!{chr(node.conversion)}") + if node.format_spec: + self.write(":") + self._write_ftstring_inner(node.format_spec, is_format_spec=True) + + def visit_FormattedValue(self, node): + self._write_interpolation(node) + + def visit_Interpolation(self, node): + # If `str` is set to `None`, use the `value` to generate the source code. + self._write_interpolation(node, use_str_attr=node.str is not None) + + def visit_Name(self, node): + self.write(node.id) + + def _write_docstring(self, node): + self.fill(allow_semicolon=False) + if node.kind == "u": + self.write("u") + self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES) + + def _write_constant(self, value): + if isinstance(value, (float, complex)): + # Substitute overflowing decimal literal for AST infinities, + # and inf - inf for NaNs. + self.write( + repr(value) + .replace("inf", _INFSTR) + .replace("nan", f"({_INFSTR}-{_INFSTR})") + ) + else: + self.write(repr(value)) + + def visit_Constant(self, node): + value = node.value + if isinstance(value, tuple): + with self.delimit("(", ")"): + self.items_view(self._write_constant, value) + elif value is ...: + self.write("...") + else: + if node.kind == "u": + self.write("u") + self._write_constant(node.value) + + def visit_List(self, node): + with self.delimit("[", "]"): + self.interleave(lambda: self.write(", "), self.traverse, node.elts) + + def visit_ListComp(self, node): + with self.delimit("[", "]"): + self.traverse(node.elt) + for gen in node.generators: + self.traverse(gen) + + def visit_GeneratorExp(self, node): + with self.delimit("(", ")"): + self.traverse(node.elt) + for gen in node.generators: + self.traverse(gen) + + def visit_SetComp(self, node): + with self.delimit("{", "}"): + self.traverse(node.elt) + for gen in node.generators: + self.traverse(gen) + + def visit_DictComp(self, node): + with self.delimit("{", "}"): + self.traverse(node.key) + self.write(": ") + self.traverse(node.value) + for gen in node.generators: + self.traverse(gen) + + def visit_comprehension(self, node): + if node.is_async: + self.write(" async for ") + else: + self.write(" for ") + self.set_precedence(_Precedence.TUPLE, node.target) + self.traverse(node.target) + self.write(" in ") + self.set_precedence(_Precedence.TEST.next(), node.iter, *node.ifs) + self.traverse(node.iter) + for if_clause in node.ifs: + self.write(" if ") + self.traverse(if_clause) + + def visit_IfExp(self, node): + with self.require_parens(_Precedence.TEST, node): + self.set_precedence(_Precedence.TEST.next(), node.body, node.test) + self.traverse(node.body) + self.write(" if ") + self.traverse(node.test) + self.write(" else ") + self.set_precedence(_Precedence.TEST, node.orelse) + self.traverse(node.orelse) + + def visit_Set(self, node): + if node.elts: + with self.delimit("{", "}"): + self.interleave(lambda: self.write(", "), self.traverse, node.elts) + else: + # `{}` would be interpreted as a dictionary literal, and + # `set` might be shadowed. Thus: + self.write('{*()}') + + def visit_Dict(self, node): + def write_key_value_pair(k, v): + self.traverse(k) + self.write(": ") + self.traverse(v) + + def write_item(item): + k, v = item + if k is None: + # for dictionary unpacking operator in dicts {**{'y': 2}} + # see PEP 448 for details + self.write("**") + self.set_precedence(_Precedence.EXPR, v) + self.traverse(v) + else: + write_key_value_pair(k, v) + + with self.delimit("{", "}"): + self.interleave( + lambda: self.write(", "), write_item, zip(node.keys, node.values) + ) + + def visit_Tuple(self, node): + with self.delimit_if( + "(", + ")", + len(node.elts) == 0 or self.get_precedence(node) > _Precedence.TUPLE + ): + self.items_view(self.traverse, node.elts) + + unop = {"Invert": "~", "Not": "not", "UAdd": "+", "USub": "-"} + unop_precedence = { + "not": _Precedence.NOT, + "~": _Precedence.FACTOR, + "+": _Precedence.FACTOR, + "-": _Precedence.FACTOR, + } + + def visit_UnaryOp(self, node): + operator = self.unop[node.op.__class__.__name__] + operator_precedence = self.unop_precedence[operator] + with self.require_parens(operator_precedence, node): + self.write(operator) + # factor prefixes (+, -, ~) shouldn't be separated + # from the value they belong, (e.g: +1 instead of + 1) + if operator_precedence is not _Precedence.FACTOR: + self.write(" ") + self.set_precedence(operator_precedence, node.operand) + self.traverse(node.operand) + + binop = { + "Add": "+", + "Sub": "-", + "Mult": "*", + "MatMult": "@", + "Div": "/", + "Mod": "%", + "LShift": "<<", + "RShift": ">>", + "BitOr": "|", + "BitXor": "^", + "BitAnd": "&", + "FloorDiv": "//", + "Pow": "**", + } + + binop_precedence = { + "+": _Precedence.ARITH, + "-": _Precedence.ARITH, + "*": _Precedence.TERM, + "@": _Precedence.TERM, + "/": _Precedence.TERM, + "%": _Precedence.TERM, + "<<": _Precedence.SHIFT, + ">>": _Precedence.SHIFT, + "|": _Precedence.BOR, + "^": _Precedence.BXOR, + "&": _Precedence.BAND, + "//": _Precedence.TERM, + "**": _Precedence.POWER, + } + + binop_rassoc = frozenset(("**",)) + def visit_BinOp(self, node): + operator = self.binop[node.op.__class__.__name__] + operator_precedence = self.binop_precedence[operator] + with self.require_parens(operator_precedence, node): + if operator in self.binop_rassoc: + left_precedence = operator_precedence.next() + right_precedence = operator_precedence + else: + left_precedence = operator_precedence + right_precedence = operator_precedence.next() + + self.set_precedence(left_precedence, node.left) + self.traverse(node.left) + self.write(f" {operator} ") + self.set_precedence(right_precedence, node.right) + self.traverse(node.right) + + cmpops = { + "Eq": "==", + "NotEq": "!=", + "Lt": "<", + "LtE": "<=", + "Gt": ">", + "GtE": ">=", + "Is": "is", + "IsNot": "is not", + "In": "in", + "NotIn": "not in", + } + + def visit_Compare(self, node): + with self.require_parens(_Precedence.CMP, node): + self.set_precedence(_Precedence.CMP.next(), node.left, *node.comparators) + self.traverse(node.left) + for o, e in zip(node.ops, node.comparators): + self.write(" " + self.cmpops[o.__class__.__name__] + " ") + self.traverse(e) + + boolops = {"And": "and", "Or": "or"} + boolop_precedence = {"and": _Precedence.AND, "or": _Precedence.OR} + + def visit_BoolOp(self, node): + operator = self.boolops[node.op.__class__.__name__] + operator_precedence = self.boolop_precedence[operator] + + def increasing_level_traverse(node): + nonlocal operator_precedence + operator_precedence = operator_precedence.next() + self.set_precedence(operator_precedence, node) + self.traverse(node) + + with self.require_parens(operator_precedence, node): + s = f" {operator} " + self.interleave(lambda: self.write(s), increasing_level_traverse, node.values) + + def visit_Attribute(self, node): + self.set_precedence(_Precedence.ATOM, node.value) + self.traverse(node.value) + # Special case: 3.__abs__() is a syntax error, so if node.value + # is an integer literal then we need to either parenthesize + # it or add an extra space to get 3 .__abs__(). + if isinstance(node.value, Constant) and isinstance(node.value.value, int): + self.write(" ") + self.write(".") + self.write(node.attr) + + def visit_Call(self, node): + self.set_precedence(_Precedence.ATOM, node.func) + self.traverse(node.func) + with self.delimit("(", ")"): + comma = False + for e in node.args: + if comma: + self.write(", ") + else: + comma = True + self.traverse(e) + for e in node.keywords: + if comma: + self.write(", ") + else: + comma = True + self.traverse(e) + + def visit_Subscript(self, node): + def is_non_empty_tuple(slice_value): + return ( + isinstance(slice_value, Tuple) + and slice_value.elts + ) + + self.set_precedence(_Precedence.ATOM, node.value) + self.traverse(node.value) + with self.delimit("[", "]"): + if is_non_empty_tuple(node.slice): + # parentheses can be omitted if the tuple isn't empty + self.items_view(self.traverse, node.slice.elts) + else: + self.traverse(node.slice) + + def visit_Starred(self, node): + self.write("*") + self.set_precedence(_Precedence.EXPR, node.value) + self.traverse(node.value) + + def visit_Ellipsis(self, node): + self.write("...") + + def visit_Slice(self, node): + if node.lower: + self.traverse(node.lower) + self.write(":") + if node.upper: + self.traverse(node.upper) + if node.step: + self.write(":") + self.traverse(node.step) + + def visit_Match(self, node): + self.fill("match ", allow_semicolon=False) + self.traverse(node.subject) + with self.block(): + for case in node.cases: + self.traverse(case) + + def visit_arg(self, node): + self.write(node.arg) + if node.annotation: + self.write(": ") + self.traverse(node.annotation) + + def visit_arguments(self, node): + first = True + # normal arguments + all_args = node.posonlyargs + node.args + defaults = [None] * (len(all_args) - len(node.defaults)) + node.defaults + for index, elements in enumerate(zip(all_args, defaults), 1): + a, d = elements + if first: + first = False + else: + self.write(", ") + self.traverse(a) + if d: + self.write("=") + self.traverse(d) + if index == len(node.posonlyargs): + self.write(", /") + + # varargs, or bare '*' if no varargs but keyword-only arguments present + if node.vararg or node.kwonlyargs: + if first: + first = False + else: + self.write(", ") + self.write("*") + if node.vararg: + self.write(node.vararg.arg) + if node.vararg.annotation: + self.write(": ") + self.traverse(node.vararg.annotation) + + # keyword-only arguments + if node.kwonlyargs: + for a, d in zip(node.kwonlyargs, node.kw_defaults): + self.write(", ") + self.traverse(a) + if d: + self.write("=") + self.traverse(d) + + # kwargs + if node.kwarg: + if first: + first = False + else: + self.write(", ") + self.write("**" + node.kwarg.arg) + if node.kwarg.annotation: + self.write(": ") + self.traverse(node.kwarg.annotation) + + def visit_keyword(self, node): + if node.arg is None: + self.write("**") + else: + self.write(node.arg) + self.write("=") + self.traverse(node.value) + + def visit_Lambda(self, node): + with self.require_parens(_Precedence.TEST, node): + self.write("lambda") + with self.buffered() as buffer: + self.traverse(node.args) + if buffer: + self.write(" ", *buffer) + self.write(": ") + self.set_precedence(_Precedence.TEST, node.body) + self.traverse(node.body) + + def visit_alias(self, node): + self.write(node.name) + if node.asname: + self.write(" as " + node.asname) + + def visit_withitem(self, node): + self.traverse(node.context_expr) + if node.optional_vars: + self.write(" as ") + self.traverse(node.optional_vars) + + def visit_match_case(self, node): + self.fill("case ", allow_semicolon=False) + self.traverse(node.pattern) + if node.guard: + self.write(" if ") + self.traverse(node.guard) + with self.block(): + self.traverse(node.body) + + def visit_MatchValue(self, node): + self.traverse(node.value) + + def visit_MatchSingleton(self, node): + self._write_constant(node.value) + + def visit_MatchSequence(self, node): + with self.delimit("[", "]"): + self.interleave( + lambda: self.write(", "), self.traverse, node.patterns + ) + + def visit_MatchStar(self, node): + name = node.name + if name is None: + name = "_" + self.write(f"*{name}") + + def visit_MatchMapping(self, node): + def write_key_pattern_pair(pair): + k, p = pair + self.traverse(k) + self.write(": ") + self.traverse(p) + + with self.delimit("{", "}"): + keys = node.keys + self.interleave( + lambda: self.write(", "), + write_key_pattern_pair, + zip(keys, node.patterns, strict=True), + ) + rest = node.rest + if rest is not None: + if keys: + self.write(", ") + self.write(f"**{rest}") + + def visit_MatchClass(self, node): + self.set_precedence(_Precedence.ATOM, node.cls) + self.traverse(node.cls) + with self.delimit("(", ")"): + patterns = node.patterns + self.interleave( + lambda: self.write(", "), self.traverse, patterns + ) + attrs = node.kwd_attrs + if attrs: + def write_attr_pattern(pair): + attr, pattern = pair + self.write(f"{attr}=") + self.traverse(pattern) + + if patterns: + self.write(", ") + self.interleave( + lambda: self.write(", "), + write_attr_pattern, + zip(attrs, node.kwd_patterns, strict=True), + ) + + def visit_MatchAs(self, node): + name = node.name + pattern = node.pattern + if name is None: + self.write("_") + elif pattern is None: + self.write(node.name) + else: + with self.require_parens(_Precedence.TEST, node): + self.set_precedence(_Precedence.BOR, node.pattern) + self.traverse(node.pattern) + self.write(f" as {node.name}") + + def visit_MatchOr(self, node): + with self.require_parens(_Precedence.BOR, node): + self.set_precedence(_Precedence.BOR.next(), *node.patterns) + self.interleave(lambda: self.write(" | "), self.traverse, node.patterns) diff --git a/Lib/_collections_abc.py b/Lib/_collections_abc.py index 601107d2d86..996b094377a 100644 --- a/Lib/_collections_abc.py +++ b/Lib/_collections_abc.py @@ -85,6 +85,10 @@ def _f(): pass dict_items = type({}.items()) ## misc ## mappingproxy = type(type.__dict__) +def _get_framelocalsproxy(): + return type(sys._getframe().f_locals) +framelocalsproxy = _get_framelocalsproxy() +del _get_framelocalsproxy generator = type((lambda: (yield))()) ## coroutine ## async def _coro(): pass @@ -457,8 +461,8 @@ def __subclasshook__(cls, C): class _CallableGenericAlias(GenericAlias): """ Represent `Callable[argtypes, resulttype]`. - This sets ``__args__`` to a tuple containing the flattened ``argtypes`` - followed by ``resulttype``. + This sets ``__args__`` to a tuple containing the flattened + ``argtypes`` followed by ``resulttype``. Example: ``Callable[[int, str], float]`` sets ``__args__`` to ``(int, str, float)``. @@ -481,9 +485,10 @@ def __new__(cls, origin, args): def __repr__(self): if len(self.__args__) == 2 and _is_param_expr(self.__args__[0]): return super().__repr__() + from annotationlib import type_repr return (f'collections.abc.Callable' - f'[[{", ".join([_type_repr(a) for a in self.__args__[:-1]])}], ' - f'{_type_repr(self.__args__[-1])}]') + f'[[{", ".join([type_repr(a) for a in self.__args__[:-1]])}], ' + f'{type_repr(self.__args__[-1])}]') def __reduce__(self): args = self.__args__ @@ -520,23 +525,6 @@ def _is_param_expr(obj): names = ('ParamSpec', '_ConcatenateGenericAlias') return obj.__module__ == 'typing' and any(obj.__name__ == name for name in names) -def _type_repr(obj): - """Return the repr() of an object, special-casing types (internal helper). - - Copied from :mod:`typing` since collections.abc - shouldn't depend on that module. - (Keep this roughly in sync with the typing version.) - """ - if isinstance(obj, type): - if obj.__module__ == 'builtins': - return obj.__qualname__ - return f'{obj.__module__}.{obj.__qualname__}' - if obj is Ellipsis: - return '...' - if isinstance(obj, FunctionType): - return obj.__name__ - return repr(obj) - class Callable(metaclass=ABCMeta): @@ -836,6 +824,7 @@ def __eq__(self, other): __reversed__ = None Mapping.register(mappingproxy) +Mapping.register(framelocalsproxy) class MappingView(Sized): @@ -938,8 +927,9 @@ def __delitem__(self, key): __marker = object() def pop(self, key, default=__marker): - '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised. + '''D.pop(k[,d]) -> v, remove specified key and return the corresponding + value. If key is not found, d is returned if given, otherwise + KeyError is raised. ''' try: value = self[key] @@ -973,9 +963,12 @@ def clear(self): def update(self, other=(), /, **kwds): ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. - If E present and has a .keys() method, does: for k in E: D[k] = E[k] - If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v - In either case, this is followed by: for k, v in F.items(): D[k] = v + If E present and has a .keys() method, does: + for k in E.keys(): D[k] = E[k] + If E present and lacks .keys() method, does: + for (k, v) in E: D[k] = v + In either case, this is followed by: + for k, v in F.items(): D[k] = v ''' if isinstance(other, Mapping): for key in other: @@ -1040,8 +1033,8 @@ def __reversed__(self): yield self[i] def index(self, value, start=0, stop=None): - '''S.index(value, [start, [stop]]) -> integer -- return first index of value. - Raises ValueError if the value is not present. + '''S.index(value, [start, [stop]]) -> integer -- return first index of + value. Raises ValueError if the value is not present. Supporting start and stop arguments is optional, but recommended. @@ -1068,6 +1061,7 @@ def count(self, value): Sequence.register(tuple) Sequence.register(str) +Sequence.register(bytes) Sequence.register(range) Sequence.register(memoryview) @@ -1078,7 +1072,7 @@ def __new__(cls, name, bases, namespace, **kwargs): warnings._deprecated( "collections.abc.ByteString", - remove=(3, 14), + remove=(3, 17), ) return super().__new__(cls, name, bases, namespace, **kwargs) @@ -1087,14 +1081,18 @@ def __instancecheck__(cls, instance): warnings._deprecated( "collections.abc.ByteString", - remove=(3, 14), + remove=(3, 17), ) return super().__instancecheck__(instance) class ByteString(Sequence, metaclass=_DeprecateByteStringMeta): - """This unifies bytes and bytearray. + """Deprecated ABC serving as a common supertype of ``bytes`` and ``bytearray``. - XXX Should add all their methods. + This ABC is scheduled for removal in Python 3.17. + Use ``isinstance(obj, collections.abc.Buffer)`` to test if ``obj`` + implements the buffer protocol at runtime. For use in type annotations, + either use ``Buffer`` or a union that explicitly specifies the types your + code supports (e.g., ``bytes | bytearray | memoryview``). """ __slots__ = () @@ -1144,15 +1142,16 @@ def reverse(self): self[i], self[n-i-1] = self[n-i-1], self[i] def extend(self, values): - 'S.extend(iterable) -- extend sequence by appending elements from the iterable' + """S.extend(iterable) -- extend sequence by appending elements from the + iterable""" if values is self: values = list(values) for v in values: self.append(v) def pop(self, index=-1): - '''S.pop([index]) -> item -- remove and return item at index (default last). - Raise IndexError if list is empty or index is out of range. + '''S.pop([index]) -> item -- remove and return item at index (default + last). Raise IndexError if list is empty or index is out of range. ''' v = self[index] del self[index] @@ -1170,4 +1169,4 @@ def __iadd__(self, values): MutableSequence.register(list) -MutableSequence.register(bytearray) # Multiply inheriting, see ByteString +MutableSequence.register(bytearray) diff --git a/Lib/_colorize.py b/Lib/_colorize.py new file mode 100644 index 00000000000..d6673f6692f --- /dev/null +++ b/Lib/_colorize.py @@ -0,0 +1,355 @@ +import os +import sys + +from collections.abc import Callable, Iterator, Mapping +from dataclasses import dataclass, field, Field + +COLORIZE = True + + +# types +if False: + from typing import IO, Self, ClassVar + _theme: Theme + + +class ANSIColors: + RESET = "\x1b[0m" + + BLACK = "\x1b[30m" + BLUE = "\x1b[34m" + CYAN = "\x1b[36m" + GREEN = "\x1b[32m" + GREY = "\x1b[90m" + MAGENTA = "\x1b[35m" + RED = "\x1b[31m" + WHITE = "\x1b[37m" # more like LIGHT GRAY + YELLOW = "\x1b[33m" + + BOLD = "\x1b[1m" + BOLD_BLACK = "\x1b[1;30m" # DARK GRAY + BOLD_BLUE = "\x1b[1;34m" + BOLD_CYAN = "\x1b[1;36m" + BOLD_GREEN = "\x1b[1;32m" + BOLD_MAGENTA = "\x1b[1;35m" + BOLD_RED = "\x1b[1;31m" + BOLD_WHITE = "\x1b[1;37m" # actual WHITE + BOLD_YELLOW = "\x1b[1;33m" + + # intense = like bold but without being bold + INTENSE_BLACK = "\x1b[90m" + INTENSE_BLUE = "\x1b[94m" + INTENSE_CYAN = "\x1b[96m" + INTENSE_GREEN = "\x1b[92m" + INTENSE_MAGENTA = "\x1b[95m" + INTENSE_RED = "\x1b[91m" + INTENSE_WHITE = "\x1b[97m" + INTENSE_YELLOW = "\x1b[93m" + + BACKGROUND_BLACK = "\x1b[40m" + BACKGROUND_BLUE = "\x1b[44m" + BACKGROUND_CYAN = "\x1b[46m" + BACKGROUND_GREEN = "\x1b[42m" + BACKGROUND_MAGENTA = "\x1b[45m" + BACKGROUND_RED = "\x1b[41m" + BACKGROUND_WHITE = "\x1b[47m" + BACKGROUND_YELLOW = "\x1b[43m" + + INTENSE_BACKGROUND_BLACK = "\x1b[100m" + INTENSE_BACKGROUND_BLUE = "\x1b[104m" + INTENSE_BACKGROUND_CYAN = "\x1b[106m" + INTENSE_BACKGROUND_GREEN = "\x1b[102m" + INTENSE_BACKGROUND_MAGENTA = "\x1b[105m" + INTENSE_BACKGROUND_RED = "\x1b[101m" + INTENSE_BACKGROUND_WHITE = "\x1b[107m" + INTENSE_BACKGROUND_YELLOW = "\x1b[103m" + + +ColorCodes = set() +NoColors = ANSIColors() + +for attr, code in ANSIColors.__dict__.items(): + if not attr.startswith("__"): + ColorCodes.add(code) + setattr(NoColors, attr, "") + + +# +# Experimental theming support (see gh-133346) +# + +# - Create a theme by copying an existing `Theme` with one or more sections +# replaced, using `default_theme.copy_with()`; +# - create a theme section by copying an existing `ThemeSection` with one or +# more colors replaced, using for example `default_theme.syntax.copy_with()`; +# - create a theme from scratch by instantiating a `Theme` data class with +# the required sections (which are also dataclass instances). +# +# Then call `_colorize.set_theme(your_theme)` to set it. +# +# Put your theme configuration in $PYTHONSTARTUP for the interactive shell, +# or sitecustomize.py in your virtual environment or Python installation for +# other uses. Your applications can call `_colorize.set_theme()` too. +# +# Note that thanks to the dataclasses providing default values for all fields, +# creating a new theme or theme section from scratch is possible without +# specifying all keys. +# +# For example, here's a theme that makes punctuation and operators less prominent: +# +# try: +# from _colorize import set_theme, default_theme, Syntax, ANSIColors +# except ImportError: +# pass +# else: +# theme_with_dim_operators = default_theme.copy_with( +# syntax=Syntax(op=ANSIColors.INTENSE_BLACK), +# ) +# set_theme(theme_with_dim_operators) +# del set_theme, default_theme, Syntax, ANSIColors, theme_with_dim_operators +# +# Guarding the import ensures that your .pythonstartup file will still work in +# Python 3.13 and older. Deleting the variables ensures they don't remain in your +# interactive shell's global scope. + +class ThemeSection(Mapping[str, str]): + """A mixin/base class for theme sections. + + It enables dictionary access to a section, as well as implements convenience + methods. + """ + + # The two types below are just that: types to inform the type checker that the + # mixin will work in context of those fields existing + __dataclass_fields__: ClassVar[dict[str, Field[str]]] + _name_to_value: Callable[[str], str] + + def __post_init__(self) -> None: + name_to_value = {} + for color_name in self.__dataclass_fields__: + name_to_value[color_name] = getattr(self, color_name) + super().__setattr__('_name_to_value', name_to_value.__getitem__) + + def copy_with(self, **kwargs: str) -> Self: + color_state: dict[str, str] = {} + for color_name in self.__dataclass_fields__: + color_state[color_name] = getattr(self, color_name) + color_state.update(kwargs) + return type(self)(**color_state) + + @classmethod + def no_colors(cls) -> Self: + color_state: dict[str, str] = {} + for color_name in cls.__dataclass_fields__: + color_state[color_name] = "" + return cls(**color_state) + + def __getitem__(self, key: str) -> str: + return self._name_to_value(key) + + def __len__(self) -> int: + return len(self.__dataclass_fields__) + + def __iter__(self) -> Iterator[str]: + return iter(self.__dataclass_fields__) + + +@dataclass(frozen=True, kw_only=True) +class Argparse(ThemeSection): + usage: str = ANSIColors.BOLD_BLUE + prog: str = ANSIColors.BOLD_MAGENTA + prog_extra: str = ANSIColors.MAGENTA + heading: str = ANSIColors.BOLD_BLUE + summary_long_option: str = ANSIColors.CYAN + summary_short_option: str = ANSIColors.GREEN + summary_label: str = ANSIColors.YELLOW + summary_action: str = ANSIColors.GREEN + long_option: str = ANSIColors.BOLD_CYAN + short_option: str = ANSIColors.BOLD_GREEN + label: str = ANSIColors.BOLD_YELLOW + action: str = ANSIColors.BOLD_GREEN + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Syntax(ThemeSection): + prompt: str = ANSIColors.BOLD_MAGENTA + keyword: str = ANSIColors.BOLD_BLUE + keyword_constant: str = ANSIColors.BOLD_BLUE + builtin: str = ANSIColors.CYAN + comment: str = ANSIColors.RED + string: str = ANSIColors.GREEN + number: str = ANSIColors.YELLOW + op: str = ANSIColors.RESET + definition: str = ANSIColors.BOLD + soft_keyword: str = ANSIColors.BOLD_BLUE + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Traceback(ThemeSection): + type: str = ANSIColors.BOLD_MAGENTA + message: str = ANSIColors.MAGENTA + filename: str = ANSIColors.MAGENTA + line_no: str = ANSIColors.MAGENTA + frame: str = ANSIColors.MAGENTA + error_highlight: str = ANSIColors.BOLD_RED + error_range: str = ANSIColors.RED + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Unittest(ThemeSection): + passed: str = ANSIColors.GREEN + warn: str = ANSIColors.YELLOW + fail: str = ANSIColors.RED + fail_info: str = ANSIColors.BOLD_RED + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Theme: + """A suite of themes for all sections of Python. + + When adding a new one, remember to also modify `copy_with` and `no_colors` + below. + """ + argparse: Argparse = field(default_factory=Argparse) + syntax: Syntax = field(default_factory=Syntax) + traceback: Traceback = field(default_factory=Traceback) + unittest: Unittest = field(default_factory=Unittest) + + def copy_with( + self, + *, + argparse: Argparse | None = None, + syntax: Syntax | None = None, + traceback: Traceback | None = None, + unittest: Unittest | None = None, + ) -> Self: + """Return a new Theme based on this instance with some sections replaced. + + Themes are immutable to protect against accidental modifications that + could lead to invalid terminal states. + """ + return type(self)( + argparse=argparse or self.argparse, + syntax=syntax or self.syntax, + traceback=traceback or self.traceback, + unittest=unittest or self.unittest, + ) + + @classmethod + def no_colors(cls) -> Self: + """Return a new Theme where colors in all sections are empty strings. + + This allows writing user code as if colors are always used. The color + fields will be ANSI color code strings when colorization is desired + and possible, and empty strings otherwise. + """ + return cls( + argparse=Argparse.no_colors(), + syntax=Syntax.no_colors(), + traceback=Traceback.no_colors(), + unittest=Unittest.no_colors(), + ) + + +def get_colors( + colorize: bool = False, *, file: IO[str] | IO[bytes] | None = None +) -> ANSIColors: + if colorize or can_colorize(file=file): + return ANSIColors() + else: + return NoColors + + +def decolor(text: str) -> str: + """Remove ANSI color codes from a string.""" + for code in ColorCodes: + text = text.replace(code, "") + return text + + +def can_colorize(*, file: IO[str] | IO[bytes] | None = None) -> bool: + + def _safe_getenv(k: str, fallback: str | None = None) -> str | None: + """Exception-safe environment retrieval. See gh-128636.""" + try: + return os.environ.get(k, fallback) + except Exception: + return fallback + + if file is None: + file = sys.stdout + + if not sys.flags.ignore_environment: + if _safe_getenv("PYTHON_COLORS") == "0": + return False + if _safe_getenv("PYTHON_COLORS") == "1": + return True + if _safe_getenv("NO_COLOR"): + return False + if not COLORIZE: + return False + if _safe_getenv("FORCE_COLOR"): + return True + if _safe_getenv("TERM") == "dumb": + return False + + if not hasattr(file, "fileno"): + return False + + if sys.platform == "win32": + try: + import nt + + if not nt._supports_virtual_terminal(): + return False + except (ImportError, AttributeError): + return False + + try: + return os.isatty(file.fileno()) + except OSError: + return hasattr(file, "isatty") and file.isatty() + + +default_theme = Theme() +theme_no_color = default_theme.no_colors() + + +def get_theme( + *, + tty_file: IO[str] | IO[bytes] | None = None, + force_color: bool = False, + force_no_color: bool = False, +) -> Theme: + """Returns the currently set theme, potentially in a zero-color variant. + + In cases where colorizing is not possible (see `can_colorize`), the returned + theme contains all empty strings in all color definitions. + See `Theme.no_colors()` for more information. + + It is recommended not to cache the result of this function for extended + periods of time because the user might influence theme selection by + the interactive shell, a debugger, or application-specific code. The + environment (including environment variable state and console configuration + on Windows) can also change in the course of the application life cycle. + """ + if force_color or (not force_no_color and + can_colorize(file=tty_file)): + return _theme + return theme_no_color + + +def set_theme(t: Theme) -> None: + global _theme + + if not isinstance(t, Theme): + raise ValueError(f"Expected Theme object, found {t}") + + _theme = t + + +set_theme(default_theme) diff --git a/Lib/_compat_pickle.py b/Lib/_compat_pickle.py index 17b9010278f..60793c391ae 100644 --- a/Lib/_compat_pickle.py +++ b/Lib/_compat_pickle.py @@ -22,7 +22,6 @@ 'tkMessageBox': 'tkinter.messagebox', 'ScrolledText': 'tkinter.scrolledtext', 'Tkconstants': 'tkinter.constants', - 'Tix': 'tkinter.tix', 'ttk': 'tkinter.ttk', 'Tkinter': 'tkinter', 'markupbase': '_markupbase', @@ -257,3 +256,4 @@ for excname in PYTHON3_IMPORTERROR_EXCEPTIONS: REVERSE_NAME_MAPPING[('builtins', excname)] = ('exceptions', 'ImportError') +del excname diff --git a/Lib/_compression.py b/Lib/_compression.py deleted file mode 100644 index e8b70aa0a3e..00000000000 --- a/Lib/_compression.py +++ /dev/null @@ -1,162 +0,0 @@ -"""Internal classes used by the gzip, lzma and bz2 modules""" - -import io -import sys - -BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE # Compressed data read chunk size - - -class BaseStream(io.BufferedIOBase): - """Mode-checking helper functions.""" - - def _check_not_closed(self): - if self.closed: - raise ValueError("I/O operation on closed file") - - def _check_can_read(self): - if not self.readable(): - raise io.UnsupportedOperation("File not open for reading") - - def _check_can_write(self): - if not self.writable(): - raise io.UnsupportedOperation("File not open for writing") - - def _check_can_seek(self): - if not self.readable(): - raise io.UnsupportedOperation("Seeking is only supported " - "on files open for reading") - if not self.seekable(): - raise io.UnsupportedOperation("The underlying file object " - "does not support seeking") - - -class DecompressReader(io.RawIOBase): - """Adapts the decompressor API to a RawIOBase reader API""" - - def readable(self): - return True - - def __init__(self, fp, decomp_factory, trailing_error=(), **decomp_args): - self._fp = fp - self._eof = False - self._pos = 0 # Current offset in decompressed stream - - # Set to size of decompressed stream once it is known, for SEEK_END - self._size = -1 - - # Save the decompressor factory and arguments. - # If the file contains multiple compressed streams, each - # stream will need a separate decompressor object. A new decompressor - # object is also needed when implementing a backwards seek(). - self._decomp_factory = decomp_factory - self._decomp_args = decomp_args - self._decompressor = self._decomp_factory(**self._decomp_args) - - # Exception class to catch from decompressor signifying invalid - # trailing data to ignore - self._trailing_error = trailing_error - - def close(self): - self._decompressor = None - return super().close() - - def seekable(self): - return self._fp.seekable() - - def readinto(self, b): - with memoryview(b) as view, view.cast("B") as byte_view: - data = self.read(len(byte_view)) - byte_view[:len(data)] = data - return len(data) - - def read(self, size=-1): - if size < 0: - return self.readall() - - if not size or self._eof: - return b"" - data = None # Default if EOF is encountered - # Depending on the input data, our call to the decompressor may not - # return any data. In this case, try again after reading another block. - while True: - if self._decompressor.eof: - rawblock = (self._decompressor.unused_data or - self._fp.read(BUFFER_SIZE)) - if not rawblock: - break - # Continue to next stream. - self._decompressor = self._decomp_factory( - **self._decomp_args) - try: - data = self._decompressor.decompress(rawblock, size) - except self._trailing_error: - # Trailing data isn't a valid compressed stream; ignore it. - break - else: - if self._decompressor.needs_input: - rawblock = self._fp.read(BUFFER_SIZE) - if not rawblock: - raise EOFError("Compressed file ended before the " - "end-of-stream marker was reached") - else: - rawblock = b"" - data = self._decompressor.decompress(rawblock, size) - if data: - break - if not data: - self._eof = True - self._size = self._pos - return b"" - self._pos += len(data) - return data - - def readall(self): - chunks = [] - # sys.maxsize means the max length of output buffer is unlimited, - # so that the whole input buffer can be decompressed within one - # .decompress() call. - while data := self.read(sys.maxsize): - chunks.append(data) - - return b"".join(chunks) - - # Rewind the file to the beginning of the data stream. - def _rewind(self): - self._fp.seek(0) - self._eof = False - self._pos = 0 - self._decompressor = self._decomp_factory(**self._decomp_args) - - def seek(self, offset, whence=io.SEEK_SET): - # Recalculate offset as an absolute file position. - if whence == io.SEEK_SET: - pass - elif whence == io.SEEK_CUR: - offset = self._pos + offset - elif whence == io.SEEK_END: - # Seeking relative to EOF - we need to know the file's size. - if self._size < 0: - while self.read(io.DEFAULT_BUFFER_SIZE): - pass - offset = self._size + offset - else: - raise ValueError("Invalid value for whence: {}".format(whence)) - - # Make it so that offset is the number of bytes to skip forward. - if offset < self._pos: - self._rewind() - else: - offset -= self._pos - - # Read and discard data until we reach the desired position. - while offset > 0: - data = self.read(min(io.DEFAULT_BUFFER_SIZE, offset)) - if not data: - break - offset -= len(data) - - return self._pos - - def tell(self): - """Return the current file position.""" - return self._pos diff --git a/Lib/_dummy_thread.py b/Lib/_dummy_thread.py index 424b0b3be5e..0630d4e59fa 100644 --- a/Lib/_dummy_thread.py +++ b/Lib/_dummy_thread.py @@ -11,15 +11,35 @@ import _dummy_thread as _thread """ + # Exports only things specified by thread documentation; # skipping obsolete synonyms allocate(), start_new(), exit_thread(). -__all__ = ['error', 'start_new_thread', 'exit', 'get_ident', 'allocate_lock', - 'interrupt_main', 'LockType', 'RLock', - '_count'] +__all__ = [ + "error", + "start_new_thread", + "exit", + "get_ident", + "allocate_lock", + "interrupt_main", + "LockType", + "RLock", + "_count", + "start_joinable_thread", + "daemon_threads_allowed", + "_shutdown", + "_make_thread_handle", + "_ThreadHandle", + "_get_main_thread_ident", + "_is_main_interpreter", + "_local", +] # A dummy value TIMEOUT_MAX = 2**31 +# Main thread ident for dummy implementation +_MAIN_THREAD_IDENT = -1 + # NOTE: this module can be imported early in the extension building process, # and so top level imports of other modules should be avoided. Instead, all # imports are done when needed on a function-by-function basis. Since threads @@ -27,6 +47,7 @@ error = RuntimeError + def start_new_thread(function, args, kwargs={}): """Dummy implementation of _thread.start_new_thread(). @@ -52,6 +73,7 @@ def start_new_thread(function, args, kwargs={}): pass except: import traceback + traceback.print_exc() _main = True global _interrupt @@ -59,10 +81,58 @@ def start_new_thread(function, args, kwargs={}): _interrupt = False raise KeyboardInterrupt + +def start_joinable_thread(function, handle=None, daemon=True): + """Dummy implementation of _thread.start_joinable_thread(). + + In dummy thread, we just run the function synchronously. + """ + if handle is None: + handle = _ThreadHandle() + try: + function() + except SystemExit: + pass + except: + import traceback + + traceback.print_exc() + handle._set_done() + return handle + + +def daemon_threads_allowed(): + """Dummy implementation of _thread.daemon_threads_allowed().""" + return True + + +def _shutdown(): + """Dummy implementation of _thread._shutdown().""" + pass + + +def _make_thread_handle(ident): + """Dummy implementation of _thread._make_thread_handle().""" + handle = _ThreadHandle() + handle._ident = ident + return handle + + +def _get_main_thread_ident(): + """Dummy implementation of _thread._get_main_thread_ident().""" + return _MAIN_THREAD_IDENT + + +def _is_main_interpreter(): + """Dummy implementation of _thread._is_main_interpreter().""" + return True + + def exit(): """Dummy implementation of _thread.exit().""" raise SystemExit + def get_ident(): """Dummy implementation of _thread.get_ident(). @@ -70,26 +140,31 @@ def get_ident(): available, it is safe to assume that the current process is the only thread. Thus a constant can be safely returned. """ - return -1 + return _MAIN_THREAD_IDENT + def allocate_lock(): """Dummy implementation of _thread.allocate_lock().""" return LockType() + def stack_size(size=None): """Dummy implementation of _thread.stack_size().""" if size is not None: raise error("setting thread stack size not supported") return 0 + def _set_sentinel(): """Dummy implementation of _thread._set_sentinel().""" return LockType() + def _count(): """Dummy implementation of _thread._count().""" return 0 + class LockType(object): """Class implementing dummy implementation of _thread.LockType. @@ -125,6 +200,7 @@ def acquire(self, waitflag=None, timeout=-1): else: if timeout > 0: import time + time.sleep(timeout) return False @@ -153,14 +229,41 @@ def __repr__(self): "locked" if self.locked_status else "unlocked", self.__class__.__module__, self.__class__.__qualname__, - hex(id(self)) + hex(id(self)), ) + +class _ThreadHandle: + """Dummy implementation of _thread._ThreadHandle.""" + + def __init__(self): + self._ident = _MAIN_THREAD_IDENT + self._done = False + + @property + def ident(self): + return self._ident + + def _set_done(self): + self._done = True + + def is_done(self): + return self._done + + def join(self, timeout=None): + # In dummy thread, thread is always done + return + + def __repr__(self): + return f"<_ThreadHandle ident={self._ident}>" + + # Used to signal that interrupt_main was called in a "thread" _interrupt = False # True when not executing in a "thread" _main = True + def interrupt_main(): """Set _interrupt flag to True to have start_new_thread raise KeyboardInterrupt upon exiting.""" @@ -170,6 +273,7 @@ def interrupt_main(): global _interrupt _interrupt = True + class RLock: def __init__(self): self.locked_count = 0 @@ -190,7 +294,7 @@ def release(self): return True def locked(self): - return self.locked_status != 0 + return self.locked_count != 0 def __repr__(self): return "<%s %s.%s object owner=%s count=%s at %s>" % ( @@ -199,5 +303,36 @@ def __repr__(self): self.__class__.__qualname__, get_ident() if self.locked_count else 0, self.locked_count, - hex(id(self)) + hex(id(self)), ) + + +class _local: + """Dummy implementation of _thread._local (thread-local storage).""" + + def __init__(self): + object.__setattr__(self, "_local__impl", {}) + + def __getattribute__(self, name): + if name.startswith("_local__"): + return object.__getattribute__(self, name) + impl = object.__getattribute__(self, "_local__impl") + try: + return impl[name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name, value): + if name.startswith("_local__"): + return object.__setattr__(self, name, value) + impl = object.__getattribute__(self, "_local__impl") + impl[name] = value + + def __delattr__(self, name): + if name.startswith("_local__"): + return object.__delattr__(self, name) + impl = object.__getattribute__(self, "_local__impl") + try: + del impl[name] + except KeyError: + raise AttributeError(name) diff --git a/Lib/_ios_support.py b/Lib/_ios_support.py new file mode 100644 index 00000000000..20467a7c2bc --- /dev/null +++ b/Lib/_ios_support.py @@ -0,0 +1,71 @@ +import sys +try: + from ctypes import cdll, c_void_p, c_char_p, util +except ImportError: + # ctypes is an optional module. If it's not present, we're limited in what + # we can tell about the system, but we don't want to prevent the module + # from working. + print("ctypes isn't available; iOS system calls will not be available", file=sys.stderr) + objc = None +else: + # ctypes is available. Load the ObjC library, and wrap the objc_getClass, + # sel_registerName methods + lib = util.find_library("objc") + if lib is None: + # Failed to load the objc library + raise ImportError("ObjC runtime library couldn't be loaded") + + objc = cdll.LoadLibrary(lib) + objc.objc_getClass.restype = c_void_p + objc.objc_getClass.argtypes = [c_char_p] + objc.sel_registerName.restype = c_void_p + objc.sel_registerName.argtypes = [c_char_p] + + +def get_platform_ios(): + # Determine if this is a simulator using the multiarch value + is_simulator = sys.implementation._multiarch.endswith("simulator") + + # We can't use ctypes; abort + if not objc: + return None + + # Most of the methods return ObjC objects + objc.objc_msgSend.restype = c_void_p + # All the methods used have no arguments. + objc.objc_msgSend.argtypes = [c_void_p, c_void_p] + + # Equivalent of: + # device = [UIDevice currentDevice] + UIDevice = objc.objc_getClass(b"UIDevice") + SEL_currentDevice = objc.sel_registerName(b"currentDevice") + device = objc.objc_msgSend(UIDevice, SEL_currentDevice) + + # Equivalent of: + # device_systemVersion = [device systemVersion] + SEL_systemVersion = objc.sel_registerName(b"systemVersion") + device_systemVersion = objc.objc_msgSend(device, SEL_systemVersion) + + # Equivalent of: + # device_systemName = [device systemName] + SEL_systemName = objc.sel_registerName(b"systemName") + device_systemName = objc.objc_msgSend(device, SEL_systemName) + + # Equivalent of: + # device_model = [device model] + SEL_model = objc.sel_registerName(b"model") + device_model = objc.objc_msgSend(device, SEL_model) + + # UTF8String returns a const char*; + SEL_UTF8String = objc.sel_registerName(b"UTF8String") + objc.objc_msgSend.restype = c_char_p + + # Equivalent of: + # system = [device_systemName UTF8String] + # release = [device_systemVersion UTF8String] + # model = [device_model UTF8String] + system = objc.objc_msgSend(device_systemName, SEL_UTF8String).decode() + release = objc.objc_msgSend(device_systemVersion, SEL_UTF8String).decode() + model = objc.objc_msgSend(device_model, SEL_UTF8String).decode() + + return system, release, model, is_simulator diff --git a/Lib/_markupbase.py b/Lib/_markupbase.py index 3ad7e279960..614f0cd16dd 100644 --- a/Lib/_markupbase.py +++ b/Lib/_markupbase.py @@ -13,7 +13,7 @@ _markedsectionclose = re.compile(r']\s*]\s*>') # An analysis of the MS-Word extensions is available at -# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf +# http://web.archive.org/web/20060321153828/http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf _msmarkedsectionclose = re.compile(r']\s*>') diff --git a/Lib/_opcode_metadata.py b/Lib/_opcode_metadata.py new file mode 100644 index 00000000000..001a016621c --- /dev/null +++ b/Lib/_opcode_metadata.py @@ -0,0 +1,371 @@ +# This file is generated by tools/opcode_metadata/generate_py_opcode_metadata.py +# for RustPython bytecode format (CPython 3.14 compatible opcode numbers). +# Do not edit! + +_specializations = { + "RESUME": [ + "RESUME_CHECK", + ], + "LOAD_CONST": [ + "LOAD_CONST_MORTAL", + "LOAD_CONST_IMMORTAL", + ], + "TO_BOOL": [ + "TO_BOOL_ALWAYS_TRUE", + "TO_BOOL_BOOL", + "TO_BOOL_INT", + "TO_BOOL_LIST", + "TO_BOOL_NONE", + "TO_BOOL_STR", + ], + "BINARY_OP": [ + "BINARY_OP_MULTIPLY_INT", + "BINARY_OP_ADD_INT", + "BINARY_OP_SUBTRACT_INT", + "BINARY_OP_MULTIPLY_FLOAT", + "BINARY_OP_ADD_FLOAT", + "BINARY_OP_SUBTRACT_FLOAT", + "BINARY_OP_ADD_UNICODE", + "BINARY_OP_SUBSCR_LIST_INT", + "BINARY_OP_SUBSCR_LIST_SLICE", + "BINARY_OP_SUBSCR_TUPLE_INT", + "BINARY_OP_SUBSCR_STR_INT", + "BINARY_OP_SUBSCR_DICT", + "BINARY_OP_SUBSCR_GETITEM", + "BINARY_OP_EXTEND", + "BINARY_OP_INPLACE_ADD_UNICODE", + ], + "STORE_SUBSCR": [ + "STORE_SUBSCR_DICT", + "STORE_SUBSCR_LIST_INT", + ], + "SEND": [ + "SEND_GEN", + ], + "UNPACK_SEQUENCE": [ + "UNPACK_SEQUENCE_TWO_TUPLE", + "UNPACK_SEQUENCE_TUPLE", + "UNPACK_SEQUENCE_LIST", + ], + "STORE_ATTR": [ + "STORE_ATTR_INSTANCE_VALUE", + "STORE_ATTR_SLOT", + "STORE_ATTR_WITH_HINT", + ], + "LOAD_GLOBAL": [ + "LOAD_GLOBAL_MODULE", + "LOAD_GLOBAL_BUILTIN", + ], + "LOAD_SUPER_ATTR": [ + "LOAD_SUPER_ATTR_ATTR", + "LOAD_SUPER_ATTR_METHOD", + ], + "LOAD_ATTR": [ + "LOAD_ATTR_INSTANCE_VALUE", + "LOAD_ATTR_MODULE", + "LOAD_ATTR_WITH_HINT", + "LOAD_ATTR_SLOT", + "LOAD_ATTR_CLASS", + "LOAD_ATTR_CLASS_WITH_METACLASS_CHECK", + "LOAD_ATTR_PROPERTY", + "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", + "LOAD_ATTR_METHOD_WITH_VALUES", + "LOAD_ATTR_METHOD_NO_DICT", + "LOAD_ATTR_METHOD_LAZY_DICT", + "LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", + "LOAD_ATTR_NONDESCRIPTOR_NO_DICT", + ], + "COMPARE_OP": [ + "COMPARE_OP_FLOAT", + "COMPARE_OP_INT", + "COMPARE_OP_STR", + ], + "CONTAINS_OP": [ + "CONTAINS_OP_SET", + "CONTAINS_OP_DICT", + ], + "JUMP_BACKWARD": [ + "JUMP_BACKWARD_NO_JIT", + "JUMP_BACKWARD_JIT", + ], + "FOR_ITER": [ + "FOR_ITER_LIST", + "FOR_ITER_TUPLE", + "FOR_ITER_RANGE", + "FOR_ITER_GEN", + ], + "CALL": [ + "CALL_BOUND_METHOD_EXACT_ARGS", + "CALL_PY_EXACT_ARGS", + "CALL_TYPE_1", + "CALL_STR_1", + "CALL_TUPLE_1", + "CALL_BUILTIN_CLASS", + "CALL_BUILTIN_O", + "CALL_BUILTIN_FAST", + "CALL_BUILTIN_FAST_WITH_KEYWORDS", + "CALL_LEN", + "CALL_ISINSTANCE", + "CALL_LIST_APPEND", + "CALL_METHOD_DESCRIPTOR_O", + "CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", + "CALL_METHOD_DESCRIPTOR_NOARGS", + "CALL_METHOD_DESCRIPTOR_FAST", + "CALL_ALLOC_AND_ENTER_INIT", + "CALL_PY_GENERAL", + "CALL_BOUND_METHOD_GENERAL", + "CALL_NON_PY_GENERAL", + ], + "CALL_KW": [ + "CALL_KW_BOUND_METHOD", + "CALL_KW_PY", + "CALL_KW_NON_PY", + ], +} + +_specialized_opmap = { + 'BINARY_OP_ADD_FLOAT': 129, + 'BINARY_OP_ADD_INT': 130, + 'BINARY_OP_ADD_UNICODE': 131, + 'BINARY_OP_EXTEND': 132, + 'BINARY_OP_INPLACE_ADD_UNICODE': 3, + 'BINARY_OP_MULTIPLY_FLOAT': 133, + 'BINARY_OP_MULTIPLY_INT': 134, + 'BINARY_OP_SUBSCR_DICT': 135, + 'BINARY_OP_SUBSCR_GETITEM': 136, + 'BINARY_OP_SUBSCR_LIST_INT': 137, + 'BINARY_OP_SUBSCR_LIST_SLICE': 138, + 'BINARY_OP_SUBSCR_STR_INT': 139, + 'BINARY_OP_SUBSCR_TUPLE_INT': 140, + 'BINARY_OP_SUBTRACT_FLOAT': 141, + 'BINARY_OP_SUBTRACT_INT': 142, + 'CALL_ALLOC_AND_ENTER_INIT': 143, + 'CALL_BOUND_METHOD_EXACT_ARGS': 144, + 'CALL_BOUND_METHOD_GENERAL': 145, + 'CALL_BUILTIN_CLASS': 146, + 'CALL_BUILTIN_FAST': 147, + 'CALL_BUILTIN_FAST_WITH_KEYWORDS': 148, + 'CALL_BUILTIN_O': 149, + 'CALL_ISINSTANCE': 150, + 'CALL_KW_BOUND_METHOD': 151, + 'CALL_KW_NON_PY': 152, + 'CALL_KW_PY': 153, + 'CALL_LEN': 154, + 'CALL_LIST_APPEND': 155, + 'CALL_METHOD_DESCRIPTOR_FAST': 156, + 'CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS': 157, + 'CALL_METHOD_DESCRIPTOR_NOARGS': 158, + 'CALL_METHOD_DESCRIPTOR_O': 159, + 'CALL_NON_PY_GENERAL': 160, + 'CALL_PY_EXACT_ARGS': 161, + 'CALL_PY_GENERAL': 162, + 'CALL_STR_1': 163, + 'CALL_TUPLE_1': 164, + 'CALL_TYPE_1': 165, + 'COMPARE_OP_FLOAT': 166, + 'COMPARE_OP_INT': 167, + 'COMPARE_OP_STR': 168, + 'CONTAINS_OP_DICT': 169, + 'CONTAINS_OP_SET': 170, + 'FOR_ITER_GEN': 171, + 'FOR_ITER_LIST': 172, + 'FOR_ITER_RANGE': 173, + 'FOR_ITER_TUPLE': 174, + 'JUMP_BACKWARD_JIT': 175, + 'JUMP_BACKWARD_NO_JIT': 176, + 'LOAD_ATTR_CLASS': 177, + 'LOAD_ATTR_CLASS_WITH_METACLASS_CHECK': 178, + 'LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN': 179, + 'LOAD_ATTR_INSTANCE_VALUE': 180, + 'LOAD_ATTR_METHOD_LAZY_DICT': 181, + 'LOAD_ATTR_METHOD_NO_DICT': 182, + 'LOAD_ATTR_METHOD_WITH_VALUES': 183, + 'LOAD_ATTR_MODULE': 184, + 'LOAD_ATTR_NONDESCRIPTOR_NO_DICT': 185, + 'LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES': 186, + 'LOAD_ATTR_PROPERTY': 187, + 'LOAD_ATTR_SLOT': 188, + 'LOAD_ATTR_WITH_HINT': 189, + 'LOAD_CONST_IMMORTAL': 190, + 'LOAD_CONST_MORTAL': 191, + 'LOAD_GLOBAL_BUILTIN': 192, + 'LOAD_GLOBAL_MODULE': 193, + 'LOAD_SUPER_ATTR_ATTR': 194, + 'LOAD_SUPER_ATTR_METHOD': 195, + 'RESUME_CHECK': 196, + 'SEND_GEN': 197, + 'STORE_ATTR_INSTANCE_VALUE': 198, + 'STORE_ATTR_SLOT': 199, + 'STORE_ATTR_WITH_HINT': 200, + 'STORE_SUBSCR_DICT': 201, + 'STORE_SUBSCR_LIST_INT': 202, + 'TO_BOOL_ALWAYS_TRUE': 203, + 'TO_BOOL_BOOL': 204, + 'TO_BOOL_INT': 205, + 'TO_BOOL_LIST': 206, + 'TO_BOOL_NONE': 207, + 'TO_BOOL_STR': 208, + 'UNPACK_SEQUENCE_LIST': 209, + 'UNPACK_SEQUENCE_TUPLE': 210, + 'UNPACK_SEQUENCE_TWO_TUPLE': 211, +} + +opmap = { + 'CACHE': 0, + 'RESERVED': 17, + 'RESUME': 128, + 'INSTRUMENTED_LINE': 254, + 'ENTER_EXECUTOR': 255, + 'BINARY_SLICE': 1, + 'BUILD_TEMPLATE': 2, + 'CALL_FUNCTION_EX': 4, + 'CHECK_EG_MATCH': 5, + 'CHECK_EXC_MATCH': 6, + 'CLEANUP_THROW': 7, + 'DELETE_SUBSCR': 8, + 'END_FOR': 9, + 'END_SEND': 10, + 'EXIT_INIT_CHECK': 11, + 'FORMAT_SIMPLE': 12, + 'FORMAT_WITH_SPEC': 13, + 'GET_AITER': 14, + 'GET_ANEXT': 15, + 'GET_ITER': 16, + 'GET_LEN': 18, + 'GET_YIELD_FROM_ITER': 19, + 'INTERPRETER_EXIT': 20, + 'LOAD_BUILD_CLASS': 21, + 'LOAD_LOCALS': 22, + 'MAKE_FUNCTION': 23, + 'MATCH_KEYS': 24, + 'MATCH_MAPPING': 25, + 'MATCH_SEQUENCE': 26, + 'NOP': 27, + 'NOT_TAKEN': 28, + 'POP_EXCEPT': 29, + 'POP_ITER': 30, + 'POP_TOP': 31, + 'PUSH_EXC_INFO': 32, + 'PUSH_NULL': 33, + 'RETURN_GENERATOR': 34, + 'RETURN_VALUE': 35, + 'SETUP_ANNOTATIONS': 36, + 'STORE_SLICE': 37, + 'STORE_SUBSCR': 38, + 'TO_BOOL': 39, + 'UNARY_INVERT': 40, + 'UNARY_NEGATIVE': 41, + 'UNARY_NOT': 42, + 'WITH_EXCEPT_START': 43, + 'BINARY_OP': 44, + 'BUILD_INTERPOLATION': 45, + 'BUILD_LIST': 46, + 'BUILD_MAP': 47, + 'BUILD_SET': 48, + 'BUILD_SLICE': 49, + 'BUILD_STRING': 50, + 'BUILD_TUPLE': 51, + 'CALL': 52, + 'CALL_INTRINSIC_1': 53, + 'CALL_INTRINSIC_2': 54, + 'CALL_KW': 55, + 'COMPARE_OP': 56, + 'CONTAINS_OP': 57, + 'CONVERT_VALUE': 58, + 'COPY': 59, + 'COPY_FREE_VARS': 60, + 'DELETE_ATTR': 61, + 'DELETE_DEREF': 62, + 'DELETE_FAST': 63, + 'DELETE_GLOBAL': 64, + 'DELETE_NAME': 65, + 'DICT_MERGE': 66, + 'DICT_UPDATE': 67, + 'END_ASYNC_FOR': 68, + 'EXTENDED_ARG': 69, + 'FOR_ITER': 70, + 'GET_AWAITABLE': 71, + 'IMPORT_FROM': 72, + 'IMPORT_NAME': 73, + 'IS_OP': 74, + 'JUMP_BACKWARD': 75, + 'JUMP_BACKWARD_NO_INTERRUPT': 76, + 'JUMP_FORWARD': 77, + 'LIST_APPEND': 78, + 'LIST_EXTEND': 79, + 'LOAD_ATTR': 80, + 'LOAD_COMMON_CONSTANT': 81, + 'LOAD_CONST': 82, + 'LOAD_DEREF': 83, + 'LOAD_FAST': 84, + 'LOAD_FAST_AND_CLEAR': 85, + 'LOAD_FAST_BORROW': 86, + 'LOAD_FAST_BORROW_LOAD_FAST_BORROW': 87, + 'LOAD_FAST_CHECK': 88, + 'LOAD_FAST_LOAD_FAST': 89, + 'LOAD_FROM_DICT_OR_DEREF': 90, + 'LOAD_FROM_DICT_OR_GLOBALS': 91, + 'LOAD_GLOBAL': 92, + 'LOAD_NAME': 93, + 'LOAD_SMALL_INT': 94, + 'LOAD_SPECIAL': 95, + 'LOAD_SUPER_ATTR': 96, + 'MAKE_CELL': 97, + 'MAP_ADD': 98, + 'MATCH_CLASS': 99, + 'POP_JUMP_IF_FALSE': 100, + 'POP_JUMP_IF_NONE': 101, + 'POP_JUMP_IF_NOT_NONE': 102, + 'POP_JUMP_IF_TRUE': 103, + 'RAISE_VARARGS': 104, + 'RERAISE': 105, + 'SEND': 106, + 'SET_ADD': 107, + 'SET_FUNCTION_ATTRIBUTE': 108, + 'SET_UPDATE': 109, + 'STORE_ATTR': 110, + 'STORE_DEREF': 111, + 'STORE_FAST': 112, + 'STORE_FAST_LOAD_FAST': 113, + 'STORE_FAST_STORE_FAST': 114, + 'STORE_GLOBAL': 115, + 'STORE_NAME': 116, + 'SWAP': 117, + 'UNPACK_EX': 118, + 'UNPACK_SEQUENCE': 119, + 'YIELD_VALUE': 120, + 'INSTRUMENTED_END_FOR': 234, + 'INSTRUMENTED_POP_ITER': 235, + 'INSTRUMENTED_END_SEND': 236, + 'INSTRUMENTED_FOR_ITER': 237, + 'INSTRUMENTED_INSTRUCTION': 238, + 'INSTRUMENTED_JUMP_FORWARD': 239, + 'INSTRUMENTED_NOT_TAKEN': 240, + 'INSTRUMENTED_POP_JUMP_IF_TRUE': 241, + 'INSTRUMENTED_POP_JUMP_IF_FALSE': 242, + 'INSTRUMENTED_POP_JUMP_IF_NONE': 243, + 'INSTRUMENTED_POP_JUMP_IF_NOT_NONE': 244, + 'INSTRUMENTED_RESUME': 245, + 'INSTRUMENTED_RETURN_VALUE': 246, + 'INSTRUMENTED_YIELD_VALUE': 247, + 'INSTRUMENTED_END_ASYNC_FOR': 248, + 'INSTRUMENTED_LOAD_SUPER_ATTR': 249, + 'INSTRUMENTED_CALL': 250, + 'INSTRUMENTED_CALL_KW': 251, + 'INSTRUMENTED_CALL_FUNCTION_EX': 252, + 'INSTRUMENTED_JUMP_BACKWARD': 253, + 'ANNOTATIONS_PLACEHOLDER': 256, + 'JUMP': 257, + 'JUMP_IF_FALSE': 258, + 'JUMP_IF_TRUE': 259, + 'JUMP_NO_INTERRUPT': 260, + 'LOAD_CLOSURE': 261, + 'POP_BLOCK': 262, + 'SETUP_CLEANUP': 263, + 'SETUP_FINALLY': 264, + 'SETUP_WITH': 265, + 'STORE_FAST_MAYBE_NULL': 266, +} + +HAVE_ARGUMENT = 43 +MIN_INSTRUMENTED_OPCODE = 234 diff --git a/Lib/_osx_support.py b/Lib/_osx_support.py index aa66c8b9f41..0cb064fcd79 100644 --- a/Lib/_osx_support.py +++ b/Lib/_osx_support.py @@ -507,6 +507,11 @@ def get_platform_osx(_config_vars, osname, release, machine): # MACOSX_DEPLOYMENT_TARGET. macver = _config_vars.get('MACOSX_DEPLOYMENT_TARGET', '') + if macver and '.' not in macver: + # Ensure that the version includes at least a major + # and minor version, even if MACOSX_DEPLOYMENT_TARGET + # is set to a single-label version like "14". + macver += '.0' macrelease = _get_system_version() or macver macver = macver or macrelease diff --git a/Lib/_py_warnings.py b/Lib/_py_warnings.py new file mode 100644 index 00000000000..55f8c069591 --- /dev/null +++ b/Lib/_py_warnings.py @@ -0,0 +1,869 @@ +"""Python part of the warnings subsystem.""" + +import sys +import _contextvars +import _thread + + +__all__ = ["warn", "warn_explicit", "showwarning", + "formatwarning", "filterwarnings", "simplefilter", + "resetwarnings", "catch_warnings", "deprecated"] + + +# Normally '_wm' is sys.modules['warnings'] but for unit tests it can be +# a different module. User code is allowed to reassign global attributes +# of the 'warnings' module, commonly 'filters' or 'showwarning'. So we +# need to lookup these global attributes dynamically on the '_wm' object, +# rather than binding them earlier. The code in this module consistently uses +# '_wm.' rather than using the globals of this module. If the +# '_warnings' C extension is in use, some globals are replaced by functions +# and variables defined in that extension. +_wm = None + + +def _set_module(module): + global _wm + _wm = module + + +# filters contains a sequence of filter 5-tuples +# The components of the 5-tuple are: +# - an action: error, ignore, always, all, default, module, or once +# - a compiled regex that must match the warning message +# - a class representing the warning category +# - a compiled regex that must match the module that is being warned +# - a line number for the line being warning, or 0 to mean any line +# If either if the compiled regexs are None, match anything. +filters = [] + + +defaultaction = "default" +onceregistry = {} +_lock = _thread.RLock() +_filters_version = 1 + + +# If true, catch_warnings() will use a context var to hold the modified +# filters list. Otherwise, catch_warnings() will operate on the 'filters' +# global of the warnings module. +_use_context = sys.flags.context_aware_warnings + + +class _Context: + def __init__(self, filters): + self._filters = filters + self.log = None # if set to a list, logging is enabled + + def copy(self): + context = _Context(self._filters[:]) + if self.log is not None: + context.log = self.log + return context + + def _record_warning(self, msg): + self.log.append(msg) + + +class _GlobalContext(_Context): + def __init__(self): + self.log = None + + @property + def _filters(self): + # Since there is quite a lot of code that assigns to + # warnings.filters, this needs to return the current value of + # the module global. + try: + return _wm.filters + except AttributeError: + # 'filters' global was deleted. Do we need to actually handle this case? + return [] + + +_global_context = _GlobalContext() + + +_warnings_context = _contextvars.ContextVar('warnings_context') + + +def _get_context(): + if not _use_context: + return _global_context + try: + return _wm._warnings_context.get() + except LookupError: + return _global_context + + +def _set_context(context): + assert _use_context + _wm._warnings_context.set(context) + + +def _new_context(): + assert _use_context + old_context = _wm._get_context() + new_context = old_context.copy() + _wm._set_context(new_context) + return old_context, new_context + + +def _get_filters(): + """Return the current list of filters. This is a non-public API used by + module functions and by the unit tests.""" + return _wm._get_context()._filters + + +def _filters_mutated_lock_held(): + _wm._filters_version += 1 + + +def showwarning(message, category, filename, lineno, file=None, line=None): + """Hook to write a warning to a file; replace if you like.""" + msg = _wm.WarningMessage(message, category, filename, lineno, file, line) + _wm._showwarnmsg_impl(msg) + + +def formatwarning(message, category, filename, lineno, line=None): + """Function to format a warning the standard way.""" + msg = _wm.WarningMessage(message, category, filename, lineno, None, line) + return _wm._formatwarnmsg_impl(msg) + + +def _showwarnmsg_impl(msg): + context = _wm._get_context() + if context.log is not None: + context._record_warning(msg) + return + file = msg.file + if file is None: + file = sys.stderr + if file is None: + # sys.stderr is None when run with pythonw.exe: + # warnings get lost + return + text = _wm._formatwarnmsg(msg) + try: + file.write(text) + except OSError: + # the file (probably stderr) is invalid - this warning gets lost. + pass + + +def _formatwarnmsg_impl(msg): + category = msg.category.__name__ + s = f"{msg.filename}:{msg.lineno}: {category}: {msg.message}\n" + + if msg.line is None: + try: + import linecache + line = linecache.getline(msg.filename, msg.lineno) + except Exception: + # When a warning is logged during Python shutdown, linecache + # and the import machinery don't work anymore + line = None + linecache = None + else: + line = msg.line + if line: + line = line.strip() + s += " %s\n" % line + + if msg.source is not None: + try: + import tracemalloc + # Logging a warning should not raise a new exception: + # catch Exception, not only ImportError and RecursionError. + except Exception: + # don't suggest to enable tracemalloc if it's not available + suggest_tracemalloc = False + tb = None + else: + try: + suggest_tracemalloc = not tracemalloc.is_tracing() + tb = tracemalloc.get_object_traceback(msg.source) + except Exception: + # When a warning is logged during Python shutdown, tracemalloc + # and the import machinery don't work anymore + suggest_tracemalloc = False + tb = None + + if tb is not None: + s += 'Object allocated at (most recent call last):\n' + for frame in tb: + s += (' File "%s", lineno %s\n' + % (frame.filename, frame.lineno)) + + try: + if linecache is not None: + line = linecache.getline(frame.filename, frame.lineno) + else: + line = None + except Exception: + line = None + if line: + line = line.strip() + s += ' %s\n' % line + elif suggest_tracemalloc: + s += (f'{category}: Enable tracemalloc to get the object ' + f'allocation traceback\n') + return s + + +# Keep a reference to check if the function was replaced +_showwarning_orig = showwarning + + +def _showwarnmsg(msg): + """Hook to write a warning to a file; replace if you like.""" + try: + sw = _wm.showwarning + except AttributeError: + pass + else: + if sw is not _showwarning_orig: + # warnings.showwarning() was replaced + if not callable(sw): + raise TypeError("warnings.showwarning() must be set to a " + "function or method") + + sw(msg.message, msg.category, msg.filename, msg.lineno, + msg.file, msg.line) + return + _wm._showwarnmsg_impl(msg) + + +# Keep a reference to check if the function was replaced +_formatwarning_orig = formatwarning + + +def _formatwarnmsg(msg): + """Function to format a warning the standard way.""" + try: + fw = _wm.formatwarning + except AttributeError: + pass + else: + if fw is not _formatwarning_orig: + # warnings.formatwarning() was replaced + return fw(msg.message, msg.category, + msg.filename, msg.lineno, msg.line) + return _wm._formatwarnmsg_impl(msg) + + +def filterwarnings(action, message="", category=Warning, module="", lineno=0, + append=False): + """Insert an entry into the list of warnings filters (at the front). + + 'action' -- one of "error", "ignore", "always", "all", "default", "module", + or "once" + 'message' -- a regex that the warning message must match + 'category' -- a class that the warning must be a subclass of + 'module' -- a regex that the module name must match + 'lineno' -- an integer line number, 0 matches all warnings + 'append' -- if true, append to the list of filters + """ + if action not in {"error", "ignore", "always", "all", "default", "module", "once"}: + raise ValueError(f"invalid action: {action!r}") + if not isinstance(message, str): + raise TypeError("message must be a string") + if not isinstance(category, type) or not issubclass(category, Warning): + raise TypeError("category must be a Warning subclass") + if not isinstance(module, str): + raise TypeError("module must be a string") + if not isinstance(lineno, int): + raise TypeError("lineno must be an int") + if lineno < 0: + raise ValueError("lineno must be an int >= 0") + + if message or module: + import re + + if message: + message = re.compile(message, re.I) + else: + message = None + if module: + module = re.compile(module) + else: + module = None + + _wm._add_filter(action, message, category, module, lineno, append=append) + + +def simplefilter(action, category=Warning, lineno=0, append=False): + """Insert a simple entry into the list of warnings filters (at the front). + + A simple filter matches all modules and messages. + 'action' -- one of "error", "ignore", "always", "all", "default", "module", + or "once" + 'category' -- a class that the warning must be a subclass of + 'lineno' -- an integer line number, 0 matches all warnings + 'append' -- if true, append to the list of filters + """ + if action not in {"error", "ignore", "always", "all", "default", "module", "once"}: + raise ValueError(f"invalid action: {action!r}") + if not isinstance(lineno, int): + raise TypeError("lineno must be an int") + if lineno < 0: + raise ValueError("lineno must be an int >= 0") + _wm._add_filter(action, None, category, None, lineno, append=append) + + +def _filters_mutated(): + # Even though this function is not part of the public API, it's used by + # a fair amount of user code. + with _wm._lock: + _wm._filters_mutated_lock_held() + + +def _add_filter(*item, append): + with _wm._lock: + filters = _wm._get_filters() + if not append: + # Remove possible duplicate filters, so new one will be placed + # in correct place. If append=True and duplicate exists, do nothing. + try: + filters.remove(item) + except ValueError: + pass + filters.insert(0, item) + else: + if item not in filters: + filters.append(item) + _wm._filters_mutated_lock_held() + + +def resetwarnings(): + """Clear the list of warning filters, so that no filters are active.""" + with _wm._lock: + del _wm._get_filters()[:] + _wm._filters_mutated_lock_held() + + +class _OptionError(Exception): + """Exception used by option processing helpers.""" + pass + + +# Helper to process -W options passed via sys.warnoptions +def _processoptions(args): + for arg in args: + try: + _wm._setoption(arg) + except _wm._OptionError as msg: + print("Invalid -W option ignored:", msg, file=sys.stderr) + + +# Helper for _processoptions() +def _setoption(arg): + parts = arg.split(':') + if len(parts) > 5: + raise _wm._OptionError("too many fields (max 5): %r" % (arg,)) + while len(parts) < 5: + parts.append('') + action, message, category, module, lineno = [s.strip() + for s in parts] + action = _wm._getaction(action) + category = _wm._getcategory(category) + if message or module: + import re + if message: + message = re.escape(message) + if module: + module = re.escape(module) + r'\z' + if lineno: + try: + lineno = int(lineno) + if lineno < 0: + raise ValueError + except (ValueError, OverflowError): + raise _wm._OptionError("invalid lineno %r" % (lineno,)) from None + else: + lineno = 0 + _wm.filterwarnings(action, message, category, module, lineno) + + +# Helper for _setoption() +def _getaction(action): + if not action: + return "default" + for a in ('default', 'always', 'all', 'ignore', 'module', 'once', 'error'): + if a.startswith(action): + return a + raise _wm._OptionError("invalid action: %r" % (action,)) + + +# Helper for _setoption() +def _getcategory(category): + if not category: + return Warning + if '.' not in category: + import builtins as m + klass = category + else: + module, _, klass = category.rpartition('.') + try: + m = __import__(module, None, None, [klass]) + except ImportError: + raise _wm._OptionError("invalid module name: %r" % (module,)) from None + try: + cat = getattr(m, klass) + except AttributeError: + raise _wm._OptionError("unknown warning category: %r" % (category,)) from None + if not issubclass(cat, Warning): + raise _wm._OptionError("invalid warning category: %r" % (category,)) + return cat + + +def _is_internal_filename(filename): + return 'importlib' in filename and '_bootstrap' in filename + + +def _is_filename_to_skip(filename, skip_file_prefixes): + return any(filename.startswith(prefix) for prefix in skip_file_prefixes) + + +def _is_internal_frame(frame): + """Signal whether the frame is an internal CPython implementation detail.""" + return _is_internal_filename(frame.f_code.co_filename) + + +def _next_external_frame(frame, skip_file_prefixes): + """Find the next frame that doesn't involve Python or user internals.""" + frame = frame.f_back + while frame is not None and ( + _is_internal_filename(filename := frame.f_code.co_filename) or + _is_filename_to_skip(filename, skip_file_prefixes)): + frame = frame.f_back + return frame + + +# Code typically replaced by _warnings +def warn(message, category=None, stacklevel=1, source=None, + *, skip_file_prefixes=()): + """Issue a warning, or maybe ignore it or raise an exception.""" + # Check if message is already a Warning object + if isinstance(message, Warning): + category = message.__class__ + # Check category argument + if category is None: + category = UserWarning + if not (isinstance(category, type) and issubclass(category, Warning)): + raise TypeError("category must be a Warning subclass, " + "not '{:s}'".format(type(category).__name__)) + if not isinstance(skip_file_prefixes, tuple): + # The C version demands a tuple for implementation performance. + raise TypeError('skip_file_prefixes must be a tuple of strs.') + if skip_file_prefixes: + stacklevel = max(2, stacklevel) + # Get context information + try: + if stacklevel <= 1 or _is_internal_frame(sys._getframe(1)): + # If frame is too small to care or if the warning originated in + # internal code, then do not try to hide any frames. + frame = sys._getframe(stacklevel) + else: + frame = sys._getframe(1) + # Look for one frame less since the above line starts us off. + for x in range(stacklevel-1): + frame = _next_external_frame(frame, skip_file_prefixes) + if frame is None: + raise ValueError + except ValueError: + globals = sys.__dict__ + filename = "" + lineno = 0 + else: + globals = frame.f_globals + filename = frame.f_code.co_filename + lineno = frame.f_lineno + if '__name__' in globals: + module = globals['__name__'] + else: + module = "" + registry = globals.setdefault("__warningregistry__", {}) + _wm.warn_explicit( + message, + category, + filename, + lineno, + module, + registry, + globals, + source=source, + ) + + +def warn_explicit(message, category, filename, lineno, + module=None, registry=None, module_globals=None, + source=None): + lineno = int(lineno) + if module is None: + module = filename or "" + if module[-3:].lower() == ".py": + module = module[:-3] # XXX What about leading pathname? + if isinstance(message, Warning): + text = str(message) + category = message.__class__ + else: + text = message + message = category(message) + key = (text, category, lineno) + with _wm._lock: + if registry is None: + registry = {} + if registry.get('version', 0) != _wm._filters_version: + registry.clear() + registry['version'] = _wm._filters_version + # Quick test for common case + if registry.get(key): + return + # Search the filters + for item in _wm._get_filters(): + action, msg, cat, mod, ln = item + if ((msg is None or msg.match(text)) and + issubclass(category, cat) and + (mod is None or mod.match(module)) and + (ln == 0 or lineno == ln)): + break + else: + action = _wm.defaultaction + # Early exit actions + if action == "ignore": + return + + if action == "error": + raise message + # Other actions + if action == "once": + registry[key] = 1 + oncekey = (text, category) + if _wm.onceregistry.get(oncekey): + return + _wm.onceregistry[oncekey] = 1 + elif action in {"always", "all"}: + pass + elif action == "module": + registry[key] = 1 + altkey = (text, category, 0) + if registry.get(altkey): + return + registry[altkey] = 1 + elif action == "default": + registry[key] = 1 + else: + # Unrecognized actions are errors + raise RuntimeError( + "Unrecognized action (%r) in warnings.filters:\n %s" % + (action, item)) + + # Prime the linecache for formatting, in case the + # "file" is actually in a zipfile or something. + import linecache + linecache.getlines(filename, module_globals) + + # Print message and context + msg = _wm.WarningMessage(message, category, filename, lineno, source=source) + _wm._showwarnmsg(msg) + + +class WarningMessage(object): + + _WARNING_DETAILS = ("message", "category", "filename", "lineno", "file", + "line", "source") + + def __init__(self, message, category, filename, lineno, file=None, + line=None, source=None): + self.message = message + self.category = category + self.filename = filename + self.lineno = lineno + self.file = file + self.line = line + self.source = source + self._category_name = category.__name__ if category else None + + def __str__(self): + return ("{message : %r, category : %r, filename : %r, lineno : %s, " + "line : %r}" % (self.message, self._category_name, + self.filename, self.lineno, self.line)) + + def __repr__(self): + return f'<{type(self).__qualname__} {self}>' + + +class catch_warnings(object): + + """A context manager that copies and restores the warnings filter upon + exiting the context. + + The 'record' argument specifies whether warnings should be captured by a + custom implementation of warnings.showwarning() and be appended to a list + returned by the context manager. Otherwise None is returned by the context + manager. The objects appended to the list are arguments whose attributes + mirror the arguments to showwarning(). + + The 'module' argument is to specify an alternative module to the module + named 'warnings' and imported under that name. This argument is only useful + when testing the warnings module itself. + + If the 'action' argument is not None, the remaining arguments are passed + to warnings.simplefilter() as if it were called immediately on entering the + context. + """ + + def __init__(self, *, record=False, module=None, + action=None, category=Warning, lineno=0, append=False): + """Specify whether to record warnings and if an alternative module + should be used other than sys.modules['warnings']. + + """ + self._record = record + self._module = sys.modules['warnings'] if module is None else module + self._entered = False + if action is None: + self._filter = None + else: + self._filter = (action, category, lineno, append) + + def __repr__(self): + args = [] + if self._record: + args.append("record=True") + if self._module is not sys.modules['warnings']: + args.append("module=%r" % self._module) + name = type(self).__name__ + return "%s(%s)" % (name, ", ".join(args)) + + def __enter__(self): + if self._entered: + raise RuntimeError("Cannot enter %r twice" % self) + self._entered = True + with _wm._lock: + if _use_context: + self._saved_context, context = self._module._new_context() + else: + context = None + self._filters = self._module.filters + self._module.filters = self._filters[:] + self._showwarning = self._module.showwarning + self._showwarnmsg_impl = self._module._showwarnmsg_impl + self._module._filters_mutated_lock_held() + if self._record: + if _use_context: + context.log = log = [] + else: + log = [] + self._module._showwarnmsg_impl = log.append + # Reset showwarning() to the default implementation to make sure + # that _showwarnmsg() calls _showwarnmsg_impl() + self._module.showwarning = self._module._showwarning_orig + else: + log = None + if self._filter is not None: + self._module.simplefilter(*self._filter) + return log + + def __exit__(self, *exc_info): + if not self._entered: + raise RuntimeError("Cannot exit %r without entering first" % self) + with _wm._lock: + if _use_context: + self._module._warnings_context.set(self._saved_context) + else: + self._module.filters = self._filters + self._module.showwarning = self._showwarning + self._module._showwarnmsg_impl = self._showwarnmsg_impl + self._module._filters_mutated_lock_held() + + +class deprecated: + """Indicate that a class, function or overload is deprecated. + + When this decorator is applied to an object, the type checker + will generate a diagnostic on usage of the deprecated object. + + Usage: + + @deprecated("Use B instead") + class A: + pass + + @deprecated("Use g instead") + def f(): + pass + + @overload + @deprecated("int support is deprecated") + def g(x: int) -> int: ... + @overload + def g(x: str) -> int: ... + + The warning specified by *category* will be emitted at runtime + on use of deprecated objects. For functions, that happens on calls; + for classes, on instantiation and on creation of subclasses. + If the *category* is ``None``, no warning is emitted at runtime. + The *stacklevel* determines where the + warning is emitted. If it is ``1`` (the default), the warning + is emitted at the direct caller of the deprecated object; if it + is higher, it is emitted further up the stack. + Static type checker behavior is not affected by the *category* + and *stacklevel* arguments. + + The deprecation message passed to the decorator is saved in the + ``__deprecated__`` attribute on the decorated object. + If applied to an overload, the decorator + must be after the ``@overload`` decorator for the attribute to + exist on the overload as returned by ``get_overloads()``. + + See PEP 702 for details. + + """ + def __init__( + self, + message: str, + /, + *, + category: type[Warning] | None = DeprecationWarning, + stacklevel: int = 1, + ) -> None: + if not isinstance(message, str): + raise TypeError( + f"Expected an object of type str for 'message', not {type(message).__name__!r}" + ) + self.message = message + self.category = category + self.stacklevel = stacklevel + + def __call__(self, arg, /): + # Make sure the inner functions created below don't + # retain a reference to self. + msg = self.message + category = self.category + stacklevel = self.stacklevel + if category is None: + arg.__deprecated__ = msg + return arg + elif isinstance(arg, type): + import functools + from types import MethodType + + original_new = arg.__new__ + + @functools.wraps(original_new) + def __new__(cls, /, *args, **kwargs): + if cls is arg: + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + if original_new is not object.__new__: + return original_new(cls, *args, **kwargs) + # Mirrors a similar check in object.__new__. + elif cls.__init__ is object.__init__ and (args or kwargs): + raise TypeError(f"{cls.__name__}() takes no arguments") + else: + return original_new(cls) + + arg.__new__ = staticmethod(__new__) + + if "__init_subclass__" in arg.__dict__: + # __init_subclass__ is directly present on the decorated class. + # Synthesize a wrapper that calls this method directly. + original_init_subclass = arg.__init_subclass__ + # We need slightly different behavior if __init_subclass__ + # is a bound method (likely if it was implemented in Python). + # Otherwise, it likely means it's a builtin such as + # object's implementation of __init_subclass__. + if isinstance(original_init_subclass, MethodType): + original_init_subclass = original_init_subclass.__func__ + + @functools.wraps(original_init_subclass) + def __init_subclass__(*args, **kwargs): + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + return original_init_subclass(*args, **kwargs) + else: + def __init_subclass__(cls, *args, **kwargs): + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + return super(arg, cls).__init_subclass__(*args, **kwargs) + + arg.__init_subclass__ = classmethod(__init_subclass__) + + arg.__deprecated__ = __new__.__deprecated__ = msg + __init_subclass__.__deprecated__ = msg + return arg + elif callable(arg): + import functools + import inspect + + @functools.wraps(arg) + def wrapper(*args, **kwargs): + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + return arg(*args, **kwargs) + + if inspect.iscoroutinefunction(arg): + wrapper = inspect.markcoroutinefunction(wrapper) + + arg.__deprecated__ = wrapper.__deprecated__ = msg + return wrapper + else: + raise TypeError( + "@deprecated decorator with non-None category must be applied to " + f"a class or callable, not {arg!r}" + ) + + +_DEPRECATED_MSG = "{name!r} is deprecated and slated for removal in Python {remove}" + + +def _deprecated(name, message=_DEPRECATED_MSG, *, remove, _version=sys.version_info): + """Warn that *name* is deprecated or should be removed. + + RuntimeError is raised if *remove* specifies a major/minor tuple older than + the current Python version or the same version but past the alpha. + + The *message* argument is formatted with *name* and *remove* as a Python + version tuple (e.g. (3, 11)). + + """ + remove_formatted = f"{remove[0]}.{remove[1]}" + if (_version[:2] > remove) or (_version[:2] == remove and _version[3] != "alpha"): + msg = f"{name!r} was slated for removal after Python {remove_formatted} alpha" + raise RuntimeError(msg) + else: + msg = message.format(name=name, remove=remove_formatted) + _wm.warn(msg, DeprecationWarning, stacklevel=3) + + +# Private utility function called by _PyErr_WarnUnawaitedCoroutine +def _warn_unawaited_coroutine(coro): + msg_lines = [ + f"coroutine '{coro.__qualname__}' was never awaited\n" + ] + if coro.cr_origin is not None: + import linecache, traceback + def extract(): + for filename, lineno, funcname in reversed(coro.cr_origin): + line = linecache.getline(filename, lineno) + yield (filename, lineno, funcname, line) + msg_lines.append("Coroutine created at (most recent call last)\n") + msg_lines += traceback.format_list(list(extract())) + msg = "".join(msg_lines).rstrip("\n") + # Passing source= here means that if the user happens to have tracemalloc + # enabled and tracking where the coroutine was created, the warning will + # contain that traceback. This does mean that if they have *both* + # coroutine origin tracking *and* tracemalloc enabled, they'll get two + # partially-redundant tracebacks. If we wanted to be clever we could + # probably detect this case and avoid it, but for now we don't bother. + _wm.warn( + msg, category=RuntimeWarning, stacklevel=2, source=coro + ) + + +def _setup_defaults(): + # Several warning categories are ignored by default in regular builds + if hasattr(sys, 'gettotalrefcount'): + return + _wm.filterwarnings("default", category=DeprecationWarning, module="__main__", append=1) + _wm.simplefilter("ignore", category=DeprecationWarning, append=1) + _wm.simplefilter("ignore", category=PendingDeprecationWarning, append=1) + _wm.simplefilter("ignore", category=ImportWarning, append=1) + _wm.simplefilter("ignore", category=ResourceWarning, append=1) diff --git a/Lib/_pycodecs.py b/Lib/_pycodecs.py index 0741504cc9e..98dec3c614d 100644 --- a/Lib/_pycodecs.py +++ b/Lib/_pycodecs.py @@ -22,10 +22,10 @@ The builtin Unicode codecs use the following interface: - _encode(Unicode_object[,errors='strict']) -> + _encode(Unicode_object[,errors='strict']) -> (string object, bytes consumed) - _decode(char_buffer_obj[,errors='strict']) -> + _decode(char_buffer_obj[,errors='strict']) -> (Unicode object, bytes consumed) _encode() interfaces also accept non-Unicode object as @@ -44,47 +44,76 @@ From PyPy v1.0.0 """ -#from unicodecodec import * - -__all__ = ['register', 'lookup', 'lookup_error', 'register_error', 'encode', 'decode', - 'latin_1_encode', 'mbcs_decode', 'readbuffer_encode', 'escape_encode', - 'utf_8_decode', 'raw_unicode_escape_decode', 'utf_7_decode', - 'unicode_escape_encode', 'latin_1_decode', 'utf_16_decode', - 'unicode_escape_decode', 'ascii_decode', 'charmap_encode', 'charmap_build', - 'unicode_internal_encode', 'unicode_internal_decode', 'utf_16_ex_decode', - 'escape_decode', 'charmap_decode', 'utf_7_encode', 'mbcs_encode', - 'ascii_encode', 'utf_16_encode', 'raw_unicode_escape_encode', 'utf_8_encode', - 'utf_16_le_encode', 'utf_16_be_encode', 'utf_16_le_decode', 'utf_16_be_decode',] +# from unicodecodec import * + +__all__ = [ + "register", + "lookup", + "lookup_error", + "register_error", + "encode", + "decode", + "latin_1_encode", + "mbcs_decode", + "readbuffer_encode", + "escape_encode", + "utf_8_decode", + "raw_unicode_escape_decode", + "utf_7_decode", + "unicode_escape_encode", + "latin_1_decode", + "utf_16_decode", + "unicode_escape_decode", + "ascii_decode", + "charmap_encode", + "charmap_build", + "unicode_internal_encode", + "unicode_internal_decode", + "utf_16_ex_decode", + "escape_decode", + "charmap_decode", + "utf_7_encode", + "mbcs_encode", + "ascii_encode", + "utf_16_encode", + "raw_unicode_escape_encode", + "utf_8_encode", + "utf_16_le_encode", + "utf_16_be_encode", + "utf_16_le_decode", + "utf_16_be_decode", + "utf_32_ex_decode", +] import sys import warnings from _codecs import * -def latin_1_encode( obj, errors='strict'): - """None - """ +def latin_1_encode(obj, errors="strict"): + """None""" res = PyUnicode_EncodeLatin1(obj, len(obj), errors) res = bytes(res) return res, len(obj) + + # XXX MBCS codec might involve ctypes ? def mbcs_decode(): - """None - """ + """None""" pass -def readbuffer_encode( obj, errors='strict'): - """None - """ + +def readbuffer_encode(obj, errors="strict"): + """None""" if isinstance(obj, str): res = obj.encode() else: res = bytes(obj) return res, len(obj) -def escape_encode( obj, errors='strict'): - """None - """ + +def escape_encode(obj, errors="strict"): + """None""" if not isinstance(obj, bytes): raise TypeError("must be bytes") s = repr(obj).encode() @@ -93,85 +122,88 @@ def escape_encode( obj, errors='strict'): v = v.replace(b"'", b"\\'").replace(b'\\"', b'"') return v, len(obj) -def raw_unicode_escape_decode( data, errors='strict', final=False): - """None - """ - res = PyUnicode_DecodeRawUnicodeEscape(data, len(data), errors, final) - res = ''.join(res) - return res, len(data) -def utf_7_decode( data, errors='strict'): - """None - """ - res = PyUnicode_DecodeUTF7(data, len(data), errors) - res = ''.join(res) - return res, len(data) +def raw_unicode_escape_decode(data, errors="strict", final=True): + """None""" + res, consumed = PyUnicode_DecodeRawUnicodeEscape(data, len(data), errors, final) + res = "".join(res) + return res, consumed -def unicode_escape_encode( obj, errors='strict'): - """None - """ + +def utf_7_decode(data, errors="strict", final=False): + """None""" + res, consumed = PyUnicode_DecodeUTF7(data, len(data), errors, final) + res = "".join(res) + return res, consumed + + +def unicode_escape_encode(obj, errors="strict"): + """None""" res = unicodeescape_string(obj, len(obj), 0) - res = b''.join(res) + res = b"".join(res) return res, len(obj) -def latin_1_decode( data, errors='strict'): - """None - """ + +def latin_1_decode(data, errors="strict"): + """None""" res = PyUnicode_DecodeLatin1(data, len(data), errors) - res = ''.join(res) + res = "".join(res) return res, len(data) -def utf_16_decode( data, errors='strict', final=False): - """None - """ + +def utf_16_decode(data, errors="strict", final=False): + """None""" consumed = len(data) if final: consumed = 0 - res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, 'native', final) - res = ''.join(res) + res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful( + data, len(data), errors, "native", final + ) + res = "".join(res) return res, consumed -def unicode_escape_decode( data, errors='strict', final=False): - """None - """ - res = PyUnicode_DecodeUnicodeEscape(data, len(data), errors, final) - res = ''.join(res) - return res, len(data) +def unicode_escape_decode(data, errors="strict", final=True): + """None""" + res, consumed = PyUnicode_DecodeUnicodeEscape(data, len(data), errors, final) + res = "".join(res) + return res, consumed -def ascii_decode( data, errors='strict'): - """None - """ + +def ascii_decode(data, errors="strict"): + """None""" res = PyUnicode_DecodeASCII(data, len(data), errors) - res = ''.join(res) + res = "".join(res) return res, len(data) -def charmap_encode(obj, errors='strict', mapping='latin-1'): - """None - """ + +def charmap_encode(obj, errors="strict", mapping="latin-1"): + """None""" res = PyUnicode_EncodeCharmap(obj, len(obj), mapping, errors) res = bytes(res) return res, len(obj) + def charmap_build(s): return {ord(c): i for i, c in enumerate(s)} + if sys.maxunicode == 65535: unicode_bytes = 2 else: unicode_bytes = 4 -def unicode_internal_encode( obj, errors='strict'): - """None - """ + +def unicode_internal_encode(obj, errors="strict"): + """None""" if type(obj) == str: p = bytearray() t = [ord(x) for x in obj] for i in t: b = bytearray() for j in range(unicode_bytes): - b.append(i%256) + b.append(i % 256) i >>= 8 if sys.byteorder == "big": b.reverse() @@ -179,12 +211,12 @@ def unicode_internal_encode( obj, errors='strict'): res = bytes(p) return res, len(res) else: - res = "You can do better than this" # XXX make this right + res = "You can do better than this" # XXX make this right return res, len(res) -def unicode_internal_decode( unistr, errors='strict'): - """None - """ + +def unicode_internal_decode(unistr, errors="strict"): + """None""" if type(unistr) == str: return unistr, len(unistr) else: @@ -198,165 +230,418 @@ def unicode_internal_decode( unistr, errors='strict'): start = 0 stop = unicode_bytes step = 1 - while i < len(unistr)-unicode_bytes+1: + while i < len(unistr) - unicode_bytes + 1: t = 0 h = 0 for j in range(start, stop, step): - t += ord(unistr[i+j])<<(h*8) + t += ord(unistr[i + j]) << (h * 8) h += 1 i += unicode_bytes p += chr(t) - res = ''.join(p) + res = "".join(p) return res, len(res) -def utf_16_ex_decode( data, errors='strict', byteorder=0, final=0): - """None - """ + +def utf_16_ex_decode(data, errors="strict", byteorder=0, final=0): + """None""" if byteorder == 0: - bm = 'native' + bm = "native" elif byteorder == -1: - bm = 'little' + bm = "little" else: - bm = 'big' + bm = "big" consumed = len(data) if final: consumed = 0 - res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, bm, final) - res = ''.join(res) + res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful( + data, len(data), errors, bm, final + ) + res = "".join(res) return res, consumed, byteorder -# XXX needs error messages when the input is invalid -def escape_decode(data, errors='strict'): - """None - """ + +def utf_32_ex_decode(data, errors="strict", byteorder=0, final=0): + """None""" + if byteorder == 0: + if len(data) < 4: + if final and len(data): + if sys.byteorder == "little": + bm = "little" + else: + bm = "big" + res, consumed, _ = PyUnicode_DecodeUTF32Stateful( + data, len(data), errors, bm, final + ) + return "".join(res), consumed, 0 + return "", 0, 0 + if data[0:4] == b"\xff\xfe\x00\x00": + res, consumed, _ = PyUnicode_DecodeUTF32Stateful( + data[4:], len(data) - 4, errors, "little", final + ) + return "".join(res), consumed + 4, -1 + if data[0:4] == b"\x00\x00\xfe\xff": + res, consumed, _ = PyUnicode_DecodeUTF32Stateful( + data[4:], len(data) - 4, errors, "big", final + ) + return "".join(res), consumed + 4, 1 + if sys.byteorder == "little": + bm = "little" + else: + bm = "big" + res, consumed, _ = PyUnicode_DecodeUTF32Stateful( + data, len(data), errors, bm, final + ) + return "".join(res), consumed, 0 + + if byteorder == -1: + res, consumed, _ = PyUnicode_DecodeUTF32Stateful( + data, len(data), errors, "little", final + ) + return "".join(res), consumed, -1 + + res, consumed, _ = PyUnicode_DecodeUTF32Stateful( + data, len(data), errors, "big", final + ) + return "".join(res), consumed, 1 + + +def _is_hex_digit(b): + return ( + 0x30 <= b <= 0x39 # 0-9 + or 0x41 <= b <= 0x46 # A-F + or 0x61 <= b <= 0x66 + ) # a-f + + +def escape_decode(data, errors="strict"): + if isinstance(data, str): + data = data.encode("latin-1") l = len(data) i = 0 res = bytearray() while i < l: - - if data[i] == '\\': + if data[i] == 0x5C: # '\\' i += 1 if i >= l: raise ValueError("Trailing \\ in string") - else: - if data[i] == '\\': - res += b'\\' - elif data[i] == 'n': - res += b'\n' - elif data[i] == 't': - res += b'\t' - elif data[i] == 'r': - res += b'\r' - elif data[i] == 'b': - res += b'\b' - elif data[i] == '\'': - res += b'\'' - elif data[i] == '\"': - res += b'\"' - elif data[i] == 'f': - res += b'\f' - elif data[i] == 'a': - res += b'\a' - elif data[i] == 'v': - res += b'\v' - elif '0' <= data[i] <= '9': - # emulate a strange wrap-around behavior of CPython: - # \400 is the same as \000 because 0400 == 256 - octal = data[i:i+3] - res.append(int(octal, 8) & 0xFF) - i += 2 - elif data[i] == 'x': - hexa = data[i+1:i+3] - res.append(int(hexa, 16)) + ch = data[i] + if ch == 0x5C: + res.append(0x5C) # \\ + elif ch == 0x27: + res.append(0x27) # \' + elif ch == 0x22: + res.append(0x22) # \" + elif ch == 0x61: + res.append(0x07) # \a + elif ch == 0x62: + res.append(0x08) # \b + elif ch == 0x66: + res.append(0x0C) # \f + elif ch == 0x6E: + res.append(0x0A) # \n + elif ch == 0x72: + res.append(0x0D) # \r + elif ch == 0x74: + res.append(0x09) # \t + elif ch == 0x76: + res.append(0x0B) # \v + elif ch == 0x0A: + pass # \ continuation + elif 0x30 <= ch <= 0x37: # \0-\7 octal + val = ch - 0x30 + if i + 1 < l and 0x30 <= data[i + 1] <= 0x37: + i += 1 + val = (val << 3) | (data[i] - 0x30) + if i + 1 < l and 0x30 <= data[i + 1] <= 0x37: + i += 1 + val = (val << 3) | (data[i] - 0x30) + res.append(val & 0xFF) + elif ch == 0x78: # \x hex + hex_count = 0 + for j in range(1, 3): + if i + j < l and _is_hex_digit(data[i + j]): + hex_count += 1 + else: + break + if hex_count < 2: + if errors == "strict": + raise ValueError("invalid \\x escape at position %d" % (i - 1)) + elif errors == "replace": + res.append(0x3F) # '?' + i += hex_count + else: + res.append(int(bytes(data[i + 1 : i + 3]), 16)) i += 2 + else: + import warnings + + warnings.warn( + '"\\%c" is an invalid escape sequence' % ch + if 0x20 <= ch < 0x7F + else '"\\x%02x" is an invalid escape sequence' % ch, + DeprecationWarning, + stacklevel=2, + ) + res.append(0x5C) + res.append(ch) else: res.append(data[i]) i += 1 - res = bytes(res) - return res, len(res) + return bytes(res), l + -def charmap_decode( data, errors='strict', mapping=None): - """None - """ +def charmap_decode(data, errors="strict", mapping=None): + """None""" res = PyUnicode_DecodeCharmap(data, len(data), mapping, errors) - res = ''.join(res) + res = "".join(res) return res, len(data) -def utf_7_encode( obj, errors='strict'): - """None - """ +def utf_7_encode(obj, errors="strict"): + """None""" res = PyUnicode_EncodeUTF7(obj, len(obj), 0, 0, errors) - res = b''.join(res) + res = b"".join(res) return res, len(obj) -def mbcs_encode( obj, errors='strict'): - """None - """ + +def mbcs_encode(obj, errors="strict"): + """None""" pass + + ## return (PyUnicode_EncodeMBCS( -## (obj), +## (obj), ## len(obj), ## errors), ## len(obj)) - -def ascii_encode( obj, errors='strict'): - """None - """ + +def ascii_encode(obj, errors="strict"): + """None""" res = PyUnicode_EncodeASCII(obj, len(obj), errors) res = bytes(res) return res, len(obj) -def utf_16_encode( obj, errors='strict'): - """None - """ - res = PyUnicode_EncodeUTF16(obj, len(obj), errors, 'native') + +def utf_16_encode(obj, errors="strict"): + """None""" + res = PyUnicode_EncodeUTF16(obj, len(obj), errors, "native") res = bytes(res) return res, len(obj) -def raw_unicode_escape_encode( obj, errors='strict'): - """None - """ + +def raw_unicode_escape_encode(obj, errors="strict"): + """None""" res = PyUnicode_EncodeRawUnicodeEscape(obj, len(obj)) res = bytes(res) return res, len(obj) -def utf_16_le_encode( obj, errors='strict'): - """None - """ - res = PyUnicode_EncodeUTF16(obj, len(obj), errors, 'little') + +def utf_16_le_encode(obj, errors="strict"): + """None""" + res = PyUnicode_EncodeUTF16(obj, len(obj), errors, "little") res = bytes(res) return res, len(obj) -def utf_16_be_encode( obj, errors='strict'): - """None - """ - res = PyUnicode_EncodeUTF16(obj, len(obj), errors, 'big') + +def utf_16_be_encode(obj, errors="strict"): + """None""" + res = PyUnicode_EncodeUTF16(obj, len(obj), errors, "big") res = bytes(res) return res, len(obj) -def utf_16_le_decode( data, errors='strict', byteorder=0, final = 0): - """None - """ - consumed = len(data) - if final: - consumed = 0 - res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, 'little', final) - res = ''.join(res) + +def utf_16_le_decode(data, errors="strict", final=0): + res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful( + data, len(data), errors, "little", final + ) + res = "".join(res) return res, consumed -def utf_16_be_decode( data, errors='strict', byteorder=0, final = 0): - """None - """ - consumed = len(data) - if final: - consumed = 0 - res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful(data, len(data), errors, 'big', final) - res = ''.join(res) + +def utf_16_be_decode(data, errors="strict", final=0): + res, consumed, byteorder = PyUnicode_DecodeUTF16Stateful( + data, len(data), errors, "big", final + ) + res = "".join(res) return res, consumed +def STORECHAR32(ch, byteorder): + """Store a 32-bit character as 4 bytes in the specified byte order.""" + b0 = ch & 0xFF + b1 = (ch >> 8) & 0xFF + b2 = (ch >> 16) & 0xFF + b3 = (ch >> 24) & 0xFF + if byteorder == "little": + return [b0, b1, b2, b3] + else: # big-endian + return [b3, b2, b1, b0] + + +def PyUnicode_EncodeUTF32(s, size, errors, byteorder="little"): + """Encode a Unicode string to UTF-32.""" + p = [] + bom = sys.byteorder + + if byteorder == "native": + bom = sys.byteorder + # Add BOM for native encoding + p += STORECHAR32(0xFEFF, bom) + + if byteorder == "little": + bom = "little" + elif byteorder == "big": + bom = "big" + + pos = 0 + while pos < len(s): + ch = ord(s[pos]) + if 0xD800 <= ch <= 0xDFFF: + if errors == "surrogatepass": + p += STORECHAR32(ch, bom) + pos += 1 + else: + res, pos = unicode_call_errorhandler( + errors, "utf-32", "surrogates not allowed", s, pos, pos + 1, False + ) + for c in res: + p += STORECHAR32(ord(c), bom) + else: + p += STORECHAR32(ch, bom) + pos += 1 + + return p + + +def utf_32_encode(obj, errors="strict"): + """UTF-32 encoding with BOM.""" + encoded = PyUnicode_EncodeUTF32(obj, len(obj), errors, "native") + return bytes(encoded), len(obj) + + +def utf_32_le_encode(obj, errors="strict"): + """UTF-32 little-endian encoding without BOM.""" + encoded = PyUnicode_EncodeUTF32(obj, len(obj), errors, "little") + return bytes(encoded), len(obj) + + +def utf_32_be_encode(obj, errors="strict"): + """UTF-32 big-endian encoding without BOM.""" + res = PyUnicode_EncodeUTF32(obj, len(obj), errors, "big") + res = bytes(res) + return res, len(obj) + + +def PyUnicode_DecodeUTF32Stateful(data, size, errors, byteorder="little", final=0): + """Decode UTF-32 encoded bytes to Unicode string.""" + if size == 0: + return [], 0, 0 + + result = [] + pos = 0 + aligned_size = (size // 4) * 4 + + while pos + 3 < aligned_size: + if byteorder == "little": + ch = ( + data[pos] + | (data[pos + 1] << 8) + | (data[pos + 2] << 16) + | (data[pos + 3] << 24) + ) + else: # big-endian + ch = ( + (data[pos] << 24) + | (data[pos + 1] << 16) + | (data[pos + 2] << 8) + | data[pos + 3] + ) + + # Validate code point + if ch > 0x10FFFF: + if errors == "strict": + raise UnicodeDecodeError( + "utf-32", + bytes(data), + pos, + pos + 4, + "codepoint not in range(0x110000)", + ) + elif errors == "replace": + result.append("\ufffd") + # 'ignore' - skip this character + pos += 4 + elif 0xD800 <= ch <= 0xDFFF: + if errors == "surrogatepass": + result.append(chr(ch)) + pos += 4 + else: + msg = "code point in surrogate code point range(0xd800, 0xe000)" + res, pos = unicode_call_errorhandler( + errors, "utf-32", msg, data, pos, pos + 4, True + ) + result.append(res) + else: + result.append(chr(ch)) + pos += 4 + + # Handle trailing incomplete bytes + if pos < size: + if final: + res, pos = unicode_call_errorhandler( + errors, "utf-32", "truncated data", data, pos, size, True + ) + if res: + result.append(res) + + return result, pos, 0 + + +def utf_32_decode(data, errors="strict", final=0): + """UTF-32 decoding with BOM detection.""" + if len(data) >= 4: + # Check for BOM + if data[0:4] == b"\xff\xfe\x00\x00": + # UTF-32 LE BOM + res, consumed, _ = PyUnicode_DecodeUTF32Stateful( + data[4:], len(data) - 4, errors, "little", final + ) + res = "".join(res) + return res, consumed + 4 + elif data[0:4] == b"\x00\x00\xfe\xff": + # UTF-32 BE BOM + res, consumed, _ = PyUnicode_DecodeUTF32Stateful( + data[4:], len(data) - 4, errors, "big", final + ) + res = "".join(res) + return res, consumed + 4 + + # Default to little-endian if no BOM + byteorder = "little" if sys.byteorder == "little" else "big" + res, consumed, _ = PyUnicode_DecodeUTF32Stateful( + data, len(data), errors, byteorder, final + ) + res = "".join(res) + return res, consumed + + +def utf_32_le_decode(data, errors="strict", final=0): + """UTF-32 little-endian decoding without BOM.""" + res, consumed, _ = PyUnicode_DecodeUTF32Stateful( + data, len(data), errors, "little", final + ) + res = "".join(res) + return res, consumed + + +def utf_32_be_decode(data, errors="strict", final=0): + """UTF-32 big-endian decoding without BOM.""" + res, consumed, _ = PyUnicode_DecodeUTF32Stateful( + data, len(data), errors, "big", final + ) + res = "".join(res) + return res, consumed # ---------------------------------------------------------------------- @@ -364,9 +649,9 @@ def utf_16_be_decode( data, errors='strict', byteorder=0, final = 0): ##import sys ##""" Python implementation of CPythons builtin unicode codecs. ## -## Generally the functions in this module take a list of characters an returns +## Generally the functions in this module take a list of characters an returns ## a list of characters. -## +## ## For use in the PyPy project""" @@ -376,283 +661,496 @@ def utf_16_be_decode( data, errors='strict', byteorder=0, final = 0): ## 1 - special ## 2 - whitespace (optional) ## 3 - RFC2152 Set O (optional) - + utf7_special = [ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 3, 3, 3, 3, 3, 3, 0, 0, 0, 3, 1, 0, 0, 0, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1, 3, 3, 3, - 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 1, 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 2, + 1, + 1, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 3, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 3, + 3, + 3, + 0, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 1, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 3, + 3, + 1, + 1, ] -unicode_latin1 = [None]*256 +unicode_latin1 = [None] * 256 def SPECIAL(c, encodeO, encodeWS): c = ord(c) - return (c>127 or utf7_special[c] == 1) or \ - (encodeWS and (utf7_special[(c)] == 2)) or \ - (encodeO and (utf7_special[(c)] == 3)) + return ( + (c > 127 or utf7_special[c] == 1) + or (encodeWS and (utf7_special[(c)] == 2)) + or (encodeO and (utf7_special[(c)] == 3)) + ) + + def B64(n): - return bytes([b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f]]) + return bytes( + [ + b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[ + (n) & 0x3F + ] + ] + ) + + def B64CHAR(c): - return (c.isalnum() or (c) == b'+' or (c) == b'/') + return c.isalnum() or (c) == b"+" or (c) == b"/" + + def UB64(c): - if (c) == b'+' : - return 62 - elif (c) == b'/': - return 63 - elif (c) >= b'a': - return ord(c) - 71 - elif (c) >= b'A': - return ord(c) - 65 - else: + if (c) == b"+": + return 62 + elif (c) == b"/": + return 63 + elif (c) >= b"a": + return ord(c) - 71 + elif (c) >= b"A": + return ord(c) - 65 + else: return ord(c) + 4 -def ENCODE( ch, bits) : + +def ENCODE(ch, bits): out = [] - while (bits >= 6): - out += B64(ch >> (bits-6)) - bits -= 6 + while bits >= 6: + out += B64(ch >> (bits - 6)) + bits -= 6 return out, bits -def PyUnicode_DecodeUTF7(s, size, errors): - starts = s - errmsg = "" - inShift = 0 - bitsleft = 0 - charsleft = 0 - surrogate = 0 - p = [] - errorHandler = None - exc = None +def _IS_BASE64(ch): + return ( + (ord("A") <= ch <= ord("Z")) + or (ord("a") <= ch <= ord("z")) + or (ord("0") <= ch <= ord("9")) + or ch == ord("+") + or ch == ord("/") + ) - if (size == 0): - return '' + +def _FROM_BASE64(ch): + if ch == ord("+"): + return 62 + if ch == ord("/"): + return 63 + if ch >= ord("a"): + return ch - 71 + if ch >= ord("A"): + return ch - 65 + if ch >= ord("0"): + return ch - ord("0") + 52 + return -1 + + +def _DECODE_DIRECT(ch): + return ch <= 127 and ch != ord("+") + + +def PyUnicode_DecodeUTF7(s, size, errors, final=False): + if size == 0: + return [], 0 + + p = [] + inShift = False + base64bits = 0 + base64buffer = 0 + surrogate = 0 + startinpos = 0 + shiftOutStart = 0 i = 0 + while i < size: - - ch = bytes([s[i]]) - if (inShift): - if ((ch == b'-') or not B64CHAR(ch)): - inShift = 0 + ch = s[i] + if inShift: + if _IS_BASE64(ch): + base64buffer = (base64buffer << 6) | _FROM_BASE64(ch) + base64bits += 6 i += 1 - - while (bitsleft >= 16): - outCh = ((charsleft) >> (bitsleft-16)) & 0xffff - bitsleft -= 16 - - if (surrogate): - ## We have already generated an error for the high surrogate - ## so let's not bother seeing if the low surrogate is correct or not - surrogate = 0 - elif (0xDC00 <= (outCh) and (outCh) <= 0xDFFF): - ## This is a surrogate pair. Unfortunately we can't represent - ## it in a 16-bit character - surrogate = 1 - msg = "code pairs are not supported" - out, x = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i) - p.append(out) - bitsleft = 0 - break + if base64bits >= 16: + outCh = (base64buffer >> (base64bits - 16)) & 0xFFFF + base64bits -= 16 + base64buffer &= (1 << base64bits) - 1 + if surrogate: + if 0xDC00 <= outCh <= 0xDFFF: + ch2 = ( + 0x10000 + + ((surrogate - 0xD800) << 10) + + (outCh - 0xDC00) + ) + p.append(chr(ch2)) + surrogate = 0 + continue + else: + p.append(chr(surrogate)) + surrogate = 0 + if 0xD800 <= outCh <= 0xDBFF: + surrogate = outCh else: - p.append(chr(outCh )) - #p += out - if (bitsleft >= 6): -## /* The shift sequence has a partial character in it. If -## bitsleft < 6 then we could just classify it as padding -## but that is not the case here */ - msg = "partial character in shift sequence" - out, x = unicode_call_errorhandler(errors, 'utf-7', msg, s, i-1, i) - -## /* According to RFC2152 the remaining bits should be zero. We -## choose to signal an error/insert a replacement character -## here so indicate the potential of a misencoded character. */ - -## /* On x86, a << b == a << (b%32) so make sure that bitsleft != 0 */ -## if (bitsleft and (charsleft << (sizeof(charsleft) * 8 - bitsleft))): -## raise UnicodeDecodeError, "non-zero padding bits in shift sequence" - if (ch == b'-') : - if ((i < size) and (s[i] == '-')) : - p += '-' - inShift = 1 - - elif SPECIAL(ch, 0, 0) : - raise UnicodeDecodeError("unexpected special character") - - else: - p.append(chr(ord(ch))) + p.append(chr(outCh)) else: - charsleft = (charsleft << 6) | UB64(ch) - bitsleft += 6 - i += 1 -## /* p, charsleft, bitsleft, surrogate = */ DECODE(p, charsleft, bitsleft, surrogate); - elif ( ch == b'+' ): + inShift = False + if base64bits > 0: + if base64bits >= 6: + i += 1 + errmsg = "partial character in shift sequence" + out, i = unicode_call_errorhandler( + errors, "utf-7", errmsg, s, startinpos, i + ) + p.append(out) + continue + else: + if base64buffer != 0: + i += 1 + errmsg = "non-zero padding bits in shift sequence" + out, i = unicode_call_errorhandler( + errors, "utf-7", errmsg, s, startinpos, i + ) + p.append(out) + continue + if surrogate and _DECODE_DIRECT(ch): + p.append(chr(surrogate)) + surrogate = 0 + if ch == ord("-"): + i += 1 + elif ch == ord("+"): startinpos = i i += 1 - if (i= 6 or (base64bits > 0 and base64buffer != 0): + errmsg = "unterminated shift sequence" + out, i = unicode_call_errorhandler( + errors, "utf-7", errmsg, s, startinpos, size + ) + p.append(out) + + return p, size + + +def _ENCODE_DIRECT(ch, encodeSetO, encodeWhiteSpace): + c = ord(ch) if isinstance(ch, str) else ch + if c > 127: + return False + if utf7_special[c] == 0: + return True + if utf7_special[c] == 2: + return not encodeWhiteSpace + if utf7_special[c] == 3: + return not encodeSetO + return False - if (inShift) : - #XXX This aint right - endinpos = size - raise UnicodeDecodeError("unterminated shift sequence") - - return p def PyUnicode_EncodeUTF7(s, size, encodeSetO, encodeWhiteSpace, errors): - -# /* It might be possible to tighten this worst case */ inShift = False - i = 0 - bitsleft = 0 - charsleft = 0 + base64bits = 0 + base64buffer = 0 out = [] - for ch in s: - if (not inShift) : - if (ch == '+'): - out.append(b'+-') - elif (SPECIAL(ch, encodeSetO, encodeWhiteSpace)): - charsleft = ord(ch) - bitsleft = 16 - out.append(b'+') - p, bitsleft = ENCODE( charsleft, bitsleft) - out.append(p) - inShift = bitsleft > 0 + + for i, ch in enumerate(s): + ch_ord = ord(ch) + if inShift: + if _ENCODE_DIRECT(ch, encodeSetO, encodeWhiteSpace): + # shifting out + if base64bits: + out.append(B64(base64buffer << (6 - base64bits))) + base64buffer = 0 + base64bits = 0 + inShift = False + if B64CHAR(ch) or ch == "-": + out.append(b"-") + out.append(bytes([ch_ord])) else: - out.append(bytes([ord(ch)])) + # encode character in base64 + if ch_ord >= 0x10000: + # split into surrogate pair + hi = 0xD800 | ((ch_ord - 0x10000) >> 10) + lo = 0xDC00 | ((ch_ord - 0x10000) & 0x3FF) + base64bits += 16 + base64buffer = (base64buffer << 16) | hi + while base64bits >= 6: + out.append(B64(base64buffer >> (base64bits - 6))) + base64bits -= 6 + base64buffer &= (1 << base64bits) - 1 if base64bits else 0 + ch_ord = lo + + base64bits += 16 + base64buffer = (base64buffer << 16) | ch_ord + while base64bits >= 6: + out.append(B64(base64buffer >> (base64bits - 6))) + base64bits -= 6 + base64buffer &= (1 << base64bits) - 1 if base64bits else 0 else: - if (not SPECIAL(ch, encodeSetO, encodeWhiteSpace)): - out.append(B64((charsleft) << (6-bitsleft))) - charsleft = 0 - bitsleft = 0 -## /* Characters not in the BASE64 set implicitly unshift the sequence -## so no '-' is required, except if the character is itself a '-' */ - if (B64CHAR(ch) or ch == '-'): - out.append(b'-') - inShift = False - out.append(bytes([ord(ch)])) + if ch == "+": + out.append(b"+-") + elif _ENCODE_DIRECT(ch, encodeSetO, encodeWhiteSpace): + out.append(bytes([ch_ord])) else: - bitsleft += 16 - charsleft = (((charsleft) << 16) | ord(ch)) - p, bitsleft = ENCODE(charsleft, bitsleft) - out.append(p) -## /* If the next character is special then we dont' need to terminate -## the shift sequence. If the next character is not a BASE64 character -## or '-' then the shift sequence will be terminated implicitly and we -## don't have to insert a '-'. */ - - if (bitsleft == 0): - if (i + 1 < size): - ch2 = s[i+1] - - if (SPECIAL(ch2, encodeSetO, encodeWhiteSpace)): - pass - elif (B64CHAR(ch2) or ch2 == '-'): - out.append(b'-') - inShift = False - else: + out.append(b"+") + inShift = True + # encode character in base64 + if ch_ord >= 0x10000: + hi = 0xD800 | ((ch_ord - 0x10000) >> 10) + lo = 0xDC00 | ((ch_ord - 0x10000) & 0x3FF) + base64bits += 16 + base64buffer = (base64buffer << 16) | hi + while base64bits >= 6: + out.append(B64(base64buffer >> (base64bits - 6))) + base64bits -= 6 + base64buffer &= (1 << base64bits) - 1 if base64bits else 0 + ch_ord = lo + + base64bits += 16 + base64buffer = (base64buffer << 16) | ch_ord + while base64bits >= 6: + out.append(B64(base64buffer >> (base64bits - 6))) + base64bits -= 6 + base64buffer &= (1 << base64bits) - 1 if base64bits else 0 + + if base64bits == 0: + if i + 1 < size: + ch2 = s[i + 1] + if _ENCODE_DIRECT(ch2, encodeSetO, encodeWhiteSpace): + if B64CHAR(ch2) or ch2 == "-": + out.append(b"-") inShift = False else: - out.append(b'-') + out.append(b"-") inShift = False - i += 1 - - if (bitsleft): - out.append(B64(charsleft << (6-bitsleft) ) ) - out.append(b'-') + + if base64bits: + out.append(B64(base64buffer << (6 - base64bits))) + if inShift: + out.append(b"-") return out -unicode_empty = '' -def unicodeescape_string(s, size, quotes): +unicode_empty = "" + +def unicodeescape_string(s, size, quotes): p = [] - if (quotes) : - if (s.find('\'') != -1 and s.find('"') == -1): + if quotes: + if s.find("'") != -1 and s.find('"') == -1: p.append(b'"') else: - p.append(b'\'') + p.append(b"'") pos = 0 - while (pos < size): + while pos < size: ch = s[pos] - #/* Escape quotes */ - if (quotes and (ch == p[1] or ch == '\\')): - p.append(b'\\%c' % ord(ch)) + # /* Escape quotes */ + if quotes and (ch == p[1] or ch == "\\"): + p.append(b"\\%c" % ord(ch)) pos += 1 continue -#ifdef Py_UNICODE_WIDE - #/* Map 21-bit characters to '\U00xxxxxx' */ - elif (ord(ch) >= 0x10000): - p.append(b'\\U%08x' % ord(ch)) + # ifdef Py_UNICODE_WIDE + # /* Map 21-bit characters to '\U00xxxxxx' */ + elif ord(ch) >= 0x10000: + p.append(b"\\U%08x" % ord(ch)) pos += 1 - continue -#endif - #/* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */ - elif (ord(ch) >= 0xD800 and ord(ch) < 0xDC00): + continue + # endif + # /* Map UTF-16 surrogate pairs to Unicode \UXXXXXXXX escapes */ + elif ord(ch) >= 0xD800 and ord(ch) < 0xDC00: pos += 1 ch2 = s[pos] - - if (ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF): + + if ord(ch2) >= 0xDC00 and ord(ch2) <= 0xDFFF: ucs = (((ord(ch) & 0x03FF) << 10) | (ord(ch2) & 0x03FF)) + 0x00010000 - p.append(b'\\U%08x' % ucs) + p.append(b"\\U%08x" % ucs) pos += 1 continue - - #/* Fall through: isolated surrogates are copied as-is */ + + # /* Fall through: isolated surrogates are copied as-is */ pos -= 1 - - #/* Map 16-bit characters to '\uxxxx' */ - if (ord(ch) >= 256): - p.append(b'\\u%04x' % ord(ch)) - - #/* Map special whitespace to '\t', \n', '\r' */ - elif (ch == '\t'): - p.append(b'\\t') - - elif (ch == '\n'): - p.append(b'\\n') - - elif (ch == '\r'): - p.append(b'\\r') - - elif (ch == '\\'): - p.append(b'\\\\') - - #/* Map non-printable US ASCII to '\xhh' */ - elif (ch < ' ' or ch >= chr(0x7F)) : - p.append(b'\\x%02x' % ord(ch)) - #/* Copy everything else as-is */ + + # /* Map 16-bit characters to '\uxxxx' */ + if ord(ch) >= 256: + p.append(b"\\u%04x" % ord(ch)) + + # /* Map special whitespace to '\t', \n', '\r' */ + elif ch == "\t": + p.append(b"\\t") + + elif ch == "\n": + p.append(b"\\n") + + elif ch == "\r": + p.append(b"\\r") + + elif ch == "\\": + p.append(b"\\\\") + + # /* Map non-printable US ASCII to '\xhh' */ + elif ch < " " or ch >= chr(0x7F): + p.append(b"\\x%02x" % ord(ch)) + # /* Copy everything else as-is */ else: p.append(bytes([ord(ch)])) pos += 1 - if (quotes): + if quotes: p.append(p[0]) return p -def PyUnicode_DecodeASCII(s, size, errors): -# /* ASCII is equivalent to the first 128 ordinals in Unicode. */ - if (size == 1 and ord(s) < 128) : +def PyUnicode_DecodeASCII(s, size, errors): + # /* ASCII is equivalent to the first 128 ordinals in Unicode. */ + if size == 1 and ord(s) < 128: return [chr(ord(s))] - if (size == 0): - return [''] #unicode('') + if size == 0: + return [""] # unicode('') p = [] pos = 0 while pos < len(s): @@ -661,54 +1159,50 @@ def PyUnicode_DecodeASCII(s, size, errors): p += chr(c) pos += 1 else: - res = unicode_call_errorhandler( - errors, "ascii", "ordinal not in range(128)", - s, pos, pos+1) + errors, "ascii", "ordinal not in range(128)", s, pos, pos + 1 + ) p += res[0] pos = res[1] return p -def PyUnicode_EncodeASCII(p, size, errors): +def PyUnicode_EncodeASCII(p, size, errors): return unicode_encode_ucs1(p, size, errors, 128) -def PyUnicode_AsASCIIString(unistr): +def PyUnicode_AsASCIIString(unistr): if not type(unistr) == str: raise TypeError - return PyUnicode_EncodeASCII(str(unistr), - len(str), - None) + return PyUnicode_EncodeASCII(unistr, len(unistr), None) -def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=True): - bo = 0 #/* assume native ordering by default */ +def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder="native", final=True): + bo = 0 # /* assume native ordering by default */ consumed = 0 errmsg = "" - if sys.byteorder == 'little': + if sys.byteorder == "little": ihi = 1 ilo = 0 else: ihi = 0 ilo = 1 - - #/* Unpack UTF-16 encoded data */ + # /* Unpack UTF-16 encoded data */ -## /* Check for BOM marks (U+FEFF) in the input and adjust current -## byte order setting accordingly. In native mode, the leading BOM -## mark is skipped, in all other modes, it is copied to the output -## stream as-is (giving a ZWNBSP character). */ + ## /* Check for BOM marks (U+FEFF) in the input and adjust current + ## byte order setting accordingly. In native mode, the leading BOM + ## mark is skipped, in all other modes, it is copied to the output + ## stream as-is (giving a ZWNBSP character). */ q = 0 p = [] - if byteorder == 'native': - if (size >= 2): + if byteorder == "native": + if size >= 2: bom = (s[ihi] << 8) | s[ilo] -#ifdef BYTEORDER_IS_LITTLE_ENDIAN - if sys.byteorder == 'little': - if (bom == 0xFEFF): + # ifdef BYTEORDER_IS_LITTLE_ENDIAN + if sys.byteorder == "little": + if bom == 0xFEFF: q += 2 bo = -1 elif bom == 0xFFFE: @@ -721,118 +1215,143 @@ def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=Tru elif bom == 0xFFFE: q += 2 bo = -1 - elif byteorder == 'little': + elif byteorder == "little": bo = -1 else: bo = 1 - - if (size == 0): - return [''], 0, bo - - if (bo == -1): - #/* force LE */ + + if size == 0: + return [""], 0, bo + + if bo == -1: + # /* force LE */ ihi = 1 ilo = 0 - elif (bo == 1): - #/* force BE */ + elif bo == 1: + # /* force BE */ ihi = 0 ilo = 1 - while (q < len(s)): - - #/* remaining bytes at the end? (size should be even) */ - if (len(s)-q<2): + while q < len(s): + # /* remaining bytes at the end? (size should be even) */ + if len(s) - q < 2: if not final: break - errmsg = "truncated data" - startinpos = q - endinpos = len(s) - unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True) -# /* The remaining input chars are ignored if the callback -## chooses to skip the input */ - - ch = (s[q+ihi] << 8) | s[q+ilo] - q += 2 - - if (ch < 0xD800 or ch > 0xDFFF): + res, q = unicode_call_errorhandler( + errors, "utf-16", "truncated data", s, q, len(s), True + ) + p.append(res) + break + + ch = (s[q + ihi] << 8) | s[q + ilo] + + if ch < 0xD800 or ch > 0xDFFF: p.append(chr(ch)) - continue - - #/* UTF-16 code pair: */ - if (q >= len(s)): - errmsg = "unexpected end of data" - startinpos = q-2 - endinpos = len(s) - unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True) - - if (0xD800 <= ch and ch <= 0xDBFF): - ch2 = (s[q+ihi] << 8) | s[q+ilo] q += 2 - if (0xDC00 <= ch2 and ch2 <= 0xDFFF): - #ifndef Py_UNICODE_WIDE - if sys.maxunicode < 65536: - p += [chr(ch), chr(ch2)] + continue + + # /* UTF-16 code pair: high surrogate */ + if 0xD800 <= ch <= 0xDBFF: + if q + 4 <= len(s): + ch2 = (s[q + 2 + ihi] << 8) | s[q + 2 + ilo] + if 0xDC00 <= ch2 <= 0xDFFF: + # Valid surrogate pair - always assemble + p.append(chr((((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000)) + q += 4 + continue else: - p.append(chr((((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000)) - #endif + # High surrogate followed by non-low-surrogate + if errors == "surrogatepass": + p.append(chr(ch)) + q += 2 + continue + res, q = unicode_call_errorhandler( + errors, "utf-16", "illegal UTF-16 surrogate", s, q, q + 2, True + ) + p.append(res) + else: + # High surrogate at end of data + if not final: + break + if errors == "surrogatepass": + p.append(chr(ch)) + q += 2 + continue + res, q = unicode_call_errorhandler( + errors, "utf-16", "unexpected end of data", s, q, len(s), True + ) + p.append(res) + else: + # Low surrogate without preceding high surrogate + if errors == "surrogatepass": + p.append(chr(ch)) + q += 2 continue + res, q = unicode_call_errorhandler( + errors, "utf-16", "illegal encoding", s, q, q + 2, True + ) + p.append(res) - else: - errmsg = "illegal UTF-16 surrogate" - startinpos = q-4 - endinpos = startinpos+2 - unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True) - - errmsg = "illegal encoding" - startinpos = q-2 - endinpos = startinpos+2 - unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True) - return p, q, bo + # moved out of local scope, especially because it didn't # have any nested variables. + def STORECHAR(CH, byteorder): - hi = (CH >> 8) & 0xff - lo = CH & 0xff - if byteorder == 'little': + hi = (CH >> 8) & 0xFF + lo = CH & 0xFF + if byteorder == "little": return [lo, hi] else: return [hi, lo] -def PyUnicode_EncodeUTF16(s, size, errors, byteorder='little'): -# /* Offsets from p for storing byte pairs in the right order. */ +def PyUnicode_EncodeUTF16(s, size, errors, byteorder="little"): + # /* Offsets from p for storing byte pairs in the right order. */ - p = [] bom = sys.byteorder - if (byteorder == 'native'): - + if byteorder == "native": bom = sys.byteorder p += STORECHAR(0xFEFF, bom) - - if (size == 0): - return "" - if (byteorder == 'little' ): - bom = 'little' - elif (byteorder == 'big'): - bom = 'big' + if byteorder == "little": + bom = "little" + elif byteorder == "big": + bom = "big" - - for c in s: - ch = ord(c) - ch2 = 0 - if (ch >= 0x10000) : - ch2 = 0xDC00 | ((ch-0x10000) & 0x3FF) - ch = 0xD800 | ((ch-0x10000) >> 10) - - p += STORECHAR(ch, bom) - if (ch2): - p += STORECHAR(ch2, bom) + pos = 0 + while pos < len(s): + ch = ord(s[pos]) + if 0xD800 <= ch <= 0xDFFF: + if errors == "surrogatepass": + p += STORECHAR(ch, bom) + pos += 1 + else: + res, pos = unicode_call_errorhandler( + errors, "utf-16", "surrogates not allowed", s, pos, pos + 1, False + ) + for c in res: + cp = ord(c) + cp2 = 0 + if cp >= 0x10000: + cp2 = 0xDC00 | ((cp - 0x10000) & 0x3FF) + cp = 0xD800 | ((cp - 0x10000) >> 10) + p += STORECHAR(cp, bom) + if cp2: + p += STORECHAR(cp2, bom) + else: + ch2 = 0 + if ch >= 0x10000: + ch2 = 0xDC00 | ((ch - 0x10000) & 0x3FF) + ch = 0xD800 | ((ch - 0x10000) >> 10) + p += STORECHAR(ch, bom) + if ch2: + p += STORECHAR(ch2, bom) + pos += 1 return p @@ -840,119 +1359,149 @@ def PyUnicode_EncodeUTF16(s, size, errors, byteorder='little'): def PyUnicode_DecodeMBCS(s, size, errors): pass + def PyUnicode_EncodeMBCS(p, size, errors): pass -def unicode_call_errorhandler(errors, encoding, - reason, input, startinpos, endinpos, decode=True): - + +def unicode_call_errorhandler( + errors, encoding, reason, input, startinpos, endinpos, decode=True +): errorHandler = lookup_error(errors) if decode: - exceptionObject = UnicodeDecodeError(encoding, input, startinpos, endinpos, reason) + exceptionObject = UnicodeDecodeError( + encoding, input, startinpos, endinpos, reason + ) else: - exceptionObject = UnicodeEncodeError(encoding, input, startinpos, endinpos, reason) + exceptionObject = UnicodeEncodeError( + encoding, input, startinpos, endinpos, reason + ) res = errorHandler(exceptionObject) - if isinstance(res, tuple) and isinstance(res[0], str) and isinstance(res[1], int): + if ( + isinstance(res, tuple) + and isinstance(res[0], (str, bytes)) + and isinstance(res[1], int) + ): newpos = res[1] - if (newpos < 0): + if newpos < 0: newpos = len(input) + newpos if newpos < 0 or newpos > len(input): - raise IndexError( "position %d from error handler out of bounds" % newpos) + raise IndexError("position %d from error handler out of bounds" % newpos) return res[0], newpos else: - raise TypeError("encoding error handler must return (unicode, int) tuple, not %s" % repr(res)) + raise TypeError( + "encoding error handler must return (unicode, int) tuple, not %s" + % repr(res) + ) + + +# /* --- Latin-1 Codec ------------------------------------------------------ */ -#/* --- Latin-1 Codec ------------------------------------------------------ */ def PyUnicode_DecodeLatin1(s, size, errors): - #/* Latin-1 is equivalent to the first 256 ordinals in Unicode. */ -## if (size == 1): -## return [PyUnicode_FromUnicode(s, 1)] + # /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */ + ## if (size == 1): + ## return [PyUnicode_FromUnicode(s, 1)] pos = 0 p = [] - while (pos < size): + while pos < size: p += chr(s[pos]) pos += 1 return p + def unicode_encode_ucs1(p, size, errors, limit): - if limit == 256: reason = "ordinal not in range(256)" encoding = "latin-1" else: reason = "ordinal not in range(128)" encoding = "ascii" - - if (size == 0): + + if size == 0: return [] res = bytearray() pos = 0 while pos < len(p): - #for ch in p: + # for ch in p: ch = p[pos] - + if ord(ch) < limit: res.append(ord(ch)) pos += 1 else: - #/* startpos for collecting unencodable chars */ - collstart = pos - collend = pos+1 + # /* startpos for collecting unencodable chars */ + collstart = pos + collend = pos + 1 while collend < len(p) and ord(p[collend]) >= limit: collend += 1 - x = unicode_call_errorhandler(errors, encoding, reason, p, collstart, collend, False) - res += x[0].encode() + x = unicode_call_errorhandler( + errors, encoding, reason, p, collstart, collend, False + ) + replacement = x[0] + if isinstance(replacement, bytes): + res += replacement + else: + res += replacement.encode() pos = x[1] - + return res + def PyUnicode_EncodeLatin1(p, size, errors): res = unicode_encode_ucs1(p, size, errors, 256) return res -hexdigits = [ord(hex(i)[-1]) for i in range(16)]+[ord(hex(i)[-1].upper()) for i in range(10, 16)] + +hexdigits = [ord(hex(i)[-1]) for i in range(16)] + [ + ord(hex(i)[-1].upper()) for i in range(10, 16) +] + def hex_number_end(s, pos, digits): target_end = pos + digits - while pos < target_end and pos < len(s) and s[pos] in hexdigits: + while pos < target_end and pos < len(s) and s[pos] in hexdigits: pos += 1 return pos + def hexescape(s, pos, digits, message, errors): ch = 0 p = [] number_end = hex_number_end(s, pos, digits) if number_end - pos != digits: - x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-2, number_end) + x = unicode_call_errorhandler( + errors, "unicodeescape", message, s, pos - 2, number_end + ) p.append(x[0]) pos = x[1] else: - ch = int(s[pos:pos+digits], 16) - #/* when we get here, ch is a 32-bit unicode character */ + ch = int(s[pos : pos + digits], 16) + # /* when we get here, ch is a 32-bit unicode character */ if ch <= sys.maxunicode: p.append(chr(ch)) pos += digits - elif (ch <= 0x10ffff): + elif ch <= 0x10FFFF: ch -= 0x10000 p.append(chr(0xD800 + (ch >> 10))) - p.append(chr(0xDC00 + (ch & 0x03FF))) + p.append(chr(0xDC00 + (ch & 0x03FF))) pos += digits else: message = "illegal Unicode character" - x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-2, - pos+digits) + x = unicode_call_errorhandler( + errors, "unicodeescape", message, s, pos - 2, pos + digits + ) p.append(x[0]) pos = x[1] res = p return res, pos + def PyUnicode_DecodeUnicodeEscape(s, size, errors, final): + if size == 0: + return "", 0 - if (size == 0): - return '' - if isinstance(s, str): s = s.encode() @@ -960,279 +1509,331 @@ def PyUnicode_DecodeUnicodeEscape(s, size, errors, final): p = [] pos = 0 - while (pos < size): -## /* Non-escape characters are interpreted as Unicode ordinals */ - if (chr(s[pos]) != '\\') : + while pos < size: + ## /* Non-escape characters are interpreted as Unicode ordinals */ + if s[pos] != ord("\\"): p.append(chr(s[pos])) pos += 1 continue -## /* \ - Escapes */ - else: - pos += 1 - if pos >= len(s): - errmessage = "\\ at end of string" - unicode_call_errorhandler(errors, "unicodeescape", errmessage, s, pos-1, size) - ch = chr(s[pos]) - pos += 1 - ## /* \x escapes */ - if ch == '\n': pass - elif ch == '\\': p += '\\' - elif ch == '\'': p += '\'' - elif ch == '\"': p += '\"' - elif ch == 'b' : p += '\b' - elif ch == 'f' : p += '\014' #/* FF */ - elif ch == 't' : p += '\t' - elif ch == 'n' : p += '\n' - elif ch == 'r' : p += '\r' - elif ch == 'v' : p += '\013' #break; /* VT */ - elif ch == 'a' : p += '\007' # break; /* BEL, not classic C */ - elif '0' <= ch <= '7': - x = ord(ch) - ord('0') - if pos < size: - ch = chr(s[pos]) - if '0' <= ch <= '7': - pos += 1 - x = (x<<3) + ord(ch) - ord('0') - if pos < size: - ch = chr(s[pos]) - if '0' <= ch <= '7': - pos += 1 - x = (x<<3) + ord(ch) - ord('0') - p.append(chr(x)) - ## /* hex escapes */ - ## /* \xXX */ - elif ch == 'x': + ## /* \ - Escapes */ + escape_start = pos + pos += 1 + if pos >= size: + if not final: + pos = escape_start + break + errmessage = "\\ at end of string" + unicode_call_errorhandler( + errors, "unicodeescape", errmessage, s, pos - 1, size + ) + break + ch = chr(s[pos]) + pos += 1 + ## /* \x escapes */ + if ch == "\n": + pass + elif ch == "\\": + p += "\\" + elif ch == "'": + p += "'" + elif ch == '"': + p += '"' + elif ch == "b": + p += "\b" + elif ch == "f": + p += "\014" # /* FF */ + elif ch == "t": + p += "\t" + elif ch == "n": + p += "\n" + elif ch == "r": + p += "\r" + elif ch == "v": + p += "\013" # break; /* VT */ + elif ch == "a": + p += "\007" # break; /* BEL, not classic C */ + elif "0" <= ch <= "7": + x = ord(ch) - ord("0") + if pos < size: + ch = chr(s[pos]) + if "0" <= ch <= "7": + pos += 1 + x = (x << 3) + ord(ch) - ord("0") + if pos < size: + ch = chr(s[pos]) + if "0" <= ch <= "7": + pos += 1 + x = (x << 3) + ord(ch) - ord("0") + p.append(chr(x)) + ## /* hex escapes */ + ## /* \xXX */ + elif ch in ("x", "u", "U"): + if ch == "x": digits = 2 message = "truncated \\xXX escape" - x = hexescape(s, pos, digits, message, errors) - p += x[0] - pos = x[1] - - # /* \uXXXX */ - elif ch == 'u': + elif ch == "u": digits = 4 message = "truncated \\uXXXX escape" + else: + digits = 8 + message = "truncated \\UXXXXXXXX escape" + number_end = hex_number_end(s, pos, digits) + if number_end - pos != digits: + if not final: + pos = escape_start + break x = hexescape(s, pos, digits, message, errors) p += x[0] pos = x[1] - - # /* \UXXXXXXXX */ - elif ch == 'U': - digits = 8 - message = "truncated \\UXXXXXXXX escape" + else: x = hexescape(s, pos, digits, message, errors) p += x[0] pos = x[1] -## /* \N{name} */ - elif ch == 'N': - message = "malformed \\N character escape" - # pos += 1 - look = pos - try: - import unicodedata - except ImportError: - message = "\\N escapes not supported (can't load unicodedata module)" - unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, size) - if look < size and chr(s[look]) == '{': - #/* look for the closing brace */ - while (look < size and chr(s[look]) != '}'): - look += 1 - if (look > pos+1 and look < size and chr(s[look]) == '}'): - #/* found a name. look it up in the unicode database */ - message = "unknown Unicode character name" - st = s[pos+1:look] - try: - chr_codec = unicodedata.lookup("%s" % st) - except LookupError as e: - x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - else: - x = chr_codec, look + 1 - p.append(x[0]) - pos = x[1] - else: - x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) - else: - x = unicode_call_errorhandler(errors, "unicodeescape", message, s, pos-1, look+1) + ## /* \N{name} */ + elif ch == "N": + message = "malformed \\N character escape" + look = pos + try: + import unicodedata + except ImportError: + message = "\\N escapes not supported (can't load unicodedata module)" + unicode_call_errorhandler( + errors, "unicodeescape", message, s, pos - 1, size + ) + continue + if look < size and chr(s[look]) == "{": + # /* look for the closing brace */ + while look < size and chr(s[look]) != "}": + look += 1 + if look > pos + 1 and look < size and chr(s[look]) == "}": + # /* found a name. look it up in the unicode database */ + message = "unknown Unicode character name" + st = s[pos + 1 : look] + try: + chr_codec = unicodedata.lookup("%s" % st) + except LookupError as e: + x = unicode_call_errorhandler( + errors, "unicodeescape", message, s, pos - 1, look + 1 + ) + else: + x = chr_codec, look + 1 + p.append(x[0]) + pos = x[1] + else: + if not final: + pos = escape_start + break + x = unicode_call_errorhandler( + errors, "unicodeescape", message, s, pos - 1, look + 1 + ) + p.append(x[0]) + pos = x[1] else: - if not found_invalid_escape: - found_invalid_escape = True - warnings.warn("invalid escape sequence '\\%c'" % ch, DeprecationWarning, 2) - p.append('\\') - p.append(ch) - return p + if not final: + pos = escape_start + break + x = unicode_call_errorhandler( + errors, "unicodeescape", message, s, pos - 1, look + 1 + ) + p.append(x[0]) + pos = x[1] + else: + if not found_invalid_escape: + found_invalid_escape = True + warnings.warn( + "invalid escape sequence '\\%c'" % ch, DeprecationWarning, 2 + ) + p.append("\\") + p.append(ch) + return p, pos + def PyUnicode_EncodeRawUnicodeEscape(s, size): - - if (size == 0): - return b'' + if size == 0: + return b"" p = bytearray() for ch in s: -# /* Map 32-bit characters to '\Uxxxxxxxx' */ - if (ord(ch) >= 0x10000): - p += b'\\U%08x' % ord(ch) - elif (ord(ch) >= 256) : -# /* Map 16-bit characters to '\uxxxx' */ - p += b'\\u%04x' % (ord(ch)) -# /* Copy everything else as-is */ + # /* Map 32-bit characters to '\Uxxxxxxxx' */ + if ord(ch) >= 0x10000: + p += b"\\U%08x" % ord(ch) + elif ord(ch) >= 256: + # /* Map 16-bit characters to '\uxxxx' */ + p += b"\\u%04x" % (ord(ch)) + # /* Copy everything else as-is */ else: p.append(ord(ch)) - - #p += '\0' + + # p += '\0' return p -def charmapencode_output(c, mapping): +def charmapencode_output(c, mapping): rep = mapping[c] - if isinstance(rep, int) or isinstance(rep, int): + if isinstance(rep, int): if rep < 256: - return rep + return [rep] else: raise TypeError("character mapping must be in range(256)") elif isinstance(rep, str): - return ord(rep) + return [ord(rep)] + elif isinstance(rep, bytes): + return rep elif rep == None: raise KeyError("character maps to ") else: raise TypeError("character mapping must return integer, None or str") -def PyUnicode_EncodeCharmap(p, size, mapping='latin-1', errors='strict'): -## /* the following variable is used for caching string comparisons -## * -1=not initialized, 0=unknown, 1=strict, 2=replace, -## * 3=ignore, 4=xmlcharrefreplace */ +def PyUnicode_EncodeCharmap(p, size, mapping="latin-1", errors="strict"): + ## /* the following variable is used for caching string comparisons + ## * -1=not initialized, 0=unknown, 1=strict, 2=replace, + ## * 3=ignore, 4=xmlcharrefreplace */ -# /* Default to Latin-1 */ - if mapping == 'latin-1': + # /* Default to Latin-1 */ + if mapping == "latin-1": return PyUnicode_EncodeLatin1(p, size, errors) - if (size == 0): - return b'' + if size == 0: + return b"" inpos = 0 res = [] - while (inpos", p, inpos, inpos+1, False) - try: - res += [charmapencode_output(ord(y), mapping) for y in x[0]] - except KeyError: - raise UnicodeEncodeError("charmap", p, inpos, inpos+1, - "character maps to ") + x = unicode_call_errorhandler( + errors, + "charmap", + "character maps to ", + p, + inpos, + inpos + 1, + False, + ) + replacement = x[0] + if isinstance(replacement, bytes): + res += list(replacement) + else: + try: + for y in replacement: + res += charmapencode_output(ord(y), mapping) + except KeyError: + raise UnicodeEncodeError( + "charmap", p, inpos, inpos + 1, "character maps to " + ) inpos += 1 return res -def PyUnicode_DecodeCharmap(s, size, mapping, errors): -## /* Default to Latin-1 */ - if (mapping == None): +def PyUnicode_DecodeCharmap(s, size, mapping, errors): + ## /* Default to Latin-1 */ + if mapping == None: return PyUnicode_DecodeLatin1(s, size, errors) - if (size == 0): - return '' + if size == 0: + return "" p = [] inpos = 0 - while (inpos< len(s)): - - #/* Get mapping (char ordinal -> integer, Unicode char or None) */ + while inpos < len(s): + # /* Get mapping (char ordinal -> integer, Unicode char or None) */ ch = s[inpos] try: x = mapping[ch] if isinstance(x, int): - if x < 65536: + if x == 0xFFFE: + raise KeyError + if 0 <= x <= 0x10FFFF: p += chr(x) else: - raise TypeError("character mapping must be in range(65536)") + raise TypeError( + "character mapping must be in range(0x%x)" % (0x110000,) + ) elif isinstance(x, str): + if len(x) == 1 and x == "\ufffe": + raise KeyError p += x - elif not x: + elif x is None: raise KeyError else: raise TypeError - except KeyError: - x = unicode_call_errorhandler(errors, "charmap", - "character maps to ", s, inpos, inpos+1) + except (KeyError, IndexError): + x = unicode_call_errorhandler( + errors, "charmap", "character maps to ", s, inpos, inpos + 1 + ) p += x[0] inpos += 1 return p -def PyUnicode_DecodeRawUnicodeEscape(s, size, errors, final): - if (size == 0): - return '' +def PyUnicode_DecodeRawUnicodeEscape(s, size, errors, final): + if size == 0: + return "", 0 if isinstance(s, str): s = s.encode() pos = 0 p = [] - while (pos < len(s)): - ch = chr(s[pos]) - #/* Non-escape characters are interpreted as Unicode ordinals */ - if (ch != '\\'): - p.append(ch) + while pos < len(s): + # /* Non-escape characters are interpreted as Unicode ordinals */ + if s[pos] != ord("\\"): + p.append(chr(s[pos])) pos += 1 - continue + continue startinpos = pos -## /* \u-escapes are only interpreted iff the number of leading -## backslashes is odd */ + p_len_before = len(p) + ## /* \u-escapes are only interpreted iff the number of leading + ## backslashes is odd */ bs = pos while pos < size: - if (s[pos] != ord('\\')): + if s[pos] != ord("\\"): break p.append(chr(s[pos])) pos += 1 - - if (pos >= size): + + if pos >= size: + if not final: + del p[p_len_before:] + pos = startinpos break - if (((pos - bs) & 1) == 0 or - (s[pos] != ord('u') and s[pos] != ord('U'))) : + if ((pos - bs) & 1) == 0 or (s[pos] != ord("u") and s[pos] != ord("U")): p.append(chr(s[pos])) pos += 1 continue - + p.pop(-1) - if s[pos] == ord('u'): - count = 4 - else: - count = 8 + count = 4 if s[pos] == ord("u") else 8 pos += 1 - #/* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */ + # /* \uXXXX with 4 hex digits, \Uxxxxxxxx with 8 */ number_end = hex_number_end(s, pos, count) if number_end - pos != count: + if not final: + del p[p_len_before:] + pos = startinpos + break res = unicode_call_errorhandler( - errors, "rawunicodeescape", "truncated \\uXXXX", - s, pos-2, number_end) + errors, "rawunicodeescape", "truncated \\uXXXX", s, pos - 2, number_end + ) p.append(res[0]) pos = res[1] else: - x = int(s[pos:pos+count], 16) - #ifndef Py_UNICODE_WIDE - if sys.maxunicode > 0xffff: - if (x > sys.maxunicode): - res = unicode_call_errorhandler( - errors, "rawunicodeescape", "\\Uxxxxxxxx out of range", - s, pos-2, pos+count) - pos = res[1] - p.append(res[0]) - else: - p.append(chr(x)) - pos += count + x = int(s[pos : pos + count], 16) + if x > sys.maxunicode: + res = unicode_call_errorhandler( + errors, + "rawunicodeescape", + "\\Uxxxxxxxx out of range", + s, + pos - 2, + pos + count, + ) + pos = res[1] + p.append(res[0]) else: - if (x > 0x10000): - res = unicode_call_errorhandler( - errors, "rawunicodeescape", "\\Uxxxxxxxx out of range", - s, pos-2, pos+count) - pos = res[1] - p.append(res[0]) - - #endif - else: - p.append(chr(x)) - pos += count + p.append(chr(x)) + pos += count - return p + return p, pos diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index cd0ea900bfb..70251dbb653 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -1,12 +1,10 @@ -"""Concrete date/time and related types. - -See http://www.iana.org/time-zones/repository/tz-link.html for -time zone and DST data sources. -""" +"""Pure Python implementation of the datetime module.""" __all__ = ("date", "datetime", "time", "timedelta", "timezone", "tzinfo", "MINYEAR", "MAXYEAR", "UTC") +__name__ = "datetime" + import time as _time import math as _math @@ -18,10 +16,10 @@ def _cmp(x, y): def _get_class_module(self): module_name = self.__class__.__module__ - if module_name == '_pydatetime': - return 'datetime' + if module_name == 'datetime': + return 'datetime.' else: - return module_name + return '' MINYEAR = 1 MAXYEAR = 9999 @@ -64,14 +62,14 @@ def _days_in_month(year, month): def _days_before_month(year, month): "year, month -> number of days in year preceding first day of month." - assert 1 <= month <= 12, 'month must be in 1..12' + assert 1 <= month <= 12, f"month must be in 1..12, not {month}" return _DAYS_BEFORE_MONTH[month] + (month > 2 and _is_leap(year)) def _ymd2ord(year, month, day): "year, month, day -> ordinal, considering 01-Jan-0001 as day 1." - assert 1 <= month <= 12, 'month must be in 1..12' + assert 1 <= month <= 12, f"month must be in 1..12, not {month}" dim = _days_in_month(year, month) - assert 1 <= day <= dim, ('day must be in 1..%d' % dim) + assert 1 <= day <= dim, f"day must be in 1..{dim}, not {day}" return (_days_before_year(year) + _days_before_month(year, month) + day) @@ -204,6 +202,17 @@ def _format_offset(off, sep=':'): s += '.%06d' % ss.microseconds return s +_normalize_century = None +def _need_normalize_century(): + global _normalize_century + if _normalize_century is None: + try: + _normalize_century = ( + _time.strftime("%Y", (99, 1, 1, 0, 0, 0, 0, 1, 0)) != "0099") + except ValueError: + _normalize_century = True + return _normalize_century + # Correctly substitute for %z and %Z escapes in strftime formats. def _wrap_strftime(object, format, timetuple): # Don't call utcoffset() or tzname() unless actually needed. @@ -261,6 +270,20 @@ def _wrap_strftime(object, format, timetuple): # strftime is going to have at this: escape % Zreplace = s.replace('%', '%%') newformat.append(Zreplace) + # Note that datetime(1000, 1, 1).strftime('%G') == '1000' so + # year 1000 for %G can go on the fast path. + elif ((ch in 'YG' or ch in 'FC') and + object.year < 1000 and _need_normalize_century()): + if ch == 'G': + year = int(_time.strftime("%G", timetuple)) + else: + year = object.year + if ch == 'C': + push('{:02}'.format(year // 100)) + else: + push('{:04}'.format(year)) + if ch == 'F': + push('-{:02}-{:02}'.format(*timetuple[1:3])) else: push('%') push(ch) @@ -399,9 +422,11 @@ def _parse_hh_mm_ss_ff(tstr): if pos < len_str: if tstr[pos] not in '.,': - raise ValueError("Invalid microsecond component") + raise ValueError("Invalid microsecond separator") else: pos += 1 + if not all(map(_is_ascii_digit, tstr[pos:])): + raise ValueError("Non-digit values in fraction") len_remainder = len_str - pos @@ -413,9 +438,6 @@ def _parse_hh_mm_ss_ff(tstr): time_comps[3] = int(tstr[pos:(pos+to_parse)]) if to_parse < 6: time_comps[3] *= _FRACTION_CORRECTION[to_parse-1] - if (len_remainder > to_parse - and not all(map(_is_ascii_digit, tstr[(pos+to_parse):]))): - raise ValueError("Non-digit values in unparsed fraction") return time_comps @@ -431,6 +453,17 @@ def _parse_isoformat_time(tstr): time_comps = _parse_hh_mm_ss_ff(timestr) + hour, minute, second, microsecond = time_comps + became_next_day = False + error_from_components = False + if (hour == 24): + if all(time_comp == 0 for time_comp in time_comps[1:]): + hour = 0 + time_comps[0] = hour + became_next_day = True + else: + error_from_components = True + tzi = None if tz_pos == len_str and tstr[-1] == 'Z': tzi = timezone.utc @@ -446,7 +479,7 @@ def _parse_isoformat_time(tstr): # HH:MM:SS len: 8 # HH:MM:SS.f+ len: 10+ - if len(tzstr) in (0, 1, 3): + if len(tzstr) in (0, 1, 3) or tstr[tz_pos-1] == 'Z': raise ValueError("Malformed time zone string") tz_comps = _parse_hh_mm_ss_ff(tzstr) @@ -463,13 +496,13 @@ def _parse_isoformat_time(tstr): time_comps.append(tzi) - return time_comps + return time_comps, became_next_day, error_from_components # tuple[int, int, int] -> tuple[int, int, int] version of date.fromisocalendar def _isoweek_to_gregorian(year, week, day): # Year is bounded this way because 9999-12-31 is (9999, 52, 5) if not MINYEAR <= year <= MAXYEAR: - raise ValueError(f"Year is out of range: {year}") + raise ValueError(f"year must be in {MINYEAR}..{MAXYEAR}, not {year}") if not 0 < week < 53: out_of_range = True @@ -502,7 +535,7 @@ def _isoweek_to_gregorian(year, week, day): def _check_tzname(name): if name is not None and not isinstance(name, str): raise TypeError("tzinfo.tzname() must return None or string, " - "not '%s'" % type(name)) + f"not {type(name).__name__!r}") # name is the offset-producing method, "utcoffset" or "dst". # offset is what it returned. @@ -515,24 +548,24 @@ def _check_utc_offset(name, offset): if offset is None: return if not isinstance(offset, timedelta): - raise TypeError("tzinfo.%s() must return None " - "or timedelta, not '%s'" % (name, type(offset))) + raise TypeError(f"tzinfo.{name}() must return None " + f"or timedelta, not {type(offset).__name__!r}") if not -timedelta(1) < offset < timedelta(1): - raise ValueError("%s()=%s, must be strictly between " - "-timedelta(hours=24) and timedelta(hours=24)" % - (name, offset)) + raise ValueError("offset must be a timedelta " + "strictly between -timedelta(hours=24) and " + f"timedelta(hours=24), not {offset!r}") def _check_date_fields(year, month, day): year = _index(year) month = _index(month) day = _index(day) if not MINYEAR <= year <= MAXYEAR: - raise ValueError('year must be in %d..%d' % (MINYEAR, MAXYEAR), year) + raise ValueError(f"year must be in {MINYEAR}..{MAXYEAR}, not {year}") if not 1 <= month <= 12: - raise ValueError('month must be in 1..12', month) + raise ValueError(f"month must be in 1..12, not {month}") dim = _days_in_month(year, month) if not 1 <= day <= dim: - raise ValueError('day must be in 1..%d' % dim, day) + raise ValueError(f"day {day} must be in range 1..{dim} for month {month} in year {year}") return year, month, day def _check_time_fields(hour, minute, second, microsecond, fold): @@ -541,24 +574,23 @@ def _check_time_fields(hour, minute, second, microsecond, fold): second = _index(second) microsecond = _index(microsecond) if not 0 <= hour <= 23: - raise ValueError('hour must be in 0..23', hour) + raise ValueError(f"hour must be in 0..23, not {hour}") if not 0 <= minute <= 59: - raise ValueError('minute must be in 0..59', minute) + raise ValueError(f"minute must be in 0..59, not {minute}") if not 0 <= second <= 59: - raise ValueError('second must be in 0..59', second) + raise ValueError(f"second must be in 0..59, not {second}") if not 0 <= microsecond <= 999999: - raise ValueError('microsecond must be in 0..999999', microsecond) + raise ValueError(f"microsecond must be in 0..999999, not {microsecond}") if fold not in (0, 1): - raise ValueError('fold must be either 0 or 1', fold) + raise ValueError(f"fold must be either 0 or 1, not {fold}") return hour, minute, second, microsecond, fold def _check_tzinfo_arg(tz): if tz is not None and not isinstance(tz, tzinfo): - raise TypeError("tzinfo argument must be None or of a tzinfo subclass") - -def _cmperror(x, y): - raise TypeError("can't compare '%s' to '%s'" % ( - type(x).__name__, type(y).__name__)) + raise TypeError( + "tzinfo argument must be None or of a tzinfo subclass, " + f"not {type(tz).__name__!r}" + ) def _divide_and_round(a, b): """divide a by b and round result to the nearest integer @@ -612,7 +644,19 @@ def __new__(cls, days=0, seconds=0, microseconds=0, # guide the C implementation; it's way more convoluted than speed- # ignoring auto-overflow-to-long idiomatic Python could be. - # XXX Check that all inputs are ints or floats. + for name, value in ( + ("days", days), + ("seconds", seconds), + ("microseconds", microseconds), + ("milliseconds", milliseconds), + ("minutes", minutes), + ("hours", hours), + ("weeks", weeks) + ): + if not isinstance(value, (int, float)): + raise TypeError( + f"unsupported type for timedelta {name} component: {type(value).__name__}" + ) # Final values, all integer. # s and us fit in 32-bit signed ints; d isn't bounded. @@ -713,9 +757,9 @@ def __repr__(self): args.append("microseconds=%d" % self._microseconds) if not args: args.append('0') - return "%s.%s(%s)" % (_get_class_module(self), - self.__class__.__qualname__, - ', '.join(args)) + return "%s%s(%s)" % (_get_class_module(self), + self.__class__.__qualname__, + ', '.join(args)) def __str__(self): mm, ss = divmod(self._seconds, 60) @@ -912,6 +956,7 @@ class date: fromtimestamp() today() fromordinal() + strptime() Operators: @@ -970,6 +1015,8 @@ def __new__(cls, year, month=None, day=None): @classmethod def fromtimestamp(cls, t): "Construct a date from a POSIX timestamp (like time.time())." + if t is None: + raise TypeError("'NoneType' object cannot be interpreted as an integer") y, m, d, hh, mm, ss, weekday, jday, dst = _time.localtime(t) return cls(y, m, d) @@ -992,8 +1039,12 @@ def fromordinal(cls, n): @classmethod def fromisoformat(cls, date_string): """Construct a date from a string in ISO 8601 format.""" + if not isinstance(date_string, str): - raise TypeError('fromisoformat: argument must be str') + raise TypeError('Argument must be a str') + + if not date_string.isascii(): + raise ValueError('Argument must be an ASCII str') if len(date_string) not in (7, 8, 10): raise ValueError(f'Invalid isoformat string: {date_string!r}') @@ -1010,6 +1061,12 @@ def fromisocalendar(cls, year, week, day): This is the inverse of the date.isocalendar() function""" return cls(*_isoweek_to_gregorian(year, week, day)) + @classmethod + def strptime(cls, date_string, format): + """Parse a date string according to the given format (like time.strptime()).""" + import _strptime + return _strptime._strptime_datetime_date(cls, date_string, format) + # Conversions to string def __repr__(self): @@ -1019,11 +1076,11 @@ def __repr__(self): >>> repr(d) 'datetime.date(2010, 1, 1)' """ - return "%s.%s(%d, %d, %d)" % (_get_class_module(self), - self.__class__.__qualname__, - self._year, - self._month, - self._day) + return "%s%s(%d, %d, %d)" % (_get_class_module(self), + self.__class__.__qualname__, + self._year, + self._month, + self._day) # XXX These shouldn't depend on time.localtime(), because that # clips the usable dates to [1970 .. 2038). At least ctime() is # easily done without using strftime() -- that's better too because @@ -1059,8 +1116,8 @@ def isoformat(self): This is 'YYYY-MM-DD'. References: - - http://www.w3.org/TR/NOTE-datetime - - http://www.cl.cam.ac.uk/~mgk25/iso-time.html + - https://www.w3.org/TR/NOTE-datetime + - https://www.cl.cam.ac.uk/~mgk25/iso-time.html """ return "%04d-%02d-%02d" % (self._year, self._month, self._day) @@ -1108,35 +1165,38 @@ def replace(self, year=None, month=None, day=None): day = self._day return type(self)(year, month, day) + __replace__ = replace + # Comparisons of date objects with other. def __eq__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) == 0 return NotImplemented def __le__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) <= 0 return NotImplemented def __lt__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) < 0 return NotImplemented def __ge__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) >= 0 return NotImplemented def __gt__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) > 0 return NotImplemented def _cmp(self, other): assert isinstance(other, date) + assert not isinstance(other, datetime) y, m, d = self._year, self._month, self._day y2, m2, d2 = other._year, other._month, other._day return _cmp((y, m, d), (y2, m2, d2)) @@ -1191,7 +1251,7 @@ def isocalendar(self): The first week is 1; Monday is 1 ... Sunday is 7. ISO calendar algorithm taken from - http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm + https://www.phys.uu.nl/~vgent/calendar/isocalendar.htm (used with permission) """ year = self._year @@ -1327,6 +1387,7 @@ class time: Constructors: __new__() + strptime() Operators: @@ -1385,6 +1446,12 @@ def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold self._fold = fold return self + @classmethod + def strptime(cls, date_string, format): + """string, format -> new time parsed from a string (like time.strptime()).""" + import _strptime + return _strptime._strptime_datetime_time(cls, date_string, format) + # Read-only field accessors @property def hour(self): @@ -1513,7 +1580,7 @@ def __repr__(self): s = ", %d" % self._second else: s = "" - s= "%s.%s(%d, %d%s)" % (_get_class_module(self), + s = "%s%s(%d, %d%s)" % (_get_class_module(self), self.__class__.__qualname__, self._hour, self._minute, s) if self._tzinfo is not None: @@ -1555,7 +1622,7 @@ def fromisoformat(cls, time_string): time_string = time_string.removeprefix('T') try: - return cls(*_parse_isoformat_time(time_string)) + return cls(*_parse_isoformat_time(time_string)[0]) except Exception: raise ValueError(f'Invalid isoformat string: {time_string!r}') @@ -1633,6 +1700,8 @@ def replace(self, hour=None, minute=None, second=None, microsecond=None, fold = self._fold return type(self)(hour, minute, second, microsecond, tzinfo, fold=fold) + __replace__ = replace + # Pickle support. def _getstate(self, protocol=3): @@ -1680,7 +1749,7 @@ class datetime(date): The year, month and day arguments are required. tzinfo may be None, or an instance of a tzinfo subclass. The remaining arguments may be ints. """ - __slots__ = date.__slots__ + time.__slots__ + __slots__ = time.__slots__ def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold=0): @@ -1867,10 +1936,27 @@ def fromisoformat(cls, date_string): if tstr: try: - time_components = _parse_isoformat_time(tstr) + time_components, became_next_day, error_from_components = _parse_isoformat_time(tstr) except ValueError: raise ValueError( f'Invalid isoformat string: {date_string!r}') from None + else: + if error_from_components: + raise ValueError("minute, second, and microsecond must be 0 when hour is 24") + + if became_next_day: + year, month, day = date_components + # Only wrap day/month when it was previously valid + if month <= 12 and day <= (days_in_month := _days_in_month(year, month)): + # Calculate midnight of the next day + day += 1 + if day > days_in_month: + day = 1 + month += 1 + if month > 12: + month = 1 + year += 1 + date_components = [year, month, day] else: time_components = [0, 0, 0, 0, None] @@ -1979,6 +2065,8 @@ def replace(self, year=None, month=None, day=None, hour=None, return type(self)(year, month, day, hour, minute, second, microsecond, tzinfo, fold=fold) + __replace__ = replace + def _local_timezone(self): if self.tzinfo is None: ts = self._mktime() @@ -2040,7 +2128,7 @@ def isoformat(self, sep='T', timespec='auto'): By default, the fractional part is omitted if self.microsecond == 0. If self.tzinfo is not None, the UTC offset is also attached, giving - giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. + a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. Optional argument sep specifies the separator between date and time, default 'T'. @@ -2068,9 +2156,9 @@ def __repr__(self): del L[-1] if L[-1] == 0: del L[-1] - s = "%s.%s(%s)" % (_get_class_module(self), - self.__class__.__qualname__, - ", ".join(map(str, L))) + s = "%s%s(%s)" % (_get_class_module(self), + self.__class__.__qualname__, + ", ".join(map(str, L))) if self._tzinfo is not None: assert s[-1:] == ")" s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" @@ -2087,7 +2175,7 @@ def __str__(self): def strptime(cls, date_string, format): 'string, format -> new datetime parsed from a string (like time.strptime()).' import _strptime - return _strptime._strptime_datetime(cls, date_string, format) + return _strptime._strptime_datetime_datetime(cls, date_string, format) def utcoffset(self): """Return the timezone offset as timedelta positive east of UTC (negative west of @@ -2131,42 +2219,32 @@ def dst(self): def __eq__(self, other): if isinstance(other, datetime): return self._cmp(other, allow_mixed=True) == 0 - elif not isinstance(other, date): - return NotImplemented else: - return False + return NotImplemented def __le__(self, other): if isinstance(other, datetime): return self._cmp(other) <= 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __lt__(self, other): if isinstance(other, datetime): return self._cmp(other) < 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __ge__(self, other): if isinstance(other, datetime): return self._cmp(other) >= 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __gt__(self, other): if isinstance(other, datetime): return self._cmp(other) > 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def _cmp(self, other, allow_mixed=False): assert isinstance(other, datetime) @@ -2311,7 +2389,6 @@ def __reduce__(self): def _isoweek1monday(year): # Helper to calculate the day number of the Monday starting week 1 - # XXX This could be done more efficiently THURSDAY = 3 firstday = _ymd2ord(year, 1, 1) firstweekday = (firstday + 6) % 7 # See weekday() above @@ -2338,9 +2415,12 @@ def __new__(cls, offset, name=_Omitted): if not cls._minoffset <= offset <= cls._maxoffset: raise ValueError("offset must be a timedelta " "strictly between -timedelta(hours=24) and " - "timedelta(hours=24).") + f"timedelta(hours=24), not {offset!r}") return cls._create(offset, name) + def __init_subclass__(cls): + raise TypeError("type 'datetime.timezone' is not an acceptable base type") + @classmethod def _create(cls, offset, name=None): self = tzinfo.__new__(cls) @@ -2375,12 +2455,12 @@ def __repr__(self): if self is self.utc: return 'datetime.timezone.utc' if self._name is None: - return "%s.%s(%r)" % (_get_class_module(self), - self.__class__.__qualname__, - self._offset) - return "%s.%s(%r, %r)" % (_get_class_module(self), - self.__class__.__qualname__, - self._offset, self._name) + return "%s%s(%r)" % (_get_class_module(self), + self.__class__.__qualname__, + self._offset) + return "%s%s(%r, %r)" % (_get_class_module(self), + self.__class__.__qualname__, + self._offset, self._name) def __str__(self): return self.tzname(None) diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index a0b608380ab..2277578df54 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -13,104 +13,7 @@ # bug) and will be backported. At this point the spec is stabilizing # and the updates are becoming fewer, smaller, and less significant. -""" -This is an implementation of decimal floating point arithmetic based on -the General Decimal Arithmetic Specification: - - http://speleotrove.com/decimal/decarith.html - -and IEEE standard 854-1987: - - http://en.wikipedia.org/wiki/IEEE_854-1987 - -Decimal floating point has finite precision with arbitrarily large bounds. - -The purpose of this module is to support arithmetic using familiar -"schoolhouse" rules and to avoid some of the tricky representation -issues associated with binary floating point. The package is especially -useful for financial applications or for contexts where users have -expectations that are at odds with binary floating point (for instance, -in binary floating point, 1.00 % 0.1 gives 0.09999999999999995 instead -of 0.0; Decimal('1.00') % Decimal('0.1') returns the expected -Decimal('0.00')). - -Here are some examples of using the decimal module: - ->>> from decimal import * ->>> setcontext(ExtendedContext) ->>> Decimal(0) -Decimal('0') ->>> Decimal('1') -Decimal('1') ->>> Decimal('-.0123') -Decimal('-0.0123') ->>> Decimal(123456) -Decimal('123456') ->>> Decimal('123.45e12345678') -Decimal('1.2345E+12345680') ->>> Decimal('1.33') + Decimal('1.27') -Decimal('2.60') ->>> Decimal('12.34') + Decimal('3.87') - Decimal('18.41') -Decimal('-2.20') ->>> dig = Decimal(1) ->>> print(dig / Decimal(3)) -0.333333333 ->>> getcontext().prec = 18 ->>> print(dig / Decimal(3)) -0.333333333333333333 ->>> print(dig.sqrt()) -1 ->>> print(Decimal(3).sqrt()) -1.73205080756887729 ->>> print(Decimal(3) ** 123) -4.85192780976896427E+58 ->>> inf = Decimal(1) / Decimal(0) ->>> print(inf) -Infinity ->>> neginf = Decimal(-1) / Decimal(0) ->>> print(neginf) --Infinity ->>> print(neginf + inf) -NaN ->>> print(neginf * inf) --Infinity ->>> print(dig / 0) -Infinity ->>> getcontext().traps[DivisionByZero] = 1 ->>> print(dig / 0) -Traceback (most recent call last): - ... - ... - ... -decimal.DivisionByZero: x / 0 ->>> c = Context() ->>> c.traps[InvalidOperation] = 0 ->>> print(c.flags[InvalidOperation]) -0 ->>> c.divide(Decimal(0), Decimal(0)) -Decimal('NaN') ->>> c.traps[InvalidOperation] = 1 ->>> print(c.flags[InvalidOperation]) -1 ->>> c.flags[InvalidOperation] = 0 ->>> print(c.flags[InvalidOperation]) -0 ->>> print(c.divide(Decimal(0), Decimal(0))) -Traceback (most recent call last): - ... - ... - ... -decimal.InvalidOperation: 0 / 0 ->>> print(c.flags[InvalidOperation]) -1 ->>> c.flags[InvalidOperation] = 0 ->>> c.traps[InvalidOperation] = 0 ->>> print(c.divide(Decimal(0), Decimal(0))) -NaN ->>> print(c.flags[InvalidOperation]) -1 ->>> -""" +"""Python decimal arithmetic module""" __all__ = [ # Two major classes @@ -135,13 +38,16 @@ 'ROUND_FLOOR', 'ROUND_UP', 'ROUND_HALF_DOWN', 'ROUND_05UP', # Functions for manipulating contexts - 'setcontext', 'getcontext', 'localcontext', + 'setcontext', 'getcontext', 'localcontext', 'IEEEContext', # Limits for the C version for compatibility - 'MAX_PREC', 'MAX_EMAX', 'MIN_EMIN', 'MIN_ETINY', + 'MAX_PREC', 'MAX_EMAX', 'MIN_EMIN', 'MIN_ETINY', 'IEEE_CONTEXT_MAX_BITS', - # C version: compile time choice that enables the thread local context - 'HAVE_THREADS' + # C version: compile time choice that enables the thread local context (deprecated, now always true) + 'HAVE_THREADS', + + # C version: compile time choice that enables the coroutine local context + 'HAVE_CONTEXTVAR' ] __xname__ = __name__ # sys.modules lookup (--without-threads) @@ -156,7 +62,7 @@ try: from collections import namedtuple as _namedtuple - DecimalTuple = _namedtuple('DecimalTuple', 'sign digits exponent') + DecimalTuple = _namedtuple('DecimalTuple', 'sign digits exponent', module='decimal') except ImportError: DecimalTuple = lambda *args: args @@ -172,14 +78,17 @@ # Compatibility with the C version HAVE_THREADS = True +HAVE_CONTEXTVAR = True if sys.maxsize == 2**63-1: MAX_PREC = 999999999999999999 MAX_EMAX = 999999999999999999 MIN_EMIN = -999999999999999999 + IEEE_CONTEXT_MAX_BITS = 512 else: MAX_PREC = 425000000 MAX_EMAX = 425000000 MIN_EMIN = -425000000 + IEEE_CONTEXT_MAX_BITS = 256 MIN_ETINY = MIN_EMIN - (MAX_PREC-1) @@ -190,13 +99,13 @@ class DecimalException(ArithmeticError): Used exceptions derive from this. If an exception derives from another exception besides this (such as - Underflow (Inexact, Rounded, Subnormal) that indicates that it is only + Underflow (Inexact, Rounded, Subnormal)) that indicates that it is only called if the others are present. This isn't actually used for anything, though. handle -- Called when context._raise_error is called and the - trap_enabler is not set. First argument is self, second is the - context. More arguments can be given, those being after + trap_enabler is not set. First argument is self, second is + the context. More arguments can be given, those being after the explanation in _raise_error (For example, context._raise_error(NewError, '(-x)!', self._sign) would call NewError().handle(context, self._sign).) @@ -238,7 +147,7 @@ class InvalidOperation(DecimalException): x ** (+-)INF An operand is invalid - The result of the operation after these is a quiet positive NaN, + The result of the operation after this is a quiet positive NaN, except when the cause is a signaling NaN, in which case the result is also a quiet NaN, but with the original sign, and an optional diagnostic information. @@ -313,11 +222,12 @@ class InvalidContext(InvalidOperation): """Invalid context. Unknown rounding, for example. This occurs and signals invalid-operation if an invalid context was - detected during an operation. This can occur if contexts are not checked - on creation and either the precision exceeds the capability of the - underlying concrete representation or an unknown or unsupported rounding - was specified. These aspects of the context need only be checked when - the values are required to be used. The result is [0,qNaN]. + detected during an operation. This can occur if contexts are not + checked on creation and either the precision exceeds the capability of + the underlying concrete representation or an unknown or unsupported + rounding was specified. These aspects of the context need only be + checked when the values are required to be used. The result is + [0,qNaN]. """ def handle(self, context, *args): @@ -410,8 +320,9 @@ class FloatOperation(DecimalException, TypeError): Decimal.from_float() or context.create_decimal_from_float() do not set the flag. - Otherwise (the signal is trapped), only equality comparisons and explicit - conversions are silent. All other mixed operations raise FloatOperation. + Otherwise (the signal is trapped), only equality comparisons and + explicit conversions are silent. All other mixed operations raise + FloatOperation. """ # List of public traps and flags @@ -431,82 +342,40 @@ class FloatOperation(DecimalException, TypeError): ##### Context Functions ################################################## # The getcontext() and setcontext() function manage access to a thread-local -# current context. Py2.4 offers direct support for thread locals. If that -# is not available, use threading.current_thread() which is slower but will -# work for older Pythons. If threads are not part of the build, create a -# mock threading object with threading.local() returning the module namespace. - -try: - import threading -except ImportError: - # Python was compiled without threads; create a mock object instead - class MockThreading(object): - def local(self, sys=sys): - return sys.modules[__xname__] - threading = MockThreading() - del MockThreading - -try: - threading.local +# current context. -except AttributeError: +import contextvars - # To fix reloading, force it to create a new context - # Old contexts have different exceptions in their dicts, making problems. - if hasattr(threading.current_thread(), '__decimal_context__'): - del threading.current_thread().__decimal_context__ +_current_context_var = contextvars.ContextVar('decimal_context') - def setcontext(context): - """Set this thread's context to context.""" - if context in (DefaultContext, BasicContext, ExtendedContext): - context = context.copy() - context.clear_flags() - threading.current_thread().__decimal_context__ = context - - def getcontext(): - """Returns this thread's context. - - If this thread does not yet have a context, returns - a new context and sets this thread's context. - New contexts are copies of DefaultContext. - """ - try: - return threading.current_thread().__decimal_context__ - except AttributeError: - context = Context() - threading.current_thread().__decimal_context__ = context - return context +_context_attributes = frozenset( + ['prec', 'Emin', 'Emax', 'capitals', 'clamp', 'rounding', 'flags', 'traps'] +) -else: +def getcontext(): + """Returns this thread's context. - local = threading.local() - if hasattr(local, '__decimal_context__'): - del local.__decimal_context__ + If this thread does not yet have a context, returns + a new context and sets this thread's context. + New contexts are copies of DefaultContext. + """ + try: + return _current_context_var.get() + except LookupError: + context = Context() + _current_context_var.set(context) + return context + +def setcontext(context): + """Set this thread's context to context.""" + if context in (DefaultContext, BasicContext, ExtendedContext): + context = context.copy() + context.clear_flags() + _current_context_var.set(context) - def getcontext(_local=local): - """Returns this thread's context. +del contextvars # Don't contaminate the namespace - If this thread does not yet have a context, returns - a new context and sets this thread's context. - New contexts are copies of DefaultContext. - """ - try: - return _local.__decimal_context__ - except AttributeError: - context = Context() - _local.__decimal_context__ = context - return context - - def setcontext(context, _local=local): - """Set this thread's context to context.""" - if context in (DefaultContext, BasicContext, ExtendedContext): - context = context.copy() - context.clear_flags() - _local.__decimal_context__ = context - - del threading, local # Don't contaminate the namespace - -def localcontext(ctx=None): +def localcontext(ctx=None, **kwargs): """Return a context manager for a copy of the supplied context Uses a copy of the current context if no context is specified @@ -542,8 +411,35 @@ def sin(x): >>> print(getcontext().prec) 28 """ - if ctx is None: ctx = getcontext() - return _ContextManager(ctx) + if ctx is None: + ctx = getcontext() + ctx_manager = _ContextManager(ctx) + for key, value in kwargs.items(): + if key not in _context_attributes: + raise TypeError(f"'{key}' is an invalid keyword argument for this function") + setattr(ctx_manager.new_context, key, value) + return ctx_manager + + +def IEEEContext(bits, /): + """ + Return a context object initialized to the proper values for one of the + IEEE interchange formats. The argument must be a multiple of 32 and less + than IEEE_CONTEXT_MAX_BITS. + """ + if bits <= 0 or bits > IEEE_CONTEXT_MAX_BITS or bits % 32: + raise ValueError("argument must be a multiple of 32, " + f"with a maximum of {IEEE_CONTEXT_MAX_BITS}") + + ctx = Context() + ctx.prec = 9 * (bits//32) - 2 + ctx.Emax = 3 * (1 << (bits//16 + 3)) + ctx.Emin = 1 - ctx.Emax + ctx.rounding = ROUND_HALF_EVEN + ctx.clamp = 1 + ctx.traps = dict.fromkeys(_signals, False) + + return ctx ##### Decimal class ####################################################### @@ -553,7 +449,7 @@ def sin(x): # numbers.py for more detail. class Decimal(object): - """Floating point class for decimal arithmetic.""" + """Floating-point class for decimal arithmetic.""" __slots__ = ('_exp','_int','_sign', '_is_special') # Generally, the value of the Decimal instance is given by @@ -711,6 +607,21 @@ def __new__(cls, value="0", context=None): raise TypeError("Cannot convert %r to Decimal" % value) + @classmethod + def from_number(cls, number): + """Converts a real number to a decimal number, exactly. + + >>> Decimal.from_number(314) # int + Decimal('314') + >>> Decimal.from_number(0.1) # float + Decimal('0.1000000000000000055511151231257827021181583404541015625') + >>> Decimal.from_number(Decimal('3.14')) # another decimal instance + Decimal('3.14') + """ + if isinstance(number, (int, Decimal, float)): + return cls(number) + raise TypeError("Cannot convert %r to Decimal" % number) + @classmethod def from_float(cls, f): """Converts a float to a decimal number, exactly. @@ -993,7 +904,7 @@ def __hash__(self): if self.is_snan(): raise TypeError('Cannot hash a signaling NaN value.') elif self.is_nan(): - return _PyHASH_NAN + return object.__hash__(self) else: if self._sign: return -_PyHASH_INF @@ -1674,13 +1585,13 @@ def __int__(self): __trunc__ = __int__ + @property def real(self): return self - real = property(real) + @property def imag(self): return Decimal(0) - imag = property(imag) def conjugate(self): return self @@ -2260,10 +2171,16 @@ def _power_exact(self, other, p): else: return None - if xc >= 10**p: + # An exact power of 10 is representable, but can convert to a + # string of any length. But an exact power of 10 shouldn't be + # possible at this point. + assert xc > 1, self + assert xc % 10 != 0, self + strxc = str(xc) + if len(strxc) > p: return None xe = -e-xe - return _dec_from_triple(0, str(xc), xe) + return _dec_from_triple(0, strxc, xe) # now y is positive; find m and n such that y = m/n if ye >= 0: @@ -2272,7 +2189,7 @@ def _power_exact(self, other, p): if xe != 0 and len(str(abs(yc*xe))) <= -ye: return None xc_bits = _nbits(xc) - if xc != 1 and len(str(abs(yc)*xc_bits)) <= -ye: + if len(str(abs(yc)*xc_bits)) <= -ye: return None m, n = yc, 10**(-ye) while m % 2 == n % 2 == 0: @@ -2285,7 +2202,7 @@ def _power_exact(self, other, p): # compute nth root of xc*10**xe if n > 1: # if 1 < xc < 2**n then xc isn't an nth power - if xc != 1 and xc_bits <= n: + if xc_bits <= n: return None xe, rem = divmod(xe, n) @@ -2313,13 +2230,18 @@ def _power_exact(self, other, p): return None xc = xc**m xe *= m - if xc > 10**p: + # An exact power of 10 is representable, but can convert to a string + # of any length. But an exact power of 10 shouldn't be possible at + # this point. + assert xc > 1, self + assert xc % 10 != 0, self + str_xc = str(xc) + if len(str_xc) > p: return None # by this point the result *is* exactly representable # adjust the exponent to get as close as possible to the ideal # exponent, if necessary - str_xc = str(xc) if other._isinteger() and other._sign == 0: ideal_exponent = self._exp*int(other) zeros = min(xe-ideal_exponent, p-len(str_xc)) @@ -2543,12 +2465,12 @@ def __pow__(self, other, modulo=None, context=None): return ans - def __rpow__(self, other, context=None): + def __rpow__(self, other, modulo=None, context=None): """Swaps self/other and returns __pow__.""" other = _convert_other(other) if other is NotImplemented: return other - return other.__pow__(self, context=context) + return other.__pow__(self, modulo, context=context) def normalize(self, context=None): """Normalize- strip trailing 0s, change anything equal to 0 to 0e0""" @@ -2978,8 +2900,8 @@ def compare_total(self, other, context=None): """Compares self to other using the abstract representations. This is not like the standard compare, which use their numerical - value. Note that a total ordering is defined for all possible abstract - representations. + value. Note that a total ordering is defined for all possible + abstract representations. """ other = _convert_other(other, raiseit=True) @@ -3050,7 +2972,8 @@ def compare_total(self, other, context=None): def compare_total_mag(self, other, context=None): """Compares self to other using abstract repr., ignoring sign. - Like compare_total, but with operand's sign ignored and assumed to be 0. + Like compare_total, but with operand's sign ignored and assumed to + be 0. """ other = _convert_other(other, raiseit=True) @@ -3420,7 +3343,10 @@ def _fill_logical(self, context, opa, opb): return opa, opb def logical_and(self, other, context=None): - """Applies an 'and' operation between self and other's digits.""" + """Applies an 'and' operation between self and other's digits. + + Both self and other must be logical numbers. + """ if context is None: context = getcontext() @@ -3437,14 +3363,20 @@ def logical_and(self, other, context=None): return _dec_from_triple(0, result.lstrip('0') or '0', 0) def logical_invert(self, context=None): - """Invert all its digits.""" + """Invert all its digits. + + The self must be logical number. + """ if context is None: context = getcontext() return self.logical_xor(_dec_from_triple(0,'1'*context.prec,0), context) def logical_or(self, other, context=None): - """Applies an 'or' operation between self and other's digits.""" + """Applies an 'or' operation between self and other's digits. + + Both self and other must be logical numbers. + """ if context is None: context = getcontext() @@ -3461,7 +3393,10 @@ def logical_or(self, other, context=None): return _dec_from_triple(0, result.lstrip('0') or '0', 0) def logical_xor(self, other, context=None): - """Applies an 'xor' operation between self and other's digits.""" + """Applies an 'xor' operation between self and other's digits. + + Both self and other must be logical numbers. + """ if context is None: context = getcontext() @@ -3837,6 +3772,10 @@ def __format__(self, specifier, context=None, _localeconv=None): # represented in fixed point; rescale them to 0e0. if not self and self._exp > 0 and spec['type'] in 'fF%': self = self._rescale(0, rounding) + if not self and spec['no_neg_0'] and self._sign: + adjusted_sign = 0 + else: + adjusted_sign = self._sign # figure out placement of the decimal point leftdigits = self._exp + len(self._int) @@ -3867,7 +3806,7 @@ def __format__(self, specifier, context=None, _localeconv=None): # done with the decimal-specific stuff; hand over the rest # of the formatting to the _format_number function - return _format_number(self._sign, intpart, fracpart, exp, spec) + return _format_number(adjusted_sign, intpart, fracpart, exp, spec) def _dec_from_triple(sign, coefficient, exponent, special=False): """Create a decimal instance directly, without any validation, @@ -4171,9 +4110,9 @@ def create_decimal_from_float(self, f): def abs(self, a): """Returns the absolute value of the operand. - If the operand is negative, the result is the same as using the minus - operation on the operand. Otherwise, the result is the same as using - the plus operation on the operand. + If the operand is negative, the result is the same as using the + minus operation on the operand. Otherwise, the result is the same + as using the plus operation on the operand. >>> ExtendedContext.abs(Decimal('2.1')) Decimal('2.1') @@ -4229,16 +4168,17 @@ def canonical(self, a): def compare(self, a, b): """Compares values numerically. - If the signs of the operands differ, a value representing each operand - ('-1' if the operand is less than zero, '0' if the operand is zero or - negative zero, or '1' if the operand is greater than zero) is used in - place of that operand for the comparison instead of the actual - operand. + If the signs of the operands differ, a value representing each + operand ('-1' if the operand is less than zero, '0' if the operand + is zero or negative zero, or '1' if the operand is greater than + zero) is used in place of that operand for the comparison instead of + the actual operand. - The comparison is then effected by subtracting the second operand from - the first and then returning a value according to the result of the - subtraction: '-1' if the result is less than zero, '0' if the result is - zero or negative zero, or '1' if the result is greater than zero. + The comparison is then effected by subtracting the second operand + from the first and then returning a value according to the result of + the subtraction: '-1' if the result is less than zero, '0' if the + result is zero or negative zero, or '1' if the result is greater + than zero. >>> ExtendedContext.compare(Decimal('2.1'), Decimal('3')) Decimal('-1') @@ -4301,8 +4241,8 @@ def compare_total(self, a, b): """Compares two operands using their abstract representation. This is not like the standard compare, which use their numerical - value. Note that a total ordering is defined for all possible abstract - representations. + value. Note that a total ordering is defined for all possible + abstract representations. >>> ExtendedContext.compare_total(Decimal('12.73'), Decimal('127.9')) Decimal('-1') @@ -4329,7 +4269,8 @@ def compare_total(self, a, b): def compare_total_mag(self, a, b): """Compares two operands using their abstract representation ignoring sign. - Like compare_total, but with operand's sign ignored and assumed to be 0. + Like compare_total, but with operand's sign ignored and assumed to + be 0. """ a = _convert_other(a, raiseit=True) return a.compare_total_mag(b) @@ -4987,8 +4928,8 @@ def multiply(self, a, b): If either operand is a special value then the general rules apply. Otherwise, the operands are multiplied together - ('long multiplication'), resulting in a number which may be as long as - the sum of the lengths of the two operands. + ('long multiplication'), resulting in a number which may be as long + as the sum of the lengths of the two operands. >>> ExtendedContext.multiply(Decimal('1.20'), Decimal('3')) Decimal('3.60') @@ -5264,19 +5205,19 @@ def quantize(self, a, b): """Returns a value equal to 'a' (rounded), having the exponent of 'b'. The coefficient of the result is derived from that of the left-hand - operand. It may be rounded using the current rounding setting (if the - exponent is being increased), multiplied by a positive power of ten (if - the exponent is being decreased), or is unchanged (if the exponent is - already equal to that of the right-hand operand). + operand. It may be rounded using the current rounding setting (if + the exponent is being increased), multiplied by a positive power of + ten (if the exponent is being decreased), or is unchanged (if the + exponent is already equal to that of the right-hand operand). Unlike other operations, if the length of the coefficient after the quantize operation would be greater than precision then an Invalid - operation condition is raised. This guarantees that, unless there is - an error condition, the exponent of the result of a quantize is always - equal to that of the right-hand operand. + operation condition is raised. This guarantees that, unless there + is an error condition, the exponent of the result of a quantize is + always equal to that of the right-hand operand. - Also unlike other operations, quantize will never raise Underflow, even - if the result is subnormal and inexact. + Also unlike other operations, quantize will never raise Underflow, + even if the result is subnormal and inexact. >>> ExtendedContext.quantize(Decimal('2.17'), Decimal('0.001')) Decimal('2.170') @@ -5330,13 +5271,13 @@ def remainder(self, a, b): """Returns the remainder from integer division. The result is the residue of the dividend after the operation of - calculating integer division as described for divide-integer, rounded - to precision digits if necessary. The sign of the result, if - non-zero, is the same as that of the original dividend. + calculating integer division as described for divide-integer, + rounded to precision digits if necessary. The sign of the result, + if non-zero, is the same as that of the original dividend. - This operation will fail under the same conditions as integer division - (that is, if integer division on the same two operands would fail, the - remainder cannot be calculated). + This operation will fail under the same conditions as integer + division (that is, if integer division on the same two operands + would fail, the remainder cannot be calculated). >>> ExtendedContext.remainder(Decimal('2.1'), Decimal('3')) Decimal('2.1') @@ -5370,9 +5311,9 @@ def remainder_near(self, a, b): is chosen). If the result is equal to 0 then its sign will be the sign of a. - This operation will fail under the same conditions as integer division - (that is, if integer division on the same two operands would fail, the - remainder cannot be calculated). + This operation will fail under the same conditions as integer + division (that is, if integer division on the same two operands + would fail, the remainder cannot be calculated). >>> ExtendedContext.remainder_near(Decimal('2.1'), Decimal('3')) Decimal('-0.9') @@ -5430,8 +5371,8 @@ def rotate(self, a, b): def same_quantum(self, a, b): """Returns True if the two operands have the same exponent. - The result is never affected by either the sign or the coefficient of - either operand. + The result is never affected by either the sign or the coefficient + of either operand. >>> ExtendedContext.same_quantum(Decimal('2.17'), Decimal('0.001')) False @@ -5503,8 +5444,8 @@ def shift(self, a, b): def sqrt(self, a): """Square root of a non-negative number to context precision. - If the result must be inexact, it is rounded using the round-half-even - algorithm. + If the result must be inexact, it is rounded using the + round-half-even algorithm. >>> ExtendedContext.sqrt(Decimal('0')) Decimal('0') @@ -5677,8 +5618,6 @@ def __init__(self, value=None): def __repr__(self): return "(%r, %r, %r)" % (self.sign, self.int, self.exp) - __str__ = __repr__ - def _normalize(op1, op2, prec = 0): @@ -6174,7 +6113,7 @@ def _convert_for_comparison(self, other, equality_op=False): (?P\d*) # with (possibly empty) diagnostic info. ) # \s* - \Z + \z """, re.VERBOSE | re.IGNORECASE).match _all_zeros = re.compile('0*$').match @@ -6187,7 +6126,7 @@ def _convert_for_comparison(self, other, equality_op=False): # # A format specifier for Decimal looks like: # -# [[fill]align][sign][#][0][minimumwidth][,][.precision][type] +# [[fill]align][sign][z][#][0][minimumwidth][,][.precision][type] _parse_format_specifier_regex = re.compile(r"""\A (?: @@ -6195,13 +6134,18 @@ def _convert_for_comparison(self, other, equality_op=False): (?P[<>=^]) )? (?P[-+ ])? +(?Pz)? (?P\#)? (?P0)? -(?P(?!0)\d+)? -(?P,)? -(?:\.(?P0|(?!0)\d+))? +(?P\d+)? +(?P[,_])? +(?:\. + (?=[\d,_]) # lookahead for digit or separator + (?P\d+)? + (?P[,_])? +)? (?P[eEfFgGn%])? -\Z +\z """, re.VERBOSE|re.DOTALL) del re @@ -6292,6 +6236,9 @@ def _parse_format_specifier(format_spec, _localeconv=None): format_dict['grouping'] = [3, 0] format_dict['decimal_point'] = '.' + if format_dict['frac_separators'] is None: + format_dict['frac_separators'] = '' + return format_dict def _format_align(sign, body, spec): @@ -6411,6 +6358,11 @@ def _format_number(is_negative, intpart, fracpart, exp, spec): sign = _format_sign(is_negative, spec) + frac_sep = spec['frac_separators'] + if fracpart and frac_sep: + fracpart = frac_sep.join(fracpart[pos:pos + 3] + for pos in range(0, len(fracpart), 3)) + if fracpart or spec['alt']: fracpart = spec['decimal_point'] + fracpart diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 2629ed9e009..d0adef08e84 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -16,15 +16,16 @@ _setmode = None import io -from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) +from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END, Reader, Writer) # noqa: F401 valid_seek_flags = {0, 1, 2} # Hardwired values if hasattr(os, 'SEEK_HOLE') : valid_seek_flags.add(os.SEEK_HOLE) valid_seek_flags.add(os.SEEK_DATA) -# open() uses st_blksize whenever we can -DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes +# open() uses max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) +# when the device block size is available. +DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes # NOTE: Base classes defined here are registered with the "official" ABCs # defined in io.py. We don't use real inheritance though, because we don't want @@ -33,11 +34,8 @@ # Rebind for compatibility BlockingIOError = BlockingIOError -# Does io.IOBase finalizer log the exception if the close() method fails? -# The exception is ignored silently by default in release build. -_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) # Does open() check its 'errors' argument? -_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE +_CHECK_ERRORS = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) def text_encoding(encoding, stacklevel=2): @@ -85,27 +83,28 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, wrapped. (If a file descriptor is given, it is closed when the returned I/O object is closed, unless closefd is set to False.) - mode is an optional string that specifies the mode in which the file is - opened. It defaults to 'r' which means open for reading in text mode. Other - common values are 'w' for writing (truncating the file if it already - exists), 'x' for exclusive creation of a new file, and 'a' for appending - (which on some Unix systems, means that all writes append to the end of the - file regardless of the current seek position). In text mode, if encoding is - not specified the encoding used is platform dependent. (For reading and - writing raw bytes use binary mode and leave encoding unspecified.) The - available modes are: - - ========= =============================================================== + mode is an optional string that specifies the mode in which the file + is opened. It defaults to 'r' which means open for reading in text + mode. Other common values are 'w' for writing (truncating the file if + it already exists), 'x' for exclusive creation of a new file, and + 'a' for appending (which on some Unix systems, means that all writes + append to the end of the file regardless of the current seek position). + In text mode, if encoding is not specified the encoding used is platform + dependent. (For reading and writing raw bytes use binary mode and leave + encoding unspecified.) The available modes are: + + ========= ========================================================== Character Meaning - --------- --------------------------------------------------------------- + --------- ---------------------------------------------------------- 'r' open for reading (default) 'w' open for writing, truncating the file first 'x' create a new file and open it for writing - 'a' open for writing, appending to the end of the file if it exists + 'a' open for writing, appending to the end of the file if it + exists 'b' binary mode 't' text mode (default) '+' open a disk file for updating (reading and writing) - ========= =============================================================== + ========= ========================================================== The default mode is 'rt' (open for reading text). For binary random access, the mode 'w+b' opens and truncates the file to 0 bytes, while @@ -113,23 +112,23 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, raises an `FileExistsError` if the file already exists. Python distinguishes between files opened in binary and text modes, - even when the underlying operating system doesn't. Files opened in + even when the underlying operating system doesn't. Files opened in binary mode (appending 'b' to the mode argument) return contents as - bytes objects without any decoding. In text mode (the default, or when + bytes objects without any decoding. In text mode (the default, or when 't' is appended to the mode argument), the contents of the file are returned as strings, the bytes having been first decoded using a platform-dependent encoding or using the specified encoding if given. buffering is an optional integer used to set the buffering policy. - Pass 0 to switch buffering off (only allowed in binary mode), 1 to select - line buffering (only usable in text mode), and an integer > 1 to indicate - the size of a fixed-size chunk buffer. When no buffering argument is - given, the default buffering policy works as follows: + Pass 0 to switch buffering off (only allowed in binary mode), 1 to + select line buffering (only usable in text mode), and an integer > 1 to + indicate the size of a fixed-size chunk buffer. When no buffering + argument is given, the default buffering policy works as follows: * Binary files are buffered in fixed-size chunks; the size of the buffer - is chosen using a heuristic trying to determine the underlying device's - "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. - On many systems, the buffer will typically be 4096 or 8192 bytes long. + is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) when the device + block size is available. + On most systems, the buffer will typically be 128 kilobytes long. * "Interactive" text files (files for which isatty() returns True) use line buffering. Other text files use the policy described above @@ -149,8 +148,8 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, encoding error strings. newline is a string controlling how universal newlines works (it only - applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works - as follows: + applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It + works as follows: * On input, if newline is None, universal newlines mode is enabled. Lines in the input can end in '\n', '\r', or '\r\n', and @@ -166,17 +165,17 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, other legal values, any '\n' characters written are translated to the given string. - closedfd is a bool. If closefd is False, the underlying file descriptor will - be kept open when the file is closed. This does not work when a file name is - given and must be True in that case. + closedfd is a bool. If closefd is False, the underlying file descriptor + will be kept open when the file is closed. This does not work when + a file name is given and must be True in that case. The newly created file is non-inheritable. A custom opener can be used by passing a callable as *opener*. The - underlying file descriptor for the file object is then obtained by calling - *opener* with (*file*, *flags*). *opener* must return an open file - descriptor (passing os.open as *opener* results in functionality similar to - passing None). + underlying file descriptor for the file object is then obtained by + calling *opener* with (*file*, *flags*). *opener* must return an open + file descriptor (passing os.open as *opener* results in functionality + similar to passing None). open() returns a file object whose type depends on the mode, and through which the standard file operations such as reading and writing @@ -241,18 +240,11 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, result = raw try: line_buffering = False - if buffering == 1 or buffering < 0 and raw.isatty(): + if buffering == 1 or buffering < 0 and raw._isatty_open_only(): buffering = -1 line_buffering = True if buffering < 0: - buffering = DEFAULT_BUFFER_SIZE - try: - bs = os.fstat(raw.fileno()).st_blksize - except (OSError, AttributeError): - pass - else: - if bs > 1: - buffering = bs + buffering = max(min(raw._blksize, 8192 * 1024), DEFAULT_BUFFER_SIZE) if buffering < 0: raise ValueError("invalid buffering size") if buffering == 0: @@ -360,10 +352,12 @@ def seek(self, pos, whence=0): interpreted relative to the position indicated by whence. Values for whence are ints: - * 0 -- start of stream (the default); offset should be zero or positive + * 0 -- start of stream (the default); offset should be zero or + positive * 1 -- current stream position; offset may be negative * 2 -- end of stream; offset is usually negative - Some operating systems / file systems could provide additional values. + Some operating systems / file systems could provide additional + values. Return an int indicating the new absolute position. """ @@ -376,8 +370,8 @@ def tell(self): def truncate(self, pos=None): """Truncate file to size bytes. - Size defaults to the current IO position as reported by tell(). Return - the new size. + Size defaults to the current IO position as reported by tell(). + Return the new size. """ self._unsupported("truncate") @@ -416,18 +410,12 @@ def __del__(self): if closed: return - if _IOBASE_EMITS_UNRAISABLE: - self.close() - else: - # The try/except block is in case this is called at program - # exit time, when it's possible that globals have already been - # deleted, and then the close() call might fail. Since - # there's nothing we can do about such failures and they annoy - # the end users, we suppress the traceback. - try: - self.close() - except: - pass + if dealloc_warn := getattr(self, "_dealloc_warn", None): + dealloc_warn(self) + + # If close() fails, the caller logs the exception with + # sys.unraisablehook. close() must be called at the end at __del__(). + self.close() ### Inquiries ### @@ -507,7 +495,8 @@ def __exit__(self, *args): def fileno(self): """Returns underlying file descriptor (an int) if one exists. - An OSError is raised if the IO object does not use a file descriptor. + An OSError is raised if the IO object does not use a file + descriptor. """ self._unsupported("fileno") @@ -632,16 +621,15 @@ def read(self, size=-1): n = self.readinto(b) if n is None: return None + if n < 0 or n > len(b): + raise ValueError(f"readinto returned {n} outside buffer size {len(b)}") del b[n:] return bytes(b) def readall(self): """Read until EOF, using multiple read() call.""" res = bytearray() - while True: - data = self.read(DEFAULT_BUFFER_SIZE) - if not data: - break + while data := self.read(DEFAULT_BUFFER_SIZE): res += data if res: return bytes(res) @@ -666,8 +654,6 @@ def write(self, b): self._unsupported("write") io.RawIOBase.register(RawIOBase) -from _io import FileIO -RawIOBase.register(FileIO) class BufferedIOBase(IOBase): @@ -874,6 +860,10 @@ def __repr__(self): else: return "<{}.{} name={!r}>".format(modname, clsname, name) + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.raw, "_dealloc_warn", None): + dealloc_warn(source) + ### Lower-level APIs ### def fileno(self): @@ -949,22 +939,22 @@ def read1(self, size=-1): return self.read(size) def write(self, b): - if self.closed: - raise ValueError("write to closed file") if isinstance(b, str): raise TypeError("can't write str to binary stream") with memoryview(b) as view: + if self.closed: + raise ValueError("write to closed file") + n = view.nbytes # Size of any bytes-like object - if n == 0: - return 0 - pos = self._pos - if pos > len(self._buffer): - # Inserts null bytes between the current end of the file - # and the new write position. - padding = b'\x00' * (pos - len(self._buffer)) - self._buffer += padding - self._buffer[pos:pos + n] = b - self._pos += n + if n == 0: + return 0 + + pos = self._pos + if pos > len(self._buffer): + # Pad buffer to pos with null bytes. + self._buffer.resize(pos) + self._buffer[pos:pos + n] = view + self._pos += n return n def seek(self, pos, whence=0): @@ -1478,6 +1468,17 @@ def write(self, b): return BufferedWriter.write(self, b) +def _new_buffersize(bytes_read): + # Parallels _io/fileio.c new_buffersize + if bytes_read > 65536: + addend = bytes_read >> 3 + else: + addend = 256 + bytes_read + if addend < DEFAULT_BUFFER_SIZE: + addend = DEFAULT_BUFFER_SIZE + return bytes_read + addend + + class FileIO(RawIOBase): _fd = -1 _created = False @@ -1488,20 +1489,26 @@ class FileIO(RawIOBase): _closefd = True def __init__(self, file, mode='r', closefd=True, opener=None): - """Open a file. The mode can be 'r' (default), 'w', 'x' or 'a' for reading, - writing, exclusive creation or appending. The file will be created if it - doesn't exist when opened for writing or appending; it will be truncated - when opened for writing. A FileExistsError will be raised if it already - exists when opened for creating. Opening a file for creating implies - writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode - to allow simultaneous reading and writing. A custom opener can be used by - passing a callable as *opener*. The underlying file descriptor for the file - object is then obtained by calling opener with (*name*, *flags*). - *opener* must return an open file descriptor (passing os.open as *opener* - results in functionality similar to passing None). + """Open a file. + + The mode can be 'r' (default), 'w', 'x' or 'a' for reading, + writing, exclusive creation or appending. The file will be created + if it doesn't exist when opened for writing or appending; it will be + truncated when opened for writing. A FileExistsError will be raised + if it already exists when opened for creating. Opening a file for + creating implies writing so this mode behaves in a similar way to + 'w'. Add a '+' to the mode to allow simultaneous reading and + writing. + + A custom opener can be used by passing a callable as *opener*. + The underlying file descriptor for the file object is then obtained + by calling opener with (*name*, *flags*). *opener* must return + an open file descriptor (passing os.open as *opener* results in + functionality similar to passing None). """ if self._fd >= 0: # Have to close the existing file first. + self._stat_atopen = None try: if self._closefd: os.close(self._fd) @@ -1511,6 +1518,11 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if isinstance(file, float): raise TypeError('integer argument expected, got float') if isinstance(file, int): + if isinstance(file, bool): + import warnings + warnings.warn("bool is used as a file descriptor", + RuntimeWarning, stacklevel=2) + file = int(file) fd = file if fd < 0: raise ValueError('negative file descriptor') @@ -1569,24 +1581,22 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if not isinstance(fd, int): raise TypeError('expected integer from opener') if fd < 0: - raise OSError('Negative file descriptor') + # bpo-27066: Raise a ValueError for bad value. + raise ValueError(f'opener returned {fd}') owned_fd = fd if not noinherit_flag: os.set_inheritable(fd, False) self._closefd = closefd - fdfstat = os.fstat(fd) + self._stat_atopen = os.fstat(fd) try: - if stat.S_ISDIR(fdfstat.st_mode): + if stat.S_ISDIR(self._stat_atopen.st_mode): raise IsADirectoryError(errno.EISDIR, os.strerror(errno.EISDIR), file) except AttributeError: # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR # don't exist. pass - self._blksize = getattr(fdfstat, 'st_blksize', 0) - if self._blksize <= 1: - self._blksize = DEFAULT_BUFFER_SIZE if _setmode: # don't translate newlines (\r\n <=> \n) @@ -1603,17 +1613,17 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if e.errno != errno.ESPIPE: raise except: + self._stat_atopen = None if owned_fd is not None: os.close(owned_fd) raise self._fd = fd - def __del__(self): + def _dealloc_warn(self, source): if self._fd >= 0 and self._closefd and not self.closed: import warnings - warnings.warn('unclosed file %r' % (self,), ResourceWarning, + warnings.warn(f'unclosed file {source!r}', ResourceWarning, stacklevel=2, source=self) - self.close() def __getstate__(self): raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") @@ -1632,6 +1642,17 @@ def __repr__(self): return ('<%s name=%r mode=%r closefd=%r>' % (class_name, name, self.mode, self._closefd)) + @property + def _blksize(self): + if self._stat_atopen is None: + return DEFAULT_BUFFER_SIZE + + blksize = getattr(self._stat_atopen, "st_blksize", 0) + # WASI sets blsize to 0 + if not blksize: + return DEFAULT_BUFFER_SIZE + return blksize + def _checkReadable(self): if not self._readable: raise UnsupportedOperation('File not open for reading') @@ -1643,7 +1664,13 @@ def _checkWritable(self, msg=None): def read(self, size=None): """Read at most size bytes, returned as bytes. - Only makes one system call, so less data may be returned than requested + If size is less than 0, read all bytes in the file making + multiple read calls. See ``FileIO.readall``. + + Attempts to make only one system call, retrying only per + PEP 475 (EINTR). This means less data may be returned than + requested. + In non-blocking mode, returns None if no data is available. Return an empty bytes object at EOF. """ @@ -1659,52 +1686,64 @@ def read(self, size=None): def readall(self): """Read all data from the file, returned as bytes. - In non-blocking mode, returns as much as is immediately available, - or None if no data is available. Return an empty bytes object at EOF. + Reads until either there is an error or read() returns size 0 + (indicates EOF). If the file is already at EOF, returns an + empty bytes object. + + In non-blocking mode, returns as much data as could be read + before EAGAIN. If no data is available (EAGAIN is returned + before bytes are read) returns None. """ self._checkClosed() self._checkReadable() - bufsize = DEFAULT_BUFFER_SIZE - try: - pos = os.lseek(self._fd, 0, SEEK_CUR) - end = os.fstat(self._fd).st_size - if end >= pos: - bufsize = end - pos + 1 - except OSError: - pass + if self._stat_atopen is None or self._stat_atopen.st_size <= 0: + bufsize = DEFAULT_BUFFER_SIZE + else: + # In order to detect end of file, need a read() of at least 1 + # byte which returns size 0. Oversize the buffer by 1 byte so the + # I/O can be completed with two read() calls (one for all data, one + # for EOF) without needing to resize the buffer. + bufsize = self._stat_atopen.st_size + 1 - result = bytearray() - while True: - if len(result) >= bufsize: - bufsize = len(result) - bufsize += max(bufsize, DEFAULT_BUFFER_SIZE) - n = bufsize - len(result) - try: - chunk = os.read(self._fd, n) - except BlockingIOError: - if result: - break + if self._stat_atopen.st_size > 65536: + try: + pos = os.lseek(self._fd, 0, SEEK_CUR) + if self._stat_atopen.st_size >= pos: + bufsize = self._stat_atopen.st_size - pos + 1 + except OSError: + pass + + result = bytearray(bufsize) + bytes_read = 0 + try: + while n := os.readinto(self._fd, memoryview(result)[bytes_read:]): + bytes_read += n + if bytes_read >= len(result): + result.resize(_new_buffersize(bytes_read)) + except BlockingIOError: + if not bytes_read: return None - if not chunk: # reached the end of the file - break - result += chunk + assert len(result) - bytes_read >= 1, \ + "os.readinto buffer size 0 will result in erroneous EOF / returns 0" + result.resize(bytes_read) return bytes(result) - def readinto(self, b): + def readinto(self, buffer): """Same as RawIOBase.readinto().""" - m = memoryview(b).cast('B') - data = self.read(len(m)) - n = len(data) - m[:n] = data - return n + self._checkClosed() + self._checkReadable() + try: + return os.readinto(self._fd, buffer) + except BlockingIOError: + return None def write(self, b): """Write bytes b to file, return number written. Only makes one system call, so not all of the data may be written. - The number of bytes actually written is returned. In non-blocking mode, - returns None if the write would block. + The number of bytes actually written is returned. In non-blocking + mode, returns None if the write would block. """ self._checkClosed() self._checkWritable() @@ -1716,11 +1755,12 @@ def write(self, b): def seek(self, pos, whence=SEEK_SET): """Move to new file position. - Argument offset is a byte count. Optional argument whence defaults to - SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values - are SEEK_CUR or 1 (move relative to current position, positive or negative), - and SEEK_END or 2 (move relative to end of file, usually negative, although - many platforms allow seeking beyond the end of a file). + Argument offset is a byte count. Optional argument whence defaults + to SEEK_SET or 0 (offset from start of file, offset should be >= 0); + other values are SEEK_CUR or 1 (move relative to current position, + positive or negative), and SEEK_END or 2 (move relative to end of + file, usually negative, although many platforms allow seeking beyond + the end of a file). Note that not all file objects are seekable. """ @@ -1747,17 +1787,19 @@ def truncate(self, size=None): if size is None: size = self.tell() os.ftruncate(self._fd, size) + self._stat_atopen = None return size def close(self): """Close the file. - A closed file cannot be used for further I/O operations. close() may be - called more than once without error. + A closed file cannot be used for further I/O operations. + close() may be called more than once without error. """ if not self.closed: + self._stat_atopen = None try: - if self._closefd: + if self._closefd and self._fd >= 0: os.close(self._fd) finally: super().close() @@ -1794,6 +1836,21 @@ def isatty(self): self._checkClosed() return os.isatty(self._fd) + def _isatty_open_only(self): + """Checks whether the file is a TTY using an open-only optimization. + + TTYs are always character devices. If the interpreter knows a file is + not a character device when it would call ``isatty``, can skip that + call. Inside ``open()`` there is a fresh stat result that contains that + information. Use the stat result to skip a system call. Outside of that + context TOCTOU issues (the fd could be arbitrarily modified by + surrounding code). + """ + if (self._stat_atopen is not None + and not stat.S_ISCHR(self._stat_atopen.st_mode)): + return False + return os.isatty(self._fd) + @property def closefd(self): """True if the file descriptor will be closed by close().""" @@ -1832,8 +1889,8 @@ class TextIOBase(IOBase): def read(self, size=-1): """Read at most size characters from stream, where size is an int. - Read from underlying buffer until we have size characters or we hit EOF. - If size is negative or omitted, read until EOF. + Read from underlying buffer until we have size characters or we hit + EOF. If size is negative or omitted, read until EOF. Returns a string. """ @@ -2018,8 +2075,7 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None, raise ValueError("invalid encoding: %r" % encoding) if not codecs.lookup(encoding)._is_text_encoding: - msg = ("%r is not a text encoding; " - "use codecs.open() to handle arbitrary codecs") + msg = "%r is not a text encoding" raise LookupError(msg % encoding) if errors is None: @@ -2527,9 +2583,12 @@ def read(self, size=None): size = size_index() decoder = self._decoder or self._get_decoder() if size < 0: + chunk = self.buffer.read() + if chunk is None: + raise BlockingIOError("Read returned None.") # Read everything. result = (self._get_decoded_chars() + - decoder.decode(self.buffer.read(), final=True)) + decoder.decode(chunk, final=True)) if self._snapshot is not None: self._set_decoded_chars('') self._snapshot = None @@ -2649,6 +2708,10 @@ def readline(self, size=None): def newlines(self): return self._decoder.newlines if self._decoder else None + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.buffer, "_dealloc_warn", None): + dealloc_warn(source) + class StringIO(TextIOWrapper): """Text I/O implementation using an in-memory buffer. diff --git a/Lib/_pylong.py b/Lib/_pylong.py new file mode 100644 index 00000000000..be1acd17ce3 --- /dev/null +++ b/Lib/_pylong.py @@ -0,0 +1,729 @@ +"""Python implementations of some algorithms for use by longobject.c. +The goal is to provide asymptotically faster algorithms that can be +used for operations on integers with many digits. In those cases, the +performance overhead of the Python implementation is not significant +since the asymptotic behavior is what dominates runtime. Functions +provided by this module should be considered private and not part of any +public API. + +Note: for ease of maintainability, please prefer clear code and avoid +"micro-optimizations". This module will only be imported and used for +integers with a huge number of digits. Saving a few microseconds with +tricky or non-obvious code is not worth it. For people looking for +maximum performance, they should use something like gmpy2.""" + +import re +import decimal +try: + import _decimal +except ImportError: + _decimal = None + +# A number of functions have this form, where `w` is a desired number of +# digits in base `base`: +# +# def inner(...w...): +# if w <= LIMIT: +# return something +# lo = w >> 1 +# hi = w - lo +# something involving base**lo, inner(...lo...), j, and inner(...hi...) +# figure out largest w needed +# result = inner(w) +# +# They all had some on-the-fly scheme to cache `base**lo` results for reuse. +# Power is costly. +# +# This routine aims to compute all amd only the needed powers in advance, as +# efficiently as reasonably possible. This isn't trivial, and all the +# on-the-fly methods did needless work in many cases. The driving code above +# changes to: +# +# figure out largest w needed +# mycache = compute_powers(w, base, LIMIT) +# result = inner(w) +# +# and `mycache[lo]` replaces `base**lo` in the inner function. +# +# If an algorithm wants the powers of ceiling(w/2) instead of the floor, +# pass keyword argument `need_hi=True`. +# +# While this does give minor speedups (a few percent at best), the +# primary intent is to simplify the functions using this, by eliminating +# the need for them to craft their own ad-hoc caching schemes. +# +# See code near end of file for a block of code that can be enabled to +# run millions of tests. +def compute_powers(w, base, more_than, *, need_hi=False, show=False): + seen = set() + need = set() + ws = {w} + while ws: + w = ws.pop() # any element is fine to use next + if w in seen or w <= more_than: + continue + seen.add(w) + lo = w >> 1 + hi = w - lo + # only _need_ one here; the other may, or may not, be needed + which = hi if need_hi else lo + need.add(which) + ws.add(which) + if lo != hi: + ws.add(w - which) + + # `need` is the set of exponents needed. To compute them all + # efficiently, possibly add other exponents to `extra`. The goal is + # to ensure that each exponent can be gotten from a smaller one via + # multiplying by the base, squaring it, or squaring and then + # multiplying by the base. + # + # If need_hi is False, this is already the case (w can always be + # gotten from w >> 1 via one of the squaring strategies). But we do + # the work anyway, just in case ;-) + # + # Note that speed is irrelevant. These loops are working on little + # ints (exponents) and go around O(log w) times. The total cost is + # insignificant compared to just one of the bigint multiplies. + cands = need.copy() + extra = set() + while cands: + w = max(cands) + cands.remove(w) + lo = w >> 1 + if lo > more_than and w-1 not in cands and lo not in cands: + extra.add(lo) + cands.add(lo) + assert need_hi or not extra + + d = {} + for n in sorted(need | extra): + lo = n >> 1 + hi = n - lo + if n-1 in d: + if show: + print("* base", end="") + result = d[n-1] * base # cheap! + elif lo in d: + # Multiplying a bigint by itself is about twice as fast + # in CPython provided it's the same object. + if show: + print("square", end="") + result = d[lo] * d[lo] # same object + if hi != lo: + if show: + print(" * base", end="") + assert 2 * lo + 1 == n + result *= base + else: # rare + if show: + print("pow", end='') + result = base ** n + if show: + print(" at", n, "needed" if n in need else "extra") + d[n] = result + + assert need <= d.keys() + if excess := d.keys() - need: + assert need_hi + for n in excess: + del d[n] + return d + +_unbounded_dec_context = decimal.getcontext().copy() +_unbounded_dec_context.prec = decimal.MAX_PREC +_unbounded_dec_context.Emax = decimal.MAX_EMAX +_unbounded_dec_context.Emin = decimal.MIN_EMIN +_unbounded_dec_context.traps[decimal.Inexact] = 1 # sanity check + +def int_to_decimal(n): + """Asymptotically fast conversion of an 'int' to Decimal.""" + + # Function due to Tim Peters. See GH issue #90716 for details. + # https://github.com/python/cpython/issues/90716 + # + # The implementation in longobject.c of base conversion algorithms + # between power-of-2 and non-power-of-2 bases are quadratic time. + # This function implements a divide-and-conquer algorithm that is + # faster for large numbers. Builds an equal decimal.Decimal in a + # "clever" recursive way. If we want a string representation, we + # apply str to _that_. + + from decimal import Decimal as D + BITLIM = 200 + + # Don't bother caching the "lo" mask in this; the time to compute it is + # tiny compared to the multiply. + def inner(n, w): + if w <= BITLIM: + return D(n) + w2 = w >> 1 + hi = n >> w2 + lo = n & ((1 << w2) - 1) + return inner(lo, w2) + inner(hi, w - w2) * w2pow[w2] + + with decimal.localcontext(_unbounded_dec_context): + nbits = n.bit_length() + w2pow = compute_powers(nbits, D(2), BITLIM) + if n < 0: + negate = True + n = -n + else: + negate = False + result = inner(n, nbits) + if negate: + result = -result + return result + +def int_to_decimal_string(n): + """Asymptotically fast conversion of an 'int' to a decimal string.""" + w = n.bit_length() + if w > 450_000 and _decimal is not None: + # It is only usable with the C decimal implementation. + # _pydecimal.py calls str() on very large integers, which in its + # turn calls int_to_decimal_string(), causing very deep recursion. + return str(int_to_decimal(n)) + + # Fallback algorithm for the case when the C decimal module isn't + # available. This algorithm is asymptotically worse than the algorithm + # using the decimal module, but better than the quadratic time + # implementation in longobject.c. + + DIGLIM = 1000 + def inner(n, w): + if w <= DIGLIM: + return str(n) + w2 = w >> 1 + hi, lo = divmod(n, pow10[w2]) + return inner(hi, w - w2) + inner(lo, w2).zfill(w2) + + # The estimation of the number of decimal digits. + # There is no harm in small error. If we guess too large, there may + # be leading 0's that need to be stripped. If we guess too small, we + # may need to call str() recursively for the remaining highest digits, + # which can still potentially be a large integer. This is manifested + # only if the number has way more than 10**15 digits, that exceeds + # the 52-bit physical address limit in both Intel64 and AMD64. + w = int(w * 0.3010299956639812 + 1) # log10(2) + pow10 = compute_powers(w, 5, DIGLIM) + for k, v in pow10.items(): + pow10[k] = v << k # 5**k << k == 5**k * 2**k == 10**k + if n < 0: + n = -n + sign = '-' + else: + sign = '' + s = inner(n, w) + if s[0] == '0' and n: + # If our guess of w is too large, there may be leading 0's that + # need to be stripped. + s = s.lstrip('0') + return sign + s + +def _str_to_int_inner(s): + """Asymptotically fast conversion of a 'str' to an 'int'.""" + + # Function due to Bjorn Martinsson. See GH issue #90716 for details. + # https://github.com/python/cpython/issues/90716 + # + # The implementation in longobject.c of base conversion algorithms + # between power-of-2 and non-power-of-2 bases are quadratic time. + # This function implements a divide-and-conquer algorithm making use + # of Python's built in big int multiplication. Since Python uses the + # Karatsuba algorithm for multiplication, the time complexity + # of this function is O(len(s)**1.58). + + DIGLIM = 2048 + + def inner(a, b): + if b - a <= DIGLIM: + return int(s[a:b]) + mid = (a + b + 1) >> 1 + return (inner(mid, b) + + ((inner(a, mid) * w5pow[b - mid]) + << (b - mid))) + + w5pow = compute_powers(len(s), 5, DIGLIM) + return inner(0, len(s)) + + +# Asymptotically faster version, using the C decimal module. See +# comments at the end of the file. This uses decimal arithmetic to +# convert from base 10 to base 256. The latter is just a string of +# bytes, which CPython can convert very efficiently to a Python int. + +# log of 10 to base 256 with best-possible 53-bit precision. Obtained +# via: +# from mpmath import mp +# mp.prec = 1000 +# print(float(mp.log(10, 256)).hex()) +_LOG_10_BASE_256 = float.fromhex('0x1.a934f0979a371p-2') # about 0.415 + +# _spread is for internal testing. It maps a key to the number of times +# that condition obtained in _dec_str_to_int_inner: +# key 0 - quotient guess was right +# key 1 - quotient had to be boosted by 1, one time +# key 999 - one adjustment wasn't enough, so fell back to divmod +from collections import defaultdict +_spread = defaultdict(int) +del defaultdict + +def _dec_str_to_int_inner(s, *, GUARD=8): + # Yes, BYTELIM is "large". Large enough that CPython will usually + # use the Karatsuba _str_to_int_inner to convert the string. This + # allowed reducing the cutoff for calling _this_ function from 3.5M + # to 2M digits. We could almost certainly do even better by + # fine-tuning this and/or using a larger output base than 256. + BYTELIM = 100_000 + D = decimal.Decimal + result = bytearray() + # See notes at end of file for discussion of GUARD. + assert GUARD > 0 # if 0, `decimal` can blow up - .prec 0 not allowed + + def inner(n, w): + #assert n < D256 ** w # required, but too expensive to check + if w <= BYTELIM: + # XXX Stefan Pochmann discovered that, for 1024-bit ints, + # `int(Decimal)` took 2.5x longer than `int(str(Decimal))`. + # Worse, `int(Decimal) is still quadratic-time for much + # larger ints. So unless/until all that is repaired, the + # seemingly redundant `str(Decimal)` is crucial to speed. + result.extend(int(str(n)).to_bytes(w)) # big-endian default + return + w1 = w >> 1 + w2 = w - w1 + if 0: + # This is maximally clear, but "too slow". `decimal` + # division is asymptotically fast, but we have no way to + # tell it to reuse the high-precision reciprocal it computes + # for pow256[w2], so it has to recompute it over & over & + # over again :-( + hi, lo = divmod(n, pow256[w2][0]) + else: + p256, recip = pow256[w2] + # The integer part will have a number of digits about equal + # to the difference between the log10s of `n` and `pow256` + # (which, since these are integers, is roughly approximated + # by `.adjusted()`). That's the working precision we need, + ctx.prec = max(n.adjusted() - p256.adjusted(), 0) + GUARD + hi = +n * +recip # unary `+` chops back to ctx.prec digits + ctx.prec = decimal.MAX_PREC + hi = hi.to_integral_value() # lose the fractional digits + lo = n - hi * p256 + # Because we've been uniformly rounding down, `hi` is a + # lower bound on the correct quotient. + assert lo >= 0 + # Adjust quotient up if needed. It usually isn't. In random + # testing on inputs through 5 billion digit strings, the + # test triggered once in about 200 thousand tries. + count = 0 + if lo >= p256: + count = 1 + lo -= p256 + hi += 1 + if lo >= p256: + # Complete correction via an exact computation. I + # believe it's not possible to get here provided + # GUARD >= 3. It's tested by reducing GUARD below + # that. + count = 999 + hi2, lo = divmod(lo, p256) + hi += hi2 + _spread[count] += 1 + # The assert should always succeed, but way too slow to keep + # enabled. + #assert hi, lo == divmod(n, pow256[w2][0]) + inner(hi, w1) + del hi # at top levels, can free a lot of RAM "early" + inner(lo, w2) + + # How many base 256 digits are needed?. Mathematically, exactly + # floor(log256(int(s))) + 1. There is no cheap way to compute this. + # But we can get an upper bound, and that's necessary for our error + # analysis to make sense. int(s) < 10**len(s), so the log needed is + # < log256(10**len(s)) = len(s) * log256(10). However, using + # finite-precision floating point for this, it's possible that the + # computed value is a little less than the true value. If the true + # value is at - or a little higher than - an integer, we can get an + # off-by-1 error too low. So we add 2 instead of 1 if chopping lost + # a fraction > 0.9. + + # The "WASI" test platform can complain about `len(s)` if it's too + # large to fit in its idea of "an index-sized integer". + lenS = s.__len__() + log_ub = lenS * _LOG_10_BASE_256 + log_ub_as_int = int(log_ub) + w = log_ub_as_int + 1 + (log_ub - log_ub_as_int > 0.9) + # And what if we've plain exhausted the limits of HW floats? We + # could compute the log to any desired precision using `decimal`, + # but it's not plausible that anyone will pass a string requiring + # trillions of bytes (unless they're just trying to "break things"). + if w.bit_length() >= 46: + # "Only" had < 53 - 46 = 7 bits to spare in IEEE-754 double. + raise ValueError(f"cannot convert string of len {lenS} to int") + with decimal.localcontext(_unbounded_dec_context) as ctx: + D256 = D(256) + pow256 = compute_powers(w, D256, BYTELIM, need_hi=True) + rpow256 = compute_powers(w, 1 / D256, BYTELIM, need_hi=True) + # We're going to do inexact, chopped arithmetic, multiplying by + # an approximation to the reciprocal of 256**i. We chop to get a + # lower bound on the true integer quotient. Our approximation is + # a lower bound, the multiplication is chopped too, and + # to_integral_value() is also chopped. + ctx.traps[decimal.Inexact] = 0 + ctx.rounding = decimal.ROUND_DOWN + for k, v in pow256.items(): + # No need to save much more precision in the reciprocal than + # the power of 256 has, plus some guard digits to absorb + # most relevant rounding errors. This is highly significant: + # 1/2**i has the same number of significant decimal digits + # as 5**i, generally over twice the number in 2**i, + ctx.prec = v.adjusted() + GUARD + 1 + # The unary "+" chops the reciprocal back to that precision. + pow256[k] = v, +rpow256[k] + del rpow256 # exact reciprocals no longer needed + ctx.prec = decimal.MAX_PREC + inner(D(s), w) + return int.from_bytes(result) + +def int_from_string(s): + """Asymptotically fast version of PyLong_FromString(), conversion + of a string of decimal digits into an 'int'.""" + # PyLong_FromString() has already removed leading +/-, checked for invalid + # use of underscore characters, checked that string consists of only digits + # and underscores, and stripped leading whitespace. The input can still + # contain underscores and have trailing whitespace. + s = s.rstrip().replace('_', '') + func = _str_to_int_inner + if len(s) >= 2_000_000 and _decimal is not None: + func = _dec_str_to_int_inner + return func(s) + +def str_to_int(s): + """Asymptotically fast version of decimal string to 'int' conversion.""" + # FIXME: this doesn't support the full syntax that int() supports. + m = re.match(r'\s*([+-]?)([0-9_]+)\s*', s) + if not m: + raise ValueError('invalid literal for int() with base 10') + v = int_from_string(m.group(2)) + if m.group(1) == '-': + v = -v + return v + + +# Fast integer division, based on code from Mark Dickinson, fast_div.py +# GH-47701. Additional refinements and optimizations by Bjorn Martinsson. The +# algorithm is due to Burnikel and Ziegler, in their paper "Fast Recursive +# Division". + +_DIV_LIMIT = 4000 + + +def _div2n1n(a, b, n): + """Divide a 2n-bit nonnegative integer a by an n-bit positive integer + b, using a recursive divide-and-conquer algorithm. + + Inputs: + n is a positive integer + b is a positive integer with exactly n bits + a is a nonnegative integer such that a < 2**n * b + + Output: + (q, r) such that a = b*q+r and 0 <= r < b. + + """ + if a.bit_length() - n <= _DIV_LIMIT: + return divmod(a, b) + pad = n & 1 + if pad: + a <<= 1 + b <<= 1 + n += 1 + half_n = n >> 1 + mask = (1 << half_n) - 1 + b1, b2 = b >> half_n, b & mask + q1, r = _div3n2n(a >> n, (a >> half_n) & mask, b, b1, b2, half_n) + q2, r = _div3n2n(r, a & mask, b, b1, b2, half_n) + if pad: + r >>= 1 + return q1 << half_n | q2, r + + +def _div3n2n(a12, a3, b, b1, b2, n): + """Helper function for _div2n1n; not intended to be called directly.""" + if a12 >> n == b1: + q, r = (1 << n) - 1, a12 - (b1 << n) + b1 + else: + q, r = _div2n1n(a12, b1, n) + r = (r << n | a3) - q * b2 + while r < 0: + q -= 1 + r += b + return q, r + + +def _int2digits(a, n): + """Decompose non-negative int a into base 2**n + + Input: + a is a non-negative integer + + Output: + List of the digits of a in base 2**n in little-endian order, + meaning the most significant digit is last. The most + significant digit is guaranteed to be non-zero. + If a is 0 then the output is an empty list. + + """ + a_digits = [0] * ((a.bit_length() + n - 1) // n) + + def inner(x, L, R): + if L + 1 == R: + a_digits[L] = x + return + mid = (L + R) >> 1 + shift = (mid - L) * n + upper = x >> shift + lower = x ^ (upper << shift) + inner(lower, L, mid) + inner(upper, mid, R) + + if a: + inner(a, 0, len(a_digits)) + return a_digits + + +def _digits2int(digits, n): + """Combine base-2**n digits into an int. This function is the + inverse of `_int2digits`. For more details, see _int2digits. + """ + + def inner(L, R): + if L + 1 == R: + return digits[L] + mid = (L + R) >> 1 + shift = (mid - L) * n + return (inner(mid, R) << shift) + inner(L, mid) + + return inner(0, len(digits)) if digits else 0 + + +def _divmod_pos(a, b): + """Divide a non-negative integer a by a positive integer b, giving + quotient and remainder.""" + # Use grade-school algorithm in base 2**n, n = nbits(b) + n = b.bit_length() + a_digits = _int2digits(a, n) + + r = 0 + q_digits = [] + for a_digit in reversed(a_digits): + q_digit, r = _div2n1n((r << n) + a_digit, b, n) + q_digits.append(q_digit) + q_digits.reverse() + q = _digits2int(q_digits, n) + return q, r + + +def int_divmod(a, b): + """Asymptotically fast replacement for divmod, for 'int'. + Its time complexity is O(n**1.58), where n = #bits(a) + #bits(b). + """ + if b == 0: + raise ZeroDivisionError('division by zero') + elif b < 0: + q, r = int_divmod(-a, -b) + return q, -r + elif a < 0: + q, r = int_divmod(~a, b) + return ~q, b + ~r + else: + return _divmod_pos(a, b) + + +# Notes on _dec_str_to_int_inner: +# +# Stefan Pochmann worked up a str->int function that used the decimal +# module to, in effect, convert from base 10 to base 256. This is +# "unnatural", in that it requires multiplying and dividing by large +# powers of 2, which `decimal` isn't naturally suited to. But +# `decimal`'s `*` and `/` are asymptotically superior to CPython's, so +# at _some_ point it could be expected to win. +# +# Alas, the crossover point was too high to be of much real interest. I +# (Tim) then worked on ways to replace its division with multiplication +# by a cached reciprocal approximation instead, fixing up errors +# afterwards. This reduced the crossover point significantly, +# +# I revisited the code, and found ways to improve and simplify it. The +# crossover point is at about 3.4 million digits now. +# +# About .adjusted() +# ----------------- +# Restrict to Decimal values x > 0. We don't use negative numbers in the +# code, and I don't want to have to keep typing, e.g., "absolute value". +# +# For convenience, I'll use `x.a` to mean `x.adjusted()`. x.a doesn't +# look at the digits of x, but instead returns an integer giving x's +# order of magnitude. These are equivalent: +# +# - x.a is the power-of-10 exponent of x's most significant digit. +# - x.a = the infinitely precise floor(log10(x)) +# - x can be written in this form, where f is a real with 1 <= f < 10: +# x = f * 10**x.a +# +# Observation; if x is an integer, len(str(x)) = x.a + 1. +# +# Lemma 1: (x * y).a = x.a + y.a, or one larger +# +# Proof: Write x = f * 10**x.a and y = g * 10**y.a, where f and g are in +# [1, 10). Then x*y = f*g * 10**(x.a + y.a), where 1 <= f*g < 100. If +# f*g < 10, (x*y).a is x.a+y.a. Else divide f*g by 10 to bring it back +# into [1, 10], and add 1 to the exponent to compensate. Then (x*y).a is +# x.a+y.a+1. +# +# Lemma 2: ceiling(log10(x/y)) <= x.a - y.a + 1 +# +# Proof: Express x and y as in Lemma 1. Then x/y = f/g * 10**(x.a - +# y.a), where 1/10 < f/g < 10. If 1 <= f/g, (x/y).a is x.a-y.a. Else +# multiply f/g by 10 to bring it back into [1, 10], and subtract 1 from +# the exponent to compensate. Then (x/y).a is x.a-y.a-1. So the largest +# (x/y).a can be is x.a-y.a. Since that's the floor of log10(x/y). the +# ceiling is at most 1 larger (with equality iff f/g = 1 exactly). +# +# GUARD digits +# ------------ +# We only want the integer part of divisions, so don't need to build +# the full multiplication tree. But using _just_ the number of +# digits expected in the integer part ignores too much. What's left +# out can have a very significant effect on the quotient. So we use +# GUARD additional digits. +# +# The default 8 is more than enough so no more than 1 correction step +# was ever needed for all inputs tried through 2.5 billion digits. In +# fact, I believe 3 guard digits are always enough - but the proof is +# very involved, so better safe than sorry. +# +# Short course: +# +# If prec is the decimal precision in effect, and we're rounding down, +# the result of an operation is exactly equal to the infinitely precise +# result times 1-e for some real e with 0 <= e < 10**(1-prec). In +# +# ctx.prec = max(n.adjusted() - p256.adjusted(), 0) + GUARD +# hi = +n * +recip # unary `+` chops to ctx.prec digits +# +# we have 3 visible chopped operations, but there's also a 4th: +# precomputing a truncated `recip` as part of setup. +# +# So the computed product is exactly equal to the true product times +# (1-e1)*(1-e2)*(1-e3)*(1-e4); since the e's are all very small, an +# excellent approximation to the second factor is 1-(e1+e2+e3+e4) (the +# 2nd and higher order terms in the expanded product are too tiny to +# matter). If they're all as large as possible, that's +# +# 1 - 4*10**(1-prec). This, BTW, is all bog-standard FP error analysis. +# +# That implies the computed product is within 1 of the true product +# provided prec >= log10(true_product) + 1.602. +# +# Here are telegraphic details, rephrasing the initial condition in +# equivalent ways, step by step: +# +# prod - prod * (1 - 4*10**(1-prec)) <= 1 +# prod - prod + prod * 4*10**(1-prec)) <= 1 +# prod * 4*10**(1-prec)) <= 1 +# 10**(log10(prod)) * 4*10**(1-prec)) <= 1 +# 4*10**(1-prec+log10(prod))) <= 1 +# 10**(1-prec+log10(prod))) <= 1/4 +# 1-prec+log10(prod) <= log10(1/4) = -0.602 +# -prec <= -1.602 - log10(prod) +# prec >= log10(prod) + 1.602 +# +# The true product is the same as the true ratio n/p256. By Lemma 2 +# above, n.a - p256.a + 1 is an upper bound on the ceiling of +# log10(prod). Then 2 is the ceiling of 1.602. so n.a - p256.a + 3 is an +# upper bound on the right hand side of the inequality. Any prec >= that +# will work. +# +# But since this is just a sketch of a proof ;-), the code uses the +# empirically tested 8 instead of 3. 5 digits more or less makes no +# practical difference to speed - these ints are huge. And while +# increasing GUARD above 3 may not be necessary, every increase cuts the +# percentage of cases that need a correction at all. +# +# On Computing Reciprocals +# ------------------------ +# In general, the exact reciprocals we compute have over twice as many +# significant digits as needed. 1/256**i has the same number of +# significant decimal digits as 5**i. It's a significant waste of RAM +# to store all those unneeded digits. +# +# So we cut exact reciprocals back to the least precision that can +# be needed so that the error analysis above is valid, +# +# [Note: turns out it's very significantly faster to do it this way than +# to compute 1 / 256**i directly to the desired precision, because the +# power method doesn't require division. It's also faster than computing +# (1/256)**i directly to the desired precision - no material division +# there, but `compute_powers()` is much smarter about _how_ to compute +# all the powers needed than repeated applications of `**` - that +# function invokes `**` for at most the few smallest powers needed.] +# +# The hard part is that chopping back to a shorter width occurs +# _outside_ of `inner`. We can't know then what `prec` `inner()` will +# need. We have to pick, for each value of `w2`, the largest possible +# value `prec` can become when `inner()` is working on `w2`. +# +# This is the `prec` inner() uses: +# max(n.a - p256.a, 0) + GUARD +# and what setup uses (renaming its `v` to `p256` - same thing): +# p256.a + GUARD + 1 +# +# We need that the second is always at least as large as the first, +# which is the same as requiring +# +# n.a - 2 * p256.a <= 1 +# +# What's the largest n can be? n < 255**w = 256**(w2 + (w - w2)). The +# worst case in this context is when w ix even. and then w = 2*w2, so +# n < 256**(2*w2) = (256**w2)**2 = p256**2. By Lemma 1, then, n.a +# is at most p256.a + p256.a + 1. +# +# So the most n.a - 2 * p256.a can be is +# p256.a + p256.a + 1 - 2 * p256.a = 1. QED +# +# Note: an earlier version of the code split on floor(e/2) instead of on +# the ceiling. The worst case then is odd `w`, and a more involved proof +# was needed to show that adding 4 (instead of 1) may be necessary. +# Basically because, in that case, n may be up to 256 times larger than +# p256**2. Curiously enough, by splitting on the ceiling instead, +# nothing in any proof here actually depends on the output base (256). + +# Enable for brute-force testing of compute_powers(). This takes about a +# minute, because it tries millions of cases. +if 0: + def consumer(w, limit, need_hi): + seen = set() + need = set() + def inner(w): + if w <= limit: + return + if w in seen: + return + seen.add(w) + lo = w >> 1 + hi = w - lo + need.add(hi if need_hi else lo) + inner(lo) + inner(hi) + inner(w) + exp = compute_powers(w, 1, limit, need_hi=need_hi) + assert exp.keys() == need + + from itertools import chain + for need_hi in (False, True): + for limit in (0, 1, 10, 100, 1_000, 10_000, 100_000): + for w in chain(range(1, 100_000), + (10**i for i in range(5, 30))): + consumer(w, limit, need_hi) diff --git a/Lib/_pyrepl/__init__.py b/Lib/_pyrepl/__init__.py new file mode 100644 index 00000000000..1693cbd0b98 --- /dev/null +++ b/Lib/_pyrepl/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/Lib/_pyrepl/__main__.py b/Lib/_pyrepl/__main__.py new file mode 100644 index 00000000000..9c66812e13a --- /dev/null +++ b/Lib/_pyrepl/__main__.py @@ -0,0 +1,10 @@ +# Important: don't add things to this module, as they will end up in the REPL's +# default globals. Use _pyrepl.main instead. + +# Avoid caching this file by linecache and incorrectly report tracebacks. +# See https://github.com/python/cpython/issues/129098. +__spec__ = __loader__ = None + +if __name__ == "__main__": + from .main import interactive_console as __pyrepl_interactive_console + __pyrepl_interactive_console() diff --git a/Lib/_pyrepl/_module_completer.py b/Lib/_pyrepl/_module_completer.py new file mode 100644 index 00000000000..2098d0a54ab --- /dev/null +++ b/Lib/_pyrepl/_module_completer.py @@ -0,0 +1,426 @@ +from __future__ import annotations + +import importlib +import os +import pkgutil +import sys +import token +import tokenize +from importlib.machinery import FileFinder +from io import StringIO +from contextlib import contextmanager +from dataclasses import dataclass +from itertools import chain +from tokenize import TokenInfo + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Any, Iterable, Iterator, Mapping + + +HARDCODED_SUBMODULES = { + # Standard library submodules that are not detected by pkgutil.iter_modules + # but can be imported, so should be proposed in completion + "collections": ["abc"], + "os": ["path"], + "xml.parsers.expat": ["errors", "model"], +} + + +def make_default_module_completer() -> ModuleCompleter: + # Inside pyrepl, __package__ is set to None by default + return ModuleCompleter(namespace={'__package__': None}) + + +class ModuleCompleter: + """A completer for Python import statements. + + Examples: + - import + - import foo + - import foo. + - import foo as bar, baz + + - from + - from foo + - from foo import + - from foo import bar + - from foo import (bar as baz, qux + """ + + def __init__(self, namespace: Mapping[str, Any] | None = None) -> None: + self.namespace = namespace or {} + self._global_cache: list[pkgutil.ModuleInfo] = [] + self._curr_sys_path: list[str] = sys.path[:] + self._stdlib_path = os.path.dirname(importlib.__path__[0]) + + def get_completions(self, line: str) -> list[str] | None: + """Return the next possible import completions for 'line'.""" + result = ImportParser(line).parse() + if not result: + return None + try: + return self.complete(*result) + except Exception: + # Some unexpected error occurred, make it look like + # no completions are available + return [] + + def complete(self, from_name: str | None, name: str | None) -> list[str]: + if from_name is None: + # import x.y.z + assert name is not None + path, prefix = self.get_path_and_prefix(name) + modules = self.find_modules(path, prefix) + return [self.format_completion(path, module) for module in modules] + + if name is None: + # from x.y.z + path, prefix = self.get_path_and_prefix(from_name) + modules = self.find_modules(path, prefix) + return [self.format_completion(path, module) for module in modules] + + # from x.y import z + return self.find_modules(from_name, name) + + def find_modules(self, path: str, prefix: str) -> list[str]: + """Find all modules under 'path' that start with 'prefix'.""" + modules = self._find_modules(path, prefix) + # Filter out invalid module names + # (for example those containing dashes that cannot be imported with 'import') + return [mod for mod in modules if mod.isidentifier()] + + def _find_modules(self, path: str, prefix: str) -> list[str]: + if not path: + # Top-level import (e.g. `import foo`` or `from foo`)` + builtin_modules = [name for name in sys.builtin_module_names + if self.is_suggestion_match(name, prefix)] + third_party_modules = [module.name for module in self.global_cache + if self.is_suggestion_match(module.name, prefix)] + return sorted(builtin_modules + third_party_modules) + + if path.startswith('.'): + # Convert relative path to absolute path + package = self.namespace.get('__package__', '') + path = self.resolve_relative_name(path, package) # type: ignore[assignment] + if path is None: + return [] + + modules: Iterable[pkgutil.ModuleInfo] = self.global_cache + imported_module = sys.modules.get(path.split('.')[0]) + if imported_module: + # Filter modules to those who name and specs match the + # imported module to avoid invalid suggestions + spec = imported_module.__spec__ + if spec: + modules = [mod for mod in modules + if mod.name == spec.name + and mod.module_finder.find_spec(mod.name, None) == spec] + else: + modules = [] + + is_stdlib_import: bool | None = None + for segment in path.split('.'): + modules = [mod_info for mod_info in modules + if mod_info.ispkg and mod_info.name == segment] + if is_stdlib_import is None: + # Top-level import decide if we import from stdlib or not + is_stdlib_import = all( + self._is_stdlib_module(mod_info) for mod_info in modules + ) + modules = self.iter_submodules(modules) + + module_names = [module.name for module in modules] + if is_stdlib_import: + module_names.extend(HARDCODED_SUBMODULES.get(path, ())) + return [module_name for module_name in module_names + if self.is_suggestion_match(module_name, prefix)] + + def _is_stdlib_module(self, module_info: pkgutil.ModuleInfo) -> bool: + return (isinstance(module_info.module_finder, FileFinder) + and module_info.module_finder.path == self._stdlib_path) + + def is_suggestion_match(self, module_name: str, prefix: str) -> bool: + if prefix: + return module_name.startswith(prefix) + # For consistency with attribute completion, which + # does not suggest private attributes unless requested. + return not module_name.startswith("_") + + def iter_submodules(self, parent_modules: list[pkgutil.ModuleInfo]) -> Iterator[pkgutil.ModuleInfo]: + """Iterate over all submodules of the given parent modules.""" + specs = [info.module_finder.find_spec(info.name, None) + for info in parent_modules if info.ispkg] + search_locations = set(chain.from_iterable( + getattr(spec, 'submodule_search_locations', []) + for spec in specs if spec + )) + return pkgutil.iter_modules(search_locations) + + def get_path_and_prefix(self, dotted_name: str) -> tuple[str, str]: + """ + Split a dotted name into an import path and a + final prefix that is to be completed. + + Examples: + 'foo.bar' -> 'foo', 'bar' + 'foo.' -> 'foo', '' + '.foo' -> '.', 'foo' + """ + if '.' not in dotted_name: + return '', dotted_name + if dotted_name.startswith('.'): + stripped = dotted_name.lstrip('.') + dots = '.' * (len(dotted_name) - len(stripped)) + if '.' not in stripped: + return dots, stripped + path, prefix = stripped.rsplit('.', 1) + return dots + path, prefix + path, prefix = dotted_name.rsplit('.', 1) + return path, prefix + + def format_completion(self, path: str, module: str) -> str: + if path == '' or path.endswith('.'): + return f'{path}{module}' + return f'{path}.{module}' + + def resolve_relative_name(self, name: str, package: str) -> str | None: + """Resolve a relative module name to an absolute name. + + Example: resolve_relative_name('.foo', 'bar') -> 'bar.foo' + """ + # taken from importlib._bootstrap + level = 0 + for character in name: + if character != '.': + break + level += 1 + bits = package.rsplit('.', level - 1) + if len(bits) < level: + return None + base = bits[0] + name = name[level:] + return f'{base}.{name}' if name else base + + @property + def global_cache(self) -> list[pkgutil.ModuleInfo]: + """Global module cache""" + if not self._global_cache or self._curr_sys_path != sys.path: + self._curr_sys_path = sys.path[:] + self._global_cache = list(pkgutil.iter_modules()) + return self._global_cache + + +class ImportParser: + """ + Parses incomplete import statements that are + suitable for autocomplete suggestions. + + Examples: + - import foo -> Result(from_name=None, name='foo') + - import foo. -> Result(from_name=None, name='foo.') + - from foo -> Result(from_name='foo', name=None) + - from foo import bar -> Result(from_name='foo', name='bar') + - from .foo import ( -> Result(from_name='.foo', name='') + + Note that the parser works in reverse order, starting from the + last token in the input string. This makes the parser more robust + when parsing multiple statements. + """ + _ignored_tokens = { + token.INDENT, token.DEDENT, token.COMMENT, + token.NL, token.NEWLINE, token.ENDMARKER + } + _keywords = {'import', 'from', 'as'} + + def __init__(self, code: str) -> None: + self.code = code + tokens = [] + try: + for t in tokenize.generate_tokens(StringIO(code).readline): + if t.type not in self._ignored_tokens: + tokens.append(t) + except tokenize.TokenError as e: + if 'unexpected EOF' not in str(e): + # unexpected EOF is fine, since we're parsing an + # incomplete statement, but other errors are not + # because we may not have all the tokens so it's + # safer to bail out + tokens = [] + except SyntaxError: + tokens = [] + self.tokens = TokenQueue(tokens[::-1]) + + def parse(self) -> tuple[str | None, str | None] | None: + if not (res := self._parse()): + return None + return res.from_name, res.name + + def _parse(self) -> Result | None: + with self.tokens.save_state(): + return self.parse_from_import() + with self.tokens.save_state(): + return self.parse_import() + + def parse_import(self) -> Result: + if self.code.rstrip().endswith('import') and self.code.endswith(' '): + return Result(name='') + if self.tokens.peek_string(','): + name = '' + else: + if self.code.endswith(' '): + raise ParseError('parse_import') + name = self.parse_dotted_name() + if name.startswith('.'): + raise ParseError('parse_import') + while self.tokens.peek_string(','): + self.tokens.pop() + self.parse_dotted_as_name() + if self.tokens.peek_string('import'): + return Result(name=name) + raise ParseError('parse_import') + + def parse_from_import(self) -> Result: + stripped = self.code.rstrip() + if stripped.endswith('import') and self.code.endswith(' '): + return Result(from_name=self.parse_empty_from_import(), name='') + if stripped.endswith('from') and self.code.endswith(' '): + return Result(from_name='') + if self.tokens.peek_string('(') or self.tokens.peek_string(','): + return Result(from_name=self.parse_empty_from_import(), name='') + if self.code.endswith(' '): + raise ParseError('parse_from_import') + name = self.parse_dotted_name() + if '.' in name: + self.tokens.pop_string('from') + return Result(from_name=name) + if self.tokens.peek_string('from'): + return Result(from_name=name) + from_name = self.parse_empty_from_import() + return Result(from_name=from_name, name=name) + + def parse_empty_from_import(self) -> str: + if self.tokens.peek_string(','): + self.tokens.pop() + self.parse_as_names() + if self.tokens.peek_string('('): + self.tokens.pop() + self.tokens.pop_string('import') + return self.parse_from() + + def parse_from(self) -> str: + from_name = self.parse_dotted_name() + self.tokens.pop_string('from') + return from_name + + def parse_dotted_as_name(self) -> str: + self.tokens.pop_name() + if self.tokens.peek_string('as'): + self.tokens.pop() + with self.tokens.save_state(): + return self.parse_dotted_name() + + def parse_dotted_name(self) -> str: + name = [] + if self.tokens.peek_string('.'): + name.append('.') + self.tokens.pop() + if (self.tokens.peek_name() + and (tok := self.tokens.peek()) + and tok.string not in self._keywords): + name.append(self.tokens.pop_name()) + if not name: + raise ParseError('parse_dotted_name') + while self.tokens.peek_string('.'): + name.append('.') + self.tokens.pop() + if (self.tokens.peek_name() + and (tok := self.tokens.peek()) + and tok.string not in self._keywords): + name.append(self.tokens.pop_name()) + else: + break + + while self.tokens.peek_string('.'): + name.append('.') + self.tokens.pop() + return ''.join(name[::-1]) + + def parse_as_names(self) -> None: + self.parse_as_name() + while self.tokens.peek_string(','): + self.tokens.pop() + self.parse_as_name() + + def parse_as_name(self) -> None: + self.tokens.pop_name() + if self.tokens.peek_string('as'): + self.tokens.pop() + self.tokens.pop_name() + + +class ParseError(Exception): + pass + + +@dataclass(frozen=True) +class Result: + from_name: str | None = None + name: str | None = None + + +class TokenQueue: + """Provides helper functions for working with a sequence of tokens.""" + + def __init__(self, tokens: list[TokenInfo]) -> None: + self.tokens: list[TokenInfo] = tokens + self.index: int = 0 + self.stack: list[int] = [] + + @contextmanager + def save_state(self) -> Any: + try: + self.stack.append(self.index) + yield + except ParseError: + self.index = self.stack.pop() + else: + self.stack.pop() + + def __bool__(self) -> bool: + return self.index < len(self.tokens) + + def peek(self) -> TokenInfo | None: + if not self: + return None + return self.tokens[self.index] + + def peek_name(self) -> bool: + if not (tok := self.peek()): + return False + return tok.type == token.NAME + + def pop_name(self) -> str: + tok = self.pop() + if tok.type != token.NAME: + raise ParseError('pop_name') + return tok.string + + def peek_string(self, string: str) -> bool: + if not (tok := self.peek()): + return False + return tok.string == string + + def pop_string(self, string: str) -> str: + tok = self.pop() + if tok.string != string: + raise ParseError('pop_string') + return tok.string + + def pop(self) -> TokenInfo: + if not self: + raise ParseError('pop') + tok = self.tokens[self.index] + self.index += 1 + return tok diff --git a/Lib/_pyrepl/_threading_handler.py b/Lib/_pyrepl/_threading_handler.py new file mode 100644 index 00000000000..82f5e8650a2 --- /dev/null +++ b/Lib/_pyrepl/_threading_handler.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +import traceback + + +TYPE_CHECKING = False +if TYPE_CHECKING: + from threading import Thread + from types import TracebackType + from typing import Protocol + + class ExceptHookArgs(Protocol): + @property + def exc_type(self) -> type[BaseException]: ... + @property + def exc_value(self) -> BaseException | None: ... + @property + def exc_traceback(self) -> TracebackType | None: ... + @property + def thread(self) -> Thread | None: ... + + class ShowExceptions(Protocol): + def __call__(self) -> int: ... + def add(self, s: str) -> None: ... + + from .reader import Reader + + +def install_threading_hook(reader: Reader) -> None: + import threading + + @dataclass + class ExceptHookHandler: + lock: threading.Lock = field(default_factory=threading.Lock) + messages: list[str] = field(default_factory=list) + + def show(self) -> int: + count = 0 + with self.lock: + if not self.messages: + return 0 + reader.restore() + for tb in self.messages: + count += 1 + if tb: + print(tb) + self.messages.clear() + reader.scheduled_commands.append("ctrl-c") + reader.prepare() + return count + + def add(self, s: str) -> None: + with self.lock: + self.messages.append(s) + + def exception(self, args: ExceptHookArgs) -> None: + lines = traceback.format_exception( + args.exc_type, + args.exc_value, + args.exc_traceback, + colorize=reader.can_colorize, + ) # type: ignore[call-overload] + pre = f"\nException in {args.thread.name}:\n" if args.thread else "\n" + tb = pre + "".join(lines) + self.add(tb) + + def __call__(self) -> int: + return self.show() + + + handler = ExceptHookHandler() + reader.threading_hook = handler + threading.excepthook = handler.exception diff --git a/Lib/_pyrepl/base_eventqueue.py b/Lib/_pyrepl/base_eventqueue.py new file mode 100644 index 00000000000..0589a0f437e --- /dev/null +++ b/Lib/_pyrepl/base_eventqueue.py @@ -0,0 +1,110 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +""" +OS-independent base for an event and VT sequence scanner + +See unix_eventqueue and windows_eventqueue for subclasses. +""" + +from collections import deque + +from . import keymap +from .console import Event +from .trace import trace + +class BaseEventQueue: + def __init__(self, encoding: str, keymap_dict: dict[bytes, str]) -> None: + self.compiled_keymap = keymap.compile_keymap(keymap_dict) + self.keymap = self.compiled_keymap + trace("keymap {k!r}", k=self.keymap) + self.encoding = encoding + self.events: deque[Event] = deque() + self.buf = bytearray() + + def get(self) -> Event | None: + """ + Retrieves the next event from the queue. + """ + if self.events: + return self.events.popleft() + else: + return None + + def empty(self) -> bool: + """ + Checks if the queue is empty. + """ + return not self.events + + def flush_buf(self) -> bytearray: + """ + Flushes the buffer and returns its contents. + """ + old = self.buf + self.buf = bytearray() + return old + + def insert(self, event: Event) -> None: + """ + Inserts an event into the queue. + """ + trace('added event {event}', event=event) + self.events.append(event) + + def push(self, char: int | bytes) -> None: + """ + Processes a character by updating the buffer and handling special key mappings. + """ + assert isinstance(char, (int, bytes)) + ord_char = char if isinstance(char, int) else ord(char) + char = ord_char.to_bytes() + self.buf.append(ord_char) + + if char in self.keymap: + if self.keymap is self.compiled_keymap: + # sanity check, buffer is empty when a special key comes + assert len(self.buf) == 1 + k = self.keymap[char] + trace('found map {k!r}', k=k) + if isinstance(k, dict): + self.keymap = k + else: + self.insert(Event('key', k, bytes(self.flush_buf()))) + self.keymap = self.compiled_keymap + + elif self.buf and self.buf[0] == 27: # escape + # escape sequence not recognized by our keymap: propagate it + # outside so that i can be recognized as an M-... key (see also + # the docstring in keymap.py + trace('unrecognized escape sequence, propagating...') + self.keymap = self.compiled_keymap + self.insert(Event('key', '\033', b'\033')) + for _c in self.flush_buf()[1:]: + self.push(_c) + + else: + try: + decoded = bytes(self.buf).decode(self.encoding) + except UnicodeError: + return + else: + self.insert(Event('key', decoded, bytes(self.flush_buf()))) + self.keymap = self.compiled_keymap diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py new file mode 100644 index 00000000000..10127e58897 --- /dev/null +++ b/Lib/_pyrepl/commands.py @@ -0,0 +1,505 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations +import os +import time + +# Categories of actions: +# killing +# yanking +# motion +# editing +# history +# finishing +# [completion] + +from .trace import trace + +# types +if False: + from .historical_reader import HistoricalReader + + +class Command: + finish: bool = False + kills_digit_arg: bool = True + + def __init__( + self, reader: HistoricalReader, event_name: str, event: list[str] + ) -> None: + # Reader should really be "any reader" but there's too much usage of + # HistoricalReader methods and fields in the code below for us to + # refactor at the moment. + + self.reader = reader + self.event = event + self.event_name = event_name + + def do(self) -> None: + pass + + +class KillCommand(Command): + def kill_range(self, start: int, end: int) -> None: + if start == end: + return + r = self.reader + b = r.buffer + text = b[start:end] + del b[start:end] + if is_kill(r.last_command): + if start < r.pos: + r.kill_ring[-1] = text + r.kill_ring[-1] + else: + r.kill_ring[-1] = r.kill_ring[-1] + text + else: + r.kill_ring.append(text) + r.pos = start + r.dirty = True + + +class YankCommand(Command): + pass + + +class MotionCommand(Command): + pass + + +class EditCommand(Command): + pass + + +class FinishCommand(Command): + finish = True + pass + + +def is_kill(command: type[Command] | None) -> bool: + return command is not None and issubclass(command, KillCommand) + + +def is_yank(command: type[Command] | None) -> bool: + return command is not None and issubclass(command, YankCommand) + + +# etc + + +class digit_arg(Command): + kills_digit_arg = False + + def do(self) -> None: + r = self.reader + c = self.event[-1] + if c == "-": + if r.arg is not None: + r.arg = -r.arg + else: + r.arg = -1 + else: + d = int(c) + if r.arg is None: + r.arg = d + else: + if r.arg < 0: + r.arg = 10 * r.arg - d + else: + r.arg = 10 * r.arg + d + r.dirty = True + + +class clear_screen(Command): + def do(self) -> None: + r = self.reader + r.console.clear() + r.dirty = True + + +class refresh(Command): + def do(self) -> None: + self.reader.dirty = True + + +class repaint(Command): + def do(self) -> None: + self.reader.dirty = True + self.reader.console.repaint() + + +class kill_line(KillCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + eol = r.eol() + for c in b[r.pos : eol]: + if not c.isspace(): + self.kill_range(r.pos, eol) + return + else: + self.kill_range(r.pos, eol + 1) + + +class unix_line_discard(KillCommand): + def do(self) -> None: + r = self.reader + self.kill_range(r.bol(), r.pos) + + +class unix_word_rubout(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.bow(), r.pos) + + +class kill_word(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.pos, r.eow()) + + +class backward_kill_word(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.bow(), r.pos) + + +class yank(YankCommand): + def do(self) -> None: + r = self.reader + if not r.kill_ring: + r.error("nothing to yank") + return + r.insert(r.kill_ring[-1]) + + +class yank_pop(YankCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + if not r.kill_ring: + r.error("nothing to yank") + return + if not is_yank(r.last_command): + r.error("previous command was not a yank") + return + repl = len(r.kill_ring[-1]) + r.kill_ring.insert(0, r.kill_ring.pop()) + t = r.kill_ring[-1] + b[r.pos - repl : r.pos] = t + r.pos = r.pos - repl + len(t) + r.dirty = True + + +class interrupt(FinishCommand): + def do(self) -> None: + import signal + + self.reader.console.finish() + self.reader.finish() + os.kill(os.getpid(), signal.SIGINT) + + +class ctrl_c(Command): + def do(self) -> None: + self.reader.console.finish() + self.reader.finish() + raise KeyboardInterrupt + + +class suspend(Command): + def do(self) -> None: + import signal + + r = self.reader + p = r.pos + r.console.finish() + os.kill(os.getpid(), signal.SIGSTOP) + ## this should probably be done + ## in a handler for SIGCONT? + r.console.prepare() + r.pos = p + # r.posxy = 0, 0 # XXX this is invalid + r.dirty = True + r.console.screen = [] + + +class up(MotionCommand): + def do(self) -> None: + r = self.reader + for _ in range(r.get_arg()): + x, y = r.pos2xy() + new_y = y - 1 + + if r.bol() == 0: + if r.historyi > 0: + r.select_item(r.historyi - 1) + return + r.pos = 0 + r.error("start of buffer") + return + + if ( + x + > ( + new_x := r.max_column(new_y) + ) # we're past the end of the previous line + or x == r.max_column(y) + and any( + not i.isspace() for i in r.buffer[r.bol() :] + ) # move between eols + ): + x = new_x + + r.setpos_from_xy(x, new_y) + + +class down(MotionCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for _ in range(r.get_arg()): + x, y = r.pos2xy() + new_y = y + 1 + + if r.eol() == len(b): + if r.historyi < len(r.history): + r.select_item(r.historyi + 1) + r.pos = r.eol(0) + return + r.pos = len(b) + r.error("end of buffer") + return + + if ( + x + > ( + new_x := r.max_column(new_y) + ) # we're past the end of the previous line + or x == r.max_column(y) + and any( + not i.isspace() for i in r.buffer[r.bol() :] + ) # move between eols + ): + x = new_x + + r.setpos_from_xy(x, new_y) + + +class left(MotionCommand): + def do(self) -> None: + r = self.reader + for _ in range(r.get_arg()): + p = r.pos - 1 + if p >= 0: + r.pos = p + else: + self.reader.error("start of buffer") + + +class right(MotionCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for _ in range(r.get_arg()): + p = r.pos + 1 + if p <= len(b): + r.pos = p + else: + self.reader.error("end of buffer") + + +class beginning_of_line(MotionCommand): + def do(self) -> None: + self.reader.pos = self.reader.bol() + + +class end_of_line(MotionCommand): + def do(self) -> None: + self.reader.pos = self.reader.eol() + + +class home(MotionCommand): + def do(self) -> None: + self.reader.pos = 0 + + +class end(MotionCommand): + def do(self) -> None: + self.reader.pos = len(self.reader.buffer) + + +class forward_word(MotionCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + r.pos = r.eow() + + +class backward_word(MotionCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + r.pos = r.bow() + + +class self_insert(EditCommand): + def do(self) -> None: + r = self.reader + text = self.event * r.get_arg() + r.insert(text) + if r.paste_mode: + data = "" + ev = r.console.getpending() + data += ev.data + if data: + r.insert(data) + r.last_refresh_cache.invalidated = True + + +class insert_nl(EditCommand): + def do(self) -> None: + r = self.reader + r.insert("\n" * r.get_arg()) + + +class transpose_characters(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + s = r.pos - 1 + if s < 0: + r.error("cannot transpose at start of buffer") + else: + if s == len(b): + s -= 1 + t = min(s + r.get_arg(), len(b) - 1) + c = b[s] + del b[s] + b.insert(t, c) + r.pos = t + r.dirty = True + + +class backspace(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for i in range(r.get_arg()): + if r.pos > 0: + r.pos -= 1 + del b[r.pos] + r.dirty = True + else: + self.reader.error("can't backspace at start") + + +class delete(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + if self.event[-1] == "\004": + if b and b[-1].endswith("\n"): + self.finish = True + elif ( + r.pos == 0 + and len(b) == 0 # this is something of a hack + ): + r.update_screen() + r.console.finish() + raise EOFError + + for i in range(r.get_arg()): + if r.pos != len(b): + del b[r.pos] + r.dirty = True + else: + self.reader.error("end of buffer") + + +class accept(FinishCommand): + def do(self) -> None: + pass + + +class help(Command): + def do(self) -> None: + import _sitebuiltins + + with self.reader.suspend(): + self.reader.msg = _sitebuiltins._Helper()() # type: ignore[assignment] + + +class invalid_key(Command): + def do(self) -> None: + pending = self.reader.console.getpending() + s = "".join(self.event) + pending.data + self.reader.error("`%r' not bound" % s) + + +class invalid_command(Command): + def do(self) -> None: + s = self.event_name + self.reader.error("command `%s' not known" % s) + + +class show_history(Command): + def do(self) -> None: + from .pager import get_pager + from site import gethistoryfile + + history = os.linesep.join(self.reader.history[:]) + self.reader.console.restore() + pager = get_pager() + pager(history, gethistoryfile()) + self.reader.console.prepare() + + # We need to copy over the state so that it's consistent between + # console and reader, and console does not overwrite/append stuff + self.reader.console.screen = self.reader.screen.copy() + self.reader.console.posxy = self.reader.cxy + + +class paste_mode(Command): + def do(self) -> None: + self.reader.paste_mode = not self.reader.paste_mode + self.reader.dirty = True + + +class perform_bracketed_paste(Command): + def do(self) -> None: + done = "\x1b[201~" + data = "" + start = time.time() + while done not in data: + ev = self.reader.console.getpending() + data += ev.data + trace( + "bracketed pasting of {l} chars done in {s:.2f}s", + l=len(data), + s=time.time() - start, + ) + self.reader.insert(data.replace(done, "")) + self.reader.last_refresh_cache.invalidated = True diff --git a/Lib/_pyrepl/completing_reader.py b/Lib/_pyrepl/completing_reader.py new file mode 100644 index 00000000000..9d2d43be514 --- /dev/null +++ b/Lib/_pyrepl/completing_reader.py @@ -0,0 +1,299 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +from dataclasses import dataclass, field + +import re +from . import commands, console, reader +from .reader import Reader + + +# types +Command = commands.Command +if False: + from .types import KeySpec, CommandName + + +def prefix(wordlist: list[str], j: int = 0) -> str: + d = {} + i = j + try: + while 1: + for word in wordlist: + d[word[i]] = 1 + if len(d) > 1: + return wordlist[0][j:i] + i += 1 + d = {} + except IndexError: + return wordlist[0][j:i] + return "" + + +STRIPCOLOR_REGEX = re.compile(r"\x1B\[([0-9]{1,3}(;[0-9]{1,2})?)?[m|K]") + +def stripcolor(s: str) -> str: + return STRIPCOLOR_REGEX.sub('', s) + + +def real_len(s: str) -> int: + return len(stripcolor(s)) + + +def left_align(s: str, maxlen: int) -> str: + stripped = stripcolor(s) + if len(stripped) > maxlen: + # too bad, we remove the color + return stripped[:maxlen] + padding = maxlen - len(stripped) + return s + ' '*padding + + +def build_menu( + cons: console.Console, + wordlist: list[str], + start: int, + use_brackets: bool, + sort_in_column: bool, +) -> tuple[list[str], int]: + if use_brackets: + item = "[ %s ]" + padding = 4 + else: + item = "%s " + padding = 2 + maxlen = min(max(map(real_len, wordlist)), cons.width - padding) + cols = int(cons.width / (maxlen + padding)) + rows = int((len(wordlist) - 1)/cols + 1) + + if sort_in_column: + # sort_in_column=False (default) sort_in_column=True + # A B C A D G + # D E F B E + # G C F + # + # "fill" the table with empty words, so we always have the same amount + # of rows for each column + missing = cols*rows - len(wordlist) + wordlist = wordlist + ['']*missing + indexes = [(i % cols) * rows + i // cols for i in range(len(wordlist))] + wordlist = [wordlist[i] for i in indexes] + menu = [] + i = start + for r in range(rows): + row = [] + for col in range(cols): + row.append(item % left_align(wordlist[i], maxlen)) + i += 1 + if i >= len(wordlist): + break + menu.append(''.join(row)) + if i >= len(wordlist): + i = 0 + break + if r + 5 > cons.height: + menu.append(" %d more... " % (len(wordlist) - i)) + break + return menu, i + +# this gets somewhat user interface-y, and as a result the logic gets +# very convoluted. +# +# To summarise the summary of the summary:- people are a problem. +# -- The Hitch-Hikers Guide to the Galaxy, Episode 12 + +#### Desired behaviour of the completions commands. +# the considerations are: +# (1) how many completions are possible +# (2) whether the last command was a completion +# (3) if we can assume that the completer is going to return the same set of +# completions: this is controlled by the ``assume_immutable_completions`` +# variable on the reader, which is True by default to match the historical +# behaviour of pyrepl, but e.g. False in the ReadlineAlikeReader to match +# more closely readline's semantics (this is needed e.g. by +# fancycompleter) +# +# if there's no possible completion, beep at the user and point this out. +# this is easy. +# +# if there's only one possible completion, stick it in. if the last thing +# user did was a completion, point out that he isn't getting anywhere, but +# only if the ``assume_immutable_completions`` is True. +# +# now it gets complicated. +# +# for the first press of a completion key: +# if there's a common prefix, stick it in. + +# irrespective of whether anything got stuck in, if the word is now +# complete, show the "complete but not unique" message + +# if there's no common prefix and if the word is not now complete, +# beep. + +# common prefix -> yes no +# word complete \/ +# yes "cbnu" "cbnu" +# no - beep + +# for the second bang on the completion key +# there will necessarily be no common prefix +# show a menu of the choices. + +# for subsequent bangs, rotate the menu around (if there are sufficient +# choices). + + +class complete(commands.Command): + def do(self) -> None: + r: CompletingReader + r = self.reader # type: ignore[assignment] + last_is_completer = r.last_command_is(self.__class__) + immutable_completions = r.assume_immutable_completions + completions_unchangable = last_is_completer and immutable_completions + stem = r.get_stem() + if not completions_unchangable: + r.cmpltn_menu_choices = r.get_completions(stem) + + completions = r.cmpltn_menu_choices + if not completions: + r.error("no matches") + elif len(completions) == 1: + if completions_unchangable and len(completions[0]) == len(stem): + r.msg = "[ sole completion ]" + r.dirty = True + r.insert(completions[0][len(stem):]) + else: + p = prefix(completions, len(stem)) + if p: + r.insert(p) + if last_is_completer: + r.cmpltn_menu_visible = True + r.cmpltn_message_visible = False + r.cmpltn_menu, r.cmpltn_menu_end = build_menu( + r.console, completions, r.cmpltn_menu_end, + r.use_brackets, r.sort_in_column) + r.dirty = True + elif not r.cmpltn_menu_visible: + r.cmpltn_message_visible = True + if stem + p in completions: + r.msg = "[ complete but not unique ]" + r.dirty = True + else: + r.msg = "[ not unique ]" + r.dirty = True + + +class self_insert(commands.self_insert): + def do(self) -> None: + r: CompletingReader + r = self.reader # type: ignore[assignment] + + commands.self_insert.do(self) + if r.cmpltn_menu_visible: + stem = r.get_stem() + if len(stem) < 1: + r.cmpltn_reset() + else: + completions = [w for w in r.cmpltn_menu_choices + if w.startswith(stem)] + if completions: + r.cmpltn_menu, r.cmpltn_menu_end = build_menu( + r.console, completions, 0, + r.use_brackets, r.sort_in_column) + else: + r.cmpltn_reset() + + +@dataclass +class CompletingReader(Reader): + """Adds completion support""" + + ### Class variables + # see the comment for the complete command + assume_immutable_completions = True + use_brackets = True # display completions inside [] + sort_in_column = False + + ### Instance variables + cmpltn_menu: list[str] = field(init=False) + cmpltn_menu_visible: bool = field(init=False) + cmpltn_message_visible: bool = field(init=False) + cmpltn_menu_end: int = field(init=False) + cmpltn_menu_choices: list[str] = field(init=False) + + def __post_init__(self) -> None: + super().__post_init__() + self.cmpltn_reset() + for c in (complete, self_insert): + self.commands[c.__name__] = c + self.commands[c.__name__.replace('_', '-')] = c + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return super().collect_keymap() + ( + (r'\t', 'complete'),) + + def after_command(self, cmd: Command) -> None: + super().after_command(cmd) + if not isinstance(cmd, (complete, self_insert)): + self.cmpltn_reset() + + def calc_screen(self) -> list[str]: + screen = super().calc_screen() + if self.cmpltn_menu_visible: + # We display the completions menu below the current prompt + ly = self.lxy[1] + 1 + screen[ly:ly] = self.cmpltn_menu + # If we're not in the middle of multiline edit, don't append to screeninfo + # since that screws up the position calculation in pos2xy function. + # This is a hack to prevent the cursor jumping + # into the completions menu when pressing left or down arrow. + if self.pos != len(self.buffer): + self.screeninfo[ly:ly] = [(0, [])]*len(self.cmpltn_menu) + return screen + + def finish(self) -> None: + super().finish() + self.cmpltn_reset() + + def cmpltn_reset(self) -> None: + self.cmpltn_menu = [] + self.cmpltn_menu_visible = False + self.cmpltn_message_visible = False + self.cmpltn_menu_end = 0 + self.cmpltn_menu_choices = [] + + def get_stem(self) -> str: + st = self.syntax_table + SW = reader.SYNTAX_WORD + b = self.buffer + p = self.pos - 1 + while p >= 0 and st.get(b[p], SW) == SW: + p -= 1 + return ''.join(b[p+1:self.pos]) + + def get_completions(self, stem: str) -> list[str]: + return [] + + def get_line(self) -> str: + """Return the current line until the cursor position.""" + return ''.join(self.buffer[:self.pos]) diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py new file mode 100644 index 00000000000..8956fb1242e --- /dev/null +++ b/Lib/_pyrepl/console.py @@ -0,0 +1,229 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import _colorize + +from abc import ABC, abstractmethod +import ast +import code +import linecache +from dataclasses import dataclass, field +import os.path +import sys + + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import IO + from typing import Callable + + +@dataclass +class Event: + evt: str + data: str + raw: bytes = b"" + + +@dataclass +class Console(ABC): + posxy: tuple[int, int] + screen: list[str] = field(default_factory=list) + height: int = 25 + width: int = 80 + + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + self.encoding = encoding or sys.getdefaultencoding() + + if isinstance(f_in, int): + self.input_fd = f_in + else: + self.input_fd = f_in.fileno() + + if isinstance(f_out, int): + self.output_fd = f_out + else: + self.output_fd = f_out.fileno() + + @abstractmethod + def refresh(self, screen: list[str], xy: tuple[int, int]) -> None: ... + + @abstractmethod + def prepare(self) -> None: ... + + @abstractmethod + def restore(self) -> None: ... + + @abstractmethod + def move_cursor(self, x: int, y: int) -> None: ... + + @abstractmethod + def set_cursor_vis(self, visible: bool) -> None: ... + + @abstractmethod + def getheightwidth(self) -> tuple[int, int]: + """Return (height, width) where height and width are the height + and width of the terminal window in characters.""" + ... + + @abstractmethod + def get_event(self, block: bool = True) -> Event | None: + """Return an Event instance. Returns None if |block| is false + and there is no event pending, otherwise waits for the + completion of an event.""" + ... + + @abstractmethod + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + ... + + @abstractmethod + def beep(self) -> None: ... + + @abstractmethod + def clear(self) -> None: + """Wipe the screen""" + ... + + @abstractmethod + def finish(self) -> None: + """Move the cursor to the end of the display and otherwise get + ready for end. XXX could be merged with restore? Hmm.""" + ... + + @abstractmethod + def flushoutput(self) -> None: + """Flush all output to the screen (assuming there's some + buffering going on somewhere).""" + ... + + @abstractmethod + def forgetinput(self) -> None: + """Forget all pending, but not yet processed input.""" + ... + + @abstractmethod + def getpending(self) -> Event: + """Return the characters that have been typed but not yet + processed.""" + ... + + @abstractmethod + def wait(self, timeout: float | None) -> bool: + """Wait for an event. The return value is True if an event is + available, False if the timeout has been reached. If timeout is + None, wait forever. The timeout is in milliseconds.""" + ... + + @property + def input_hook(self) -> Callable[[], int] | None: + """Returns the current input hook.""" + ... + + @abstractmethod + def repaint(self) -> None: ... + + +class InteractiveColoredConsole(code.InteractiveConsole): + STATEMENT_FAILED = object() + + def __init__( + self, + locals: dict[str, object] | None = None, + filename: str = "", + *, + local_exit: bool = False, + ) -> None: + super().__init__(locals=locals, filename=filename, local_exit=local_exit) + self.can_colorize = _colorize.can_colorize() + + def showsyntaxerror(self, filename=None, **kwargs): + super().showsyntaxerror(filename=filename, **kwargs) + + def _excepthook(self, typ, value, tb): + import traceback + lines = traceback.format_exception( + typ, value, tb, + colorize=self.can_colorize, + limit=traceback.BUILTIN_EXCEPTION_LIMIT) + self.write(''.join(lines)) + + def runcode(self, code): + try: + exec(code, self.locals) + except SystemExit: + raise + except BaseException: + self.showtraceback() + return self.STATEMENT_FAILED + return None + + def runsource(self, source, filename="", symbol="single"): + try: + tree = self.compile.compiler( + source, + filename, + "exec", + ast.PyCF_ONLY_AST, + incomplete_input=False, + ) + except (SyntaxError, OverflowError, ValueError): + self.showsyntaxerror(filename, source=source) + return False + if tree.body: + *_, last_stmt = tree.body + for stmt in tree.body: + wrapper = ast.Interactive if stmt is last_stmt else ast.Module + the_symbol = symbol if stmt is last_stmt else "exec" + item = wrapper([stmt]) + try: + code = self.compile.compiler(item, filename, the_symbol) + linecache._register_code(code, source, filename) + except SyntaxError as e: + if e.args[0] == "'await' outside function": + python = os.path.basename(sys.executable) + e.add_note( + f"Try the asyncio REPL ({python} -m asyncio) to use" + f" top-level 'await' and run background asyncio tasks." + ) + self.showsyntaxerror(filename, source=source) + return False + except (OverflowError, ValueError): + self.showsyntaxerror(filename, source=source) + return False + + if code is None: + return True + + result = self.runcode(code) + if result is self.STATEMENT_FAILED: + break + return False diff --git a/Lib/_pyrepl/fancy_termios.py b/Lib/_pyrepl/fancy_termios.py new file mode 100644 index 00000000000..8d5bd183f21 --- /dev/null +++ b/Lib/_pyrepl/fancy_termios.py @@ -0,0 +1,82 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +import termios + + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import cast +else: + cast = lambda typ, val: val + + +class TermState: + def __init__(self, attrs: list[int | list[bytes]]) -> None: + self.iflag = cast(int, attrs[0]) + self.oflag = cast(int, attrs[1]) + self.cflag = cast(int, attrs[2]) + self.lflag = cast(int, attrs[3]) + self.ispeed = cast(int, attrs[4]) + self.ospeed = cast(int, attrs[5]) + self.cc = cast(list[bytes], attrs[6]) + + def as_list(self) -> list[int | list[bytes]]: + return [ + self.iflag, + self.oflag, + self.cflag, + self.lflag, + self.ispeed, + self.ospeed, + # Always return a copy of the control characters list to ensure + # there are not any additional references to self.cc + self.cc[:], + ] + + def copy(self) -> "TermState": + return self.__class__(self.as_list()) + + +def tcgetattr(fd: int) -> TermState: + return TermState(termios.tcgetattr(fd)) + + +def tcsetattr(fd: int, when: int, attrs: TermState) -> None: + termios.tcsetattr(fd, when, attrs.as_list()) + + +class Term(TermState): + TS__init__ = TermState.__init__ + + def __init__(self, fd: int = 0) -> None: + self.TS__init__(termios.tcgetattr(fd)) + self.fd = fd + self.stack: list[list[int | list[bytes]]] = [] + + def save(self) -> None: + self.stack.append(self.as_list()) + + def set(self, when: int = termios.TCSANOW) -> None: + termios.tcsetattr(self.fd, when, self.as_list()) + + def restore(self) -> None: + self.TS__init__(self.stack.pop()) + self.set() diff --git a/Lib/_pyrepl/historical_reader.py b/Lib/_pyrepl/historical_reader.py new file mode 100644 index 00000000000..c4b95fa2e81 --- /dev/null +++ b/Lib/_pyrepl/historical_reader.py @@ -0,0 +1,419 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +from contextlib import contextmanager +from dataclasses import dataclass, field + +from . import commands, input +from .reader import Reader + + +if False: + from .types import SimpleContextManager, KeySpec, CommandName + + +isearch_keymap: tuple[tuple[KeySpec, CommandName], ...] = tuple( + [("\\%03o" % c, "isearch-end") for c in range(256) if chr(c) != "\\"] + + [(c, "isearch-add-character") for c in map(chr, range(32, 127)) if c != "\\"] + + [ + ("\\%03o" % c, "isearch-add-character") + for c in range(256) + if chr(c).isalpha() and chr(c) != "\\" + ] + + [ + ("\\\\", "self-insert"), + (r"\C-r", "isearch-backwards"), + (r"\C-s", "isearch-forwards"), + (r"\C-c", "isearch-cancel"), + (r"\C-g", "isearch-cancel"), + (r"\", "isearch-backspace"), + ] +) + +ISEARCH_DIRECTION_NONE = "" +ISEARCH_DIRECTION_BACKWARDS = "r" +ISEARCH_DIRECTION_FORWARDS = "f" + + +class next_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi == len(r.history): + r.error("end of history list") + return + r.select_item(r.historyi + 1) + + +class previous_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi == 0: + r.error("start of history list") + return + r.select_item(r.historyi - 1) + + +class history_search_backward(commands.Command): + def do(self) -> None: + r = self.reader + r.search_next(forwards=False) + + +class history_search_forward(commands.Command): + def do(self) -> None: + r = self.reader + r.search_next(forwards=True) + + +class restore_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi != len(r.history): + if r.get_unicode() != r.history[r.historyi]: + r.buffer = list(r.history[r.historyi]) + r.pos = len(r.buffer) + r.dirty = True + + +class first_history(commands.Command): + def do(self) -> None: + self.reader.select_item(0) + + +class last_history(commands.Command): + def do(self) -> None: + self.reader.select_item(len(self.reader.history)) + + +class operate_and_get_next(commands.FinishCommand): + def do(self) -> None: + self.reader.next_history = self.reader.historyi + 1 + + +class yank_arg(commands.Command): + def do(self) -> None: + r = self.reader + if r.last_command is self.__class__: + r.yank_arg_i += 1 + else: + r.yank_arg_i = 0 + if r.historyi < r.yank_arg_i: + r.error("beginning of history list") + return + a = r.get_arg(-1) + # XXX how to split? + words = r.get_item(r.historyi - r.yank_arg_i - 1).split() + if a < -len(words) or a >= len(words): + r.error("no such arg") + return + w = words[a] + b = r.buffer + if r.yank_arg_i > 0: + o = len(r.yank_arg_yanked) + else: + o = 0 + b[r.pos - o : r.pos] = list(w) + r.yank_arg_yanked = w + r.pos += len(w) - o + r.dirty = True + + +class forward_history_isearch(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_FORWARDS + r.isearch_start = r.historyi, r.pos + r.isearch_term = "" + r.dirty = True + r.push_input_trans(r.isearch_trans) + + +class reverse_history_isearch(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_BACKWARDS + r.dirty = True + r.isearch_term = "" + r.push_input_trans(r.isearch_trans) + r.isearch_start = r.historyi, r.pos + + +class isearch_cancel(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_NONE + r.pop_input_trans() + r.select_item(r.isearch_start[0]) + r.pos = r.isearch_start[1] + r.dirty = True + + +class isearch_add_character(commands.Command): + def do(self) -> None: + r = self.reader + b = r.buffer + r.isearch_term += self.event[-1] + r.dirty = True + p = r.pos + len(r.isearch_term) - 1 + if b[p : p + 1] != [r.isearch_term[-1]]: + r.isearch_next() + + +class isearch_backspace(commands.Command): + def do(self) -> None: + r = self.reader + if len(r.isearch_term) > 0: + r.isearch_term = r.isearch_term[:-1] + r.dirty = True + else: + r.error("nothing to rubout") + + +class isearch_forwards(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_FORWARDS + r.isearch_next() + + +class isearch_backwards(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_BACKWARDS + r.isearch_next() + + +class isearch_end(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_NONE + r.console.forgetinput() + r.pop_input_trans() + r.dirty = True + + +@dataclass +class HistoricalReader(Reader): + """Adds history support (with incremental history searching) to the + Reader class. + """ + + history: list[str] = field(default_factory=list) + historyi: int = 0 + next_history: int | None = None + transient_history: dict[int, str] = field(default_factory=dict) + isearch_term: str = "" + isearch_direction: str = ISEARCH_DIRECTION_NONE + isearch_start: tuple[int, int] = field(init=False) + isearch_trans: input.KeymapTranslator = field(init=False) + yank_arg_i: int = 0 + yank_arg_yanked: str = "" + + def __post_init__(self) -> None: + super().__post_init__() + for c in [ + next_history, + previous_history, + restore_history, + first_history, + last_history, + yank_arg, + forward_history_isearch, + reverse_history_isearch, + isearch_end, + isearch_add_character, + isearch_cancel, + isearch_add_character, + isearch_backspace, + isearch_forwards, + isearch_backwards, + operate_and_get_next, + history_search_backward, + history_search_forward, + ]: + self.commands[c.__name__] = c + self.commands[c.__name__.replace("_", "-")] = c + self.isearch_start = self.historyi, self.pos + self.isearch_trans = input.KeymapTranslator( + isearch_keymap, invalid_cls=isearch_end, character_cls=isearch_add_character + ) + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return super().collect_keymap() + ( + (r"\C-n", "next-history"), + (r"\C-p", "previous-history"), + (r"\C-o", "operate-and-get-next"), + (r"\C-r", "reverse-history-isearch"), + (r"\C-s", "forward-history-isearch"), + (r"\M-r", "restore-history"), + (r"\M-.", "yank-arg"), + (r"\", "history-search-forward"), + (r"\x1b[6~", "history-search-forward"), + (r"\", "history-search-backward"), + (r"\x1b[5~", "history-search-backward"), + ) + + def select_item(self, i: int) -> None: + self.transient_history[self.historyi] = self.get_unicode() + buf = self.transient_history.get(i) + if buf is None: + buf = self.history[i].rstrip() + self.buffer = list(buf) + self.historyi = i + self.pos = len(self.buffer) + self.dirty = True + self.last_refresh_cache.invalidated = True + + def get_item(self, i: int) -> str: + if i != len(self.history): + return self.transient_history.get(i, self.history[i]) + else: + return self.transient_history.get(i, self.get_unicode()) + + @contextmanager + def suspend(self) -> SimpleContextManager: + with super().suspend(), self.suspend_history(): + yield + + @contextmanager + def suspend_history(self) -> SimpleContextManager: + try: + old_history = self.history[:] + del self.history[:] + yield + finally: + self.history[:] = old_history + + def prepare(self) -> None: + super().prepare() + try: + self.transient_history = {} + if self.next_history is not None and self.next_history < len(self.history): + self.historyi = self.next_history + self.buffer[:] = list(self.history[self.next_history]) + self.pos = len(self.buffer) + self.transient_history[len(self.history)] = "" + else: + self.historyi = len(self.history) + self.next_history = None + except: + self.restore() + raise + + def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: + if cursor_on_line and self.isearch_direction != ISEARCH_DIRECTION_NONE: + d = "rf"[self.isearch_direction == ISEARCH_DIRECTION_FORWARDS] + return "(%s-search `%s') " % (d, self.isearch_term) + else: + return super().get_prompt(lineno, cursor_on_line) + + def search_next(self, *, forwards: bool) -> None: + """Search history for the current line contents up to the cursor. + + Selects the first item found. If nothing is under the cursor, any next + item in history is selected. + """ + pos = self.pos + s = self.get_unicode() + history_index = self.historyi + + # In multiline contexts, we're only interested in the current line. + nl_index = s.rfind('\n', 0, pos) + prefix = s[nl_index + 1:pos] + pos = len(prefix) + + match_prefix = len(prefix) + len_item = 0 + if history_index < len(self.history): + len_item = len(self.get_item(history_index)) + if len_item and pos == len_item: + match_prefix = False + elif not pos: + match_prefix = False + + while 1: + if forwards: + out_of_bounds = history_index >= len(self.history) - 1 + else: + out_of_bounds = history_index == 0 + if out_of_bounds: + if forwards and not match_prefix: + self.pos = 0 + self.buffer = [] + self.dirty = True + else: + self.error("not found") + return + + history_index += 1 if forwards else -1 + s = self.get_item(history_index) + + if not match_prefix: + self.select_item(history_index) + return + + len_acc = 0 + for i, line in enumerate(s.splitlines(keepends=True)): + if line.startswith(prefix): + self.select_item(history_index) + self.pos = pos + len_acc + return + len_acc += len(line) + + def isearch_next(self) -> None: + st = self.isearch_term + p = self.pos + i = self.historyi + s = self.get_unicode() + forwards = self.isearch_direction == ISEARCH_DIRECTION_FORWARDS + while 1: + if forwards: + p = s.find(st, p + 1) + else: + p = s.rfind(st, 0, p + len(st) - 1) + if p != -1: + self.select_item(i) + self.pos = p + return + elif (forwards and i >= len(self.history) - 1) or (not forwards and i == 0): + self.error("not found") + return + else: + if forwards: + i += 1 + s = self.get_item(i) + p = -1 + else: + i -= 1 + s = self.get_item(i) + p = len(s) + + def finish(self) -> None: + super().finish() + ret = self.get_unicode() + for i, t in self.transient_history.items(): + if i < len(self.history) and i != self.historyi: + self.history[i] = t + if ret and should_auto_add_history: + self.history.append(ret) + + +should_auto_add_history = True diff --git a/Lib/_pyrepl/input.py b/Lib/_pyrepl/input.py new file mode 100644 index 00000000000..21c24eb5cde --- /dev/null +++ b/Lib/_pyrepl/input.py @@ -0,0 +1,114 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +# (naming modules after builtin functions is not such a hot idea...) + +# an KeyTrans instance translates Event objects into Command objects + +# hmm, at what level do we want [C-i] and [tab] to be equivalent? +# [meta-a] and [esc a]? obviously, these are going to be equivalent +# for the UnixConsole, but should they be for PygameConsole? + +# it would in any situation seem to be a bad idea to bind, say, [tab] +# and [C-i] to *different* things... but should binding one bind the +# other? + +# executive, temporary decision: [tab] and [C-i] are distinct, but +# [meta-key] is identified with [esc key]. We demand that any console +# class does quite a lot towards emulating a unix terminal. + +from __future__ import annotations + +from abc import ABC, abstractmethod +import unicodedata +from collections import deque + + +# types +if False: + from .types import EventTuple + + +class InputTranslator(ABC): + @abstractmethod + def push(self, evt: EventTuple) -> None: + pass + + @abstractmethod + def get(self) -> EventTuple | None: + return None + + @abstractmethod + def empty(self) -> bool: + return True + + +class KeymapTranslator(InputTranslator): + def __init__(self, keymap, verbose=False, invalid_cls=None, character_cls=None): + self.verbose = verbose + from .keymap import compile_keymap, parse_keys + + self.keymap = keymap + self.invalid_cls = invalid_cls + self.character_cls = character_cls + d = {} + for keyspec, command in keymap: + keyseq = tuple(parse_keys(keyspec)) + d[keyseq] = command + if self.verbose: + print(d) + self.k = self.ck = compile_keymap(d, ()) + self.results = deque() + self.stack = [] + + def push(self, evt): + if self.verbose: + print("pushed", evt.data, end="") + key = evt.data + d = self.k.get(key) + if isinstance(d, dict): + if self.verbose: + print("transition") + self.stack.append(key) + self.k = d + else: + if d is None: + if self.verbose: + print("invalid") + if self.stack or len(key) > 1 or unicodedata.category(key) == "C": + self.results.append((self.invalid_cls, self.stack + [key])) + else: + # small optimization: + self.k[key] = self.character_cls + self.results.append((self.character_cls, [key])) + else: + if self.verbose: + print("matched", d) + self.results.append((d, self.stack + [key])) + self.stack = [] + self.k = self.ck + + def get(self): + if self.results: + return self.results.popleft() + else: + return None + + def empty(self) -> bool: + return not self.results diff --git a/Lib/_pyrepl/keymap.py b/Lib/_pyrepl/keymap.py new file mode 100644 index 00000000000..d11df4b5164 --- /dev/null +++ b/Lib/_pyrepl/keymap.py @@ -0,0 +1,213 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +""" +Keymap contains functions for parsing keyspecs and turning keyspecs into +appropriate sequences. + +A keyspec is a string representing a sequence of key presses that can +be bound to a command. All characters other than the backslash represent +themselves. In the traditional manner, a backslash introduces an escape +sequence. + +pyrepl uses its own keyspec format that is meant to be a strict superset of +readline's KEYSEQ format. This means that if a spec is found that readline +accepts that this doesn't, it should be logged as a bug. Note that this means +we're using the '\\C-o' style of readline's keyspec, not the 'Control-o' sort. + +The extension to readline is that the sequence \\ denotes the +sequence of characters produced by hitting KEY. + +Examples: +'a' - what you get when you hit the 'a' key +'\\EOA' - Escape - O - A (up, on my terminal) +'\\' - the up arrow key +'\\' - ditto (keynames are case-insensitive) +'\\C-o', '\\c-o' - control-o +'\\M-.' - meta-period +'\\E.' - ditto (that's how meta works for pyrepl) +'\\', '\\', '\\t', '\\011', '\\x09', '\\X09', '\\C-i', '\\C-I' + - all of these are the tab character. +""" + +_escapes = { + "\\": "\\", + "'": "'", + '"': '"', + "a": "\a", + "b": "\b", + "e": "\033", + "f": "\f", + "n": "\n", + "r": "\r", + "t": "\t", + "v": "\v", +} + +_keynames = { + "backspace": "backspace", + "delete": "delete", + "down": "down", + "end": "end", + "enter": "\r", + "escape": "\033", + "f1": "f1", + "f2": "f2", + "f3": "f3", + "f4": "f4", + "f5": "f5", + "f6": "f6", + "f7": "f7", + "f8": "f8", + "f9": "f9", + "f10": "f10", + "f11": "f11", + "f12": "f12", + "f13": "f13", + "f14": "f14", + "f15": "f15", + "f16": "f16", + "f17": "f17", + "f18": "f18", + "f19": "f19", + "f20": "f20", + "home": "home", + "insert": "insert", + "left": "left", + "page down": "page down", + "page up": "page up", + "return": "\r", + "right": "right", + "space": " ", + "tab": "\t", + "up": "up", +} + + +class KeySpecError(Exception): + pass + + +def parse_keys(keys: str) -> list[str]: + """Parse keys in keyspec format to a sequence of keys.""" + s = 0 + r: list[str] = [] + while s < len(keys): + k, s = _parse_single_key_sequence(keys, s) + r.extend(k) + return r + + +def _parse_single_key_sequence(key: str, s: int) -> tuple[list[str], int]: + ctrl = 0 + meta = 0 + ret = "" + while not ret and s < len(key): + if key[s] == "\\": + c = key[s + 1].lower() + if c in _escapes: + ret = _escapes[c] + s += 2 + elif c == "c": + if key[s + 2] != "-": + raise KeySpecError( + "\\C must be followed by `-' (char %d of %s)" + % (s + 2, repr(key)) + ) + if ctrl: + raise KeySpecError( + "doubled \\C- (char %d of %s)" % (s + 1, repr(key)) + ) + ctrl = 1 + s += 3 + elif c == "m": + if key[s + 2] != "-": + raise KeySpecError( + "\\M must be followed by `-' (char %d of %s)" + % (s + 2, repr(key)) + ) + if meta: + raise KeySpecError( + "doubled \\M- (char %d of %s)" % (s + 1, repr(key)) + ) + meta = 1 + s += 3 + elif c.isdigit(): + n = key[s + 1 : s + 4] + ret = chr(int(n, 8)) + s += 4 + elif c == "x": + n = key[s + 2 : s + 4] + ret = chr(int(n, 16)) + s += 4 + elif c == "<": + t = key.find(">", s) + if t == -1: + raise KeySpecError( + "unterminated \\< starting at char %d of %s" + % (s + 1, repr(key)) + ) + ret = key[s + 2 : t].lower() + if ret not in _keynames: + raise KeySpecError( + "unrecognised keyname `%s' at char %d of %s" + % (ret, s + 2, repr(key)) + ) + ret = _keynames[ret] + s = t + 1 + else: + raise KeySpecError( + "unknown backslash escape %s at char %d of %s" + % (repr(c), s + 2, repr(key)) + ) + else: + ret = key[s] + s += 1 + if ctrl: + if len(ret) == 1: + ret = chr(ord(ret) & 0x1F) # curses.ascii.ctrl() + elif ret in {"left", "right"}: + ret = f"ctrl {ret}" + else: + raise KeySpecError("\\C- followed by invalid key") + + result = [ret], s + if meta: + result[0].insert(0, "\033") + return result + + +def compile_keymap(keymap, empty=b""): + r = {} + for key, value in keymap.items(): + if isinstance(key, bytes): + first = key[:1] + else: + first = key[0] + r.setdefault(first, {})[key[1:]] = value + for key, value in r.items(): + if empty in value: + if len(value) != 1: + raise KeySpecError("key definitions for %s clash" % (value.values(),)) + else: + r[key] = value[empty] + else: + r[key] = compile_keymap(value, empty) + return r diff --git a/Lib/_pyrepl/main.py b/Lib/_pyrepl/main.py new file mode 100644 index 00000000000..447eb1e551e --- /dev/null +++ b/Lib/_pyrepl/main.py @@ -0,0 +1,58 @@ +import errno +import os +import sys +import types + + +CAN_USE_PYREPL: bool +FAIL_REASON: str +try: + if sys.platform == "win32" and sys.getwindowsversion().build < 10586: + raise RuntimeError("Windows 10 TH2 or later required") + if not os.isatty(sys.stdin.fileno()): + raise OSError(errno.ENOTTY, "tty required", "stdin") + from .simple_interact import check + if err := check(): + raise RuntimeError(err) +except Exception as e: + CAN_USE_PYREPL = False + FAIL_REASON = f"warning: can't use pyrepl: {e}" +else: + CAN_USE_PYREPL = True + FAIL_REASON = "" + + +def interactive_console(mainmodule=None, quiet=False, pythonstartup=False): + if not CAN_USE_PYREPL: + if not os.getenv('PYTHON_BASIC_REPL') and FAIL_REASON: + from .trace import trace + trace(FAIL_REASON) + print(FAIL_REASON, file=sys.stderr) + return sys._baserepl() + + if not mainmodule: + mainmodule = types.ModuleType("__main__") + + namespace = mainmodule.__dict__ + + # sys._baserepl() above does this internally, we do it here + startup_path = os.getenv("PYTHONSTARTUP") + if pythonstartup and startup_path: + sys.audit("cpython.run_startup", startup_path) + + import tokenize + with tokenize.open(startup_path) as f: + startup_code = compile(f.read(), startup_path, "exec") + exec(startup_code, namespace) + + # set sys.{ps1,ps2} just before invoking the interactive interpreter. This + # mimics what CPython does in pythonrun.c + if not hasattr(sys, "ps1"): + sys.ps1 = ">>> " + if not hasattr(sys, "ps2"): + sys.ps2 = "... " + + from .console import InteractiveColoredConsole + from .simple_interact import run_multiline_interactive_console + console = InteractiveColoredConsole(namespace, filename="") + run_multiline_interactive_console(console) diff --git a/Lib/_pyrepl/mypy.ini b/Lib/_pyrepl/mypy.ini new file mode 100644 index 00000000000..9375a55b53c --- /dev/null +++ b/Lib/_pyrepl/mypy.ini @@ -0,0 +1,25 @@ +# Config file for running mypy on _pyrepl. +# Run mypy by invoking `mypy --config-file Lib/_pyrepl/mypy.ini` +# on the command-line from the repo root + +[mypy] +files = Lib/_pyrepl +mypy_path = $MYPY_CONFIG_FILE_DIR/../../Misc/mypy +explicit_package_bases = True +python_version = 3.13 +platform = linux +pretty = True + +# Enable most stricter settings +enable_error_code = ignore-without-code,redundant-expr +strict = True + +# Various stricter settings that we can't yet enable +# Try to enable these in the following order: +disallow_untyped_calls = False +disallow_untyped_defs = False +check_untyped_defs = False + +# Various internal modules that typeshed deliberately doesn't have stubs for: +[mypy-_abc.*,_opcode.*,_overlapped.*,_testcapi.*,_testinternalcapi.*,test.*] +ignore_missing_imports = True diff --git a/Lib/_pyrepl/pager.py b/Lib/_pyrepl/pager.py new file mode 100644 index 00000000000..1fddc63e3ee --- /dev/null +++ b/Lib/_pyrepl/pager.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import io +import os +import re +import sys + + +# types +if False: + from typing import Protocol + class Pager(Protocol): + def __call__(self, text: str, title: str = "") -> None: + ... + + +def get_pager() -> Pager: + """Decide what method to use for paging through text.""" + if not hasattr(sys.stdin, "isatty"): + return plain_pager + if not hasattr(sys.stdout, "isatty"): + return plain_pager + if not sys.stdin.isatty() or not sys.stdout.isatty(): + return plain_pager + if sys.platform == "emscripten": + return plain_pager + use_pager = os.environ.get('MANPAGER') or os.environ.get('PAGER') + if use_pager: + if sys.platform == 'win32': # pipes completely broken in Windows + return lambda text, title='': tempfile_pager(plain(text), use_pager) + elif os.environ.get('TERM') in ('dumb', 'emacs'): + return lambda text, title='': pipe_pager(plain(text), use_pager, title) + else: + return lambda text, title='': pipe_pager(text, use_pager, title) + if os.environ.get('TERM') in ('dumb', 'emacs'): + return plain_pager + if sys.platform == 'win32': + return lambda text, title='': tempfile_pager(plain(text), 'more <') + if hasattr(os, 'system') and os.system('(pager) 2>/dev/null') == 0: + return lambda text, title='': pipe_pager(text, 'pager', title) + if hasattr(os, 'system') and os.system('(less) 2>/dev/null') == 0: + return lambda text, title='': pipe_pager(text, 'less', title) + + import tempfile + (fd, filename) = tempfile.mkstemp() + os.close(fd) + try: + if hasattr(os, 'system') and os.system('more "%s"' % filename) == 0: + return lambda text, title='': pipe_pager(text, 'more', title) + else: + return tty_pager + finally: + os.unlink(filename) + + +def escape_stdout(text: str) -> str: + # Escape non-encodable characters to avoid encoding errors later + encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8' + return text.encode(encoding, 'backslashreplace').decode(encoding) + + +def escape_less(s: str) -> str: + return re.sub(r'([?:.%\\])', r'\\\1', s) + + +def plain(text: str) -> str: + """Remove boldface formatting from text.""" + return re.sub('.\b', '', text) + + +def tty_pager(text: str, title: str = '') -> None: + """Page through text on a text terminal.""" + lines = plain(escape_stdout(text)).split('\n') + has_tty = False + try: + import tty + import termios + fd = sys.stdin.fileno() + old = termios.tcgetattr(fd) + tty.setcbreak(fd) + has_tty = True + + def getchar() -> str: + return sys.stdin.read(1) + + except (ImportError, AttributeError, io.UnsupportedOperation): + def getchar() -> str: + return sys.stdin.readline()[:-1][:1] + + try: + try: + h = int(os.environ.get('LINES', 0)) + except ValueError: + h = 0 + if h <= 1: + h = 25 + r = inc = h - 1 + sys.stdout.write('\n'.join(lines[:inc]) + '\n') + while lines[r:]: + sys.stdout.write('-- more --') + sys.stdout.flush() + c = getchar() + + if c in ('q', 'Q'): + sys.stdout.write('\r \r') + break + elif c in ('\r', '\n'): + sys.stdout.write('\r \r' + lines[r] + '\n') + r = r + 1 + continue + if c in ('b', 'B', '\x1b'): + r = r - inc - inc + if r < 0: r = 0 + sys.stdout.write('\n' + '\n'.join(lines[r:r+inc]) + '\n') + r = r + inc + + finally: + if has_tty: + termios.tcsetattr(fd, termios.TCSAFLUSH, old) + + +def plain_pager(text: str, title: str = '') -> None: + """Simply print unformatted text. This is the ultimate fallback.""" + sys.stdout.write(plain(escape_stdout(text))) + + +def pipe_pager(text: str, cmd: str, title: str = '') -> None: + """Page through text by feeding it to another program.""" + import subprocess + env = os.environ.copy() + if title: + title += ' ' + esc_title = escape_less(title) + prompt_string = ( + f' {esc_title}' + + '?ltline %lt?L/%L.' + ':byte %bB?s/%s.' + '.' + '?e (END):?pB %pB\\%..' + ' (press h for help or q to quit)') + env['LESS'] = '-RmPm{0}$PM{0}$'.format(prompt_string) + proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, + errors='backslashreplace', env=env) + assert proc.stdin is not None + try: + with proc.stdin as pipe: + try: + pipe.write(text) + except KeyboardInterrupt: + # We've hereby abandoned whatever text hasn't been written, + # but the pager is still in control of the terminal. + pass + except OSError: + pass # Ignore broken pipes caused by quitting the pager program. + while True: + try: + proc.wait() + break + except KeyboardInterrupt: + # Ignore ctl-c like the pager itself does. Otherwise the pager is + # left running and the terminal is in raw mode and unusable. + pass + + +def tempfile_pager(text: str, cmd: str, title: str = '') -> None: + """Page through text by invoking a program on a temporary file.""" + import tempfile + with tempfile.TemporaryDirectory() as tempdir: + filename = os.path.join(tempdir, 'pydoc.out') + with open(filename, 'w', errors='backslashreplace', + encoding=os.device_encoding(0) if + sys.platform == 'win32' else None + ) as file: + file.write(text) + os.system(cmd + ' "' + filename + '"') diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py new file mode 100644 index 00000000000..f0116e742d2 --- /dev/null +++ b/Lib/_pyrepl/reader.py @@ -0,0 +1,773 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import sys +import _colorize + +from contextlib import contextmanager +from dataclasses import dataclass, field, fields + +from . import commands, console, input +from .utils import wlen, unbracket, disp_str, gen_colors, THEME +from .trace import trace + + +# types +Command = commands.Command +from .types import Callback, SimpleContextManager, KeySpec, CommandName + + +# syntax classes +SYNTAX_WHITESPACE, SYNTAX_WORD, SYNTAX_SYMBOL = range(3) + + +def make_default_syntax_table() -> dict[str, int]: + # XXX perhaps should use some unicodedata here? + st: dict[str, int] = {} + for c in map(chr, range(256)): + st[c] = SYNTAX_SYMBOL + for c in [a for a in map(chr, range(256)) if a.isalnum()]: + st[c] = SYNTAX_WORD + st["\n"] = st[" "] = SYNTAX_WHITESPACE + return st + + +def make_default_commands() -> dict[CommandName, type[Command]]: + result: dict[CommandName, type[Command]] = {} + for v in vars(commands).values(): + if isinstance(v, type) and issubclass(v, Command) and v.__name__[0].islower(): + result[v.__name__] = v + result[v.__name__.replace("_", "-")] = v + return result + + +default_keymap: tuple[tuple[KeySpec, CommandName], ...] = tuple( + [ + (r"\C-a", "beginning-of-line"), + (r"\C-b", "left"), + (r"\C-c", "interrupt"), + (r"\C-d", "delete"), + (r"\C-e", "end-of-line"), + (r"\C-f", "right"), + (r"\C-g", "cancel"), + (r"\C-h", "backspace"), + (r"\C-j", "accept"), + (r"\", "accept"), + (r"\C-k", "kill-line"), + (r"\C-l", "clear-screen"), + (r"\C-m", "accept"), + (r"\C-t", "transpose-characters"), + (r"\C-u", "unix-line-discard"), + (r"\C-w", "unix-word-rubout"), + (r"\C-x\C-u", "upcase-region"), + (r"\C-y", "yank"), + *(() if sys.platform == "win32" else ((r"\C-z", "suspend"), )), + (r"\M-b", "backward-word"), + (r"\M-c", "capitalize-word"), + (r"\M-d", "kill-word"), + (r"\M-f", "forward-word"), + (r"\M-l", "downcase-word"), + (r"\M-t", "transpose-words"), + (r"\M-u", "upcase-word"), + (r"\M-y", "yank-pop"), + (r"\M--", "digit-arg"), + (r"\M-0", "digit-arg"), + (r"\M-1", "digit-arg"), + (r"\M-2", "digit-arg"), + (r"\M-3", "digit-arg"), + (r"\M-4", "digit-arg"), + (r"\M-5", "digit-arg"), + (r"\M-6", "digit-arg"), + (r"\M-7", "digit-arg"), + (r"\M-8", "digit-arg"), + (r"\M-9", "digit-arg"), + (r"\M-\n", "accept"), + ("\\\\", "self-insert"), + (r"\x1b[200~", "perform-bracketed-paste"), + (r"\x03", "ctrl-c"), + ] + + [(c, "self-insert") for c in map(chr, range(32, 127)) if c != "\\"] + + [(c, "self-insert") for c in map(chr, range(128, 256)) if c.isalpha()] + + [ + (r"\", "up"), + (r"\", "down"), + (r"\", "left"), + (r"\C-\", "backward-word"), + (r"\", "right"), + (r"\C-\", "forward-word"), + (r"\", "delete"), + (r"\x1b[3~", "delete"), + (r"\", "backspace"), + (r"\M-\", "backward-kill-word"), + (r"\", "end-of-line"), # was 'end' + (r"\", "beginning-of-line"), # was 'home' + (r"\", "help"), + (r"\", "show-history"), + (r"\", "paste-mode"), + (r"\EOF", "end"), # the entries in the terminfo database for xterms + (r"\EOH", "home"), # seem to be wrong. this is a less than ideal + # workaround + ] +) + + +@dataclass(slots=True) +class Reader: + """The Reader class implements the bare bones of a command reader, + handling such details as editing and cursor motion. What it does + not support are such things as completion or history support - + these are implemented elsewhere. + + Instance variables of note include: + + * buffer: + A per-character list containing all the characters that have been + entered. Does not include color information. + * console: + Hopefully encapsulates the OS dependent stuff. + * pos: + A 0-based index into 'buffer' for where the insertion point + is. + * screeninfo: + A list of screen position tuples. Each list element is a tuple + representing information on visible line length for a given line. + Allows for efficient skipping of color escape sequences. + * cxy, lxy: + the position of the insertion point in screen ... + * syntax_table: + Dictionary mapping characters to 'syntax class'; read the + emacs docs to see what this means :-) + * commands: + Dictionary mapping command names to command classes. + * arg: + The emacs-style prefix argument. It will be None if no such + argument has been provided. + * dirty: + True if we need to refresh the display. + * kill_ring: + The emacs-style kill-ring; manipulated with yank & yank-pop + * ps1, ps2, ps3, ps4: + prompts. ps1 is the prompt for a one-line input; for a + multiline input it looks like: + ps2> first line of input goes here + ps3> second and further + ps3> lines get ps3 + ... + ps4> and the last one gets ps4 + As with the usual top-level, you can set these to instances if + you like; str() will be called on them (once) at the beginning + of each command. Don't put really long or newline containing + strings here, please! + This is just the default policy; you can change it freely by + overriding get_prompt() (and indeed some standard subclasses + do). + * finished: + handle1 will set this to a true value if a command signals + that we're done. + """ + + console: console.Console + + ## state + buffer: list[str] = field(default_factory=list) + pos: int = 0 + ps1: str = "->> " + ps2: str = "/>> " + ps3: str = "|.. " + ps4: str = R"\__ " + kill_ring: list[list[str]] = field(default_factory=list) + msg: str = "" + arg: int | None = None + dirty: bool = False + finished: bool = False + paste_mode: bool = False + commands: dict[str, type[Command]] = field(default_factory=make_default_commands) + last_command: type[Command] | None = None + syntax_table: dict[str, int] = field(default_factory=make_default_syntax_table) + keymap: tuple[tuple[str, str], ...] = () + input_trans: input.KeymapTranslator = field(init=False) + input_trans_stack: list[input.KeymapTranslator] = field(default_factory=list) + screen: list[str] = field(default_factory=list) + screeninfo: list[tuple[int, list[int]]] = field(init=False) + cxy: tuple[int, int] = field(init=False) + lxy: tuple[int, int] = field(init=False) + scheduled_commands: list[str] = field(default_factory=list) + can_colorize: bool = False + threading_hook: Callback | None = None + + ## cached metadata to speed up screen refreshes + @dataclass + class RefreshCache: + screen: list[str] = field(default_factory=list) + screeninfo: list[tuple[int, list[int]]] = field(init=False) + line_end_offsets: list[int] = field(default_factory=list) + pos: int = field(init=False) + cxy: tuple[int, int] = field(init=False) + dimensions: tuple[int, int] = field(init=False) + invalidated: bool = False + + def update_cache(self, + reader: Reader, + screen: list[str], + screeninfo: list[tuple[int, list[int]]], + ) -> None: + self.screen = screen.copy() + self.screeninfo = screeninfo.copy() + self.pos = reader.pos + self.cxy = reader.cxy + self.dimensions = reader.console.width, reader.console.height + self.invalidated = False + + def valid(self, reader: Reader) -> bool: + if self.invalidated: + return False + dimensions = reader.console.width, reader.console.height + dimensions_changed = dimensions != self.dimensions + return not dimensions_changed + + def get_cached_location(self, reader: Reader) -> tuple[int, int]: + if self.invalidated: + raise ValueError("Cache is invalidated") + offset = 0 + earliest_common_pos = min(reader.pos, self.pos) + num_common_lines = len(self.line_end_offsets) + while num_common_lines > 0: + offset = self.line_end_offsets[num_common_lines - 1] + if earliest_common_pos > offset: + break + num_common_lines -= 1 + else: + offset = 0 + return offset, num_common_lines + + last_refresh_cache: RefreshCache = field(default_factory=RefreshCache) + + def __post_init__(self) -> None: + # Enable the use of `insert` without a `prepare` call - necessary to + # facilitate the tab completion hack implemented for + # . + self.keymap = self.collect_keymap() + self.input_trans = input.KeymapTranslator( + self.keymap, invalid_cls="invalid-key", character_cls="self-insert" + ) + self.screeninfo = [(0, [])] + self.cxy = self.pos2xy() + self.lxy = (self.pos, 0) + self.can_colorize = _colorize.can_colorize() + + self.last_refresh_cache.screeninfo = self.screeninfo + self.last_refresh_cache.pos = self.pos + self.last_refresh_cache.cxy = self.cxy + self.last_refresh_cache.dimensions = (0, 0) + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return default_keymap + + def calc_screen(self) -> list[str]: + """Translate changes in self.buffer into changes in self.console.screen.""" + # Since the last call to calc_screen: + # screen and screeninfo may differ due to a completion menu being shown + # pos and cxy may differ due to edits, cursor movements, or completion menus + + # Lines that are above both the old and new cursor position can't have changed, + # unless the terminal has been resized (which might cause reflowing) or we've + # entered or left paste mode (which changes prompts, causing reflowing). + num_common_lines = 0 + offset = 0 + if self.last_refresh_cache.valid(self): + offset, num_common_lines = self.last_refresh_cache.get_cached_location(self) + + screen = self.last_refresh_cache.screen + del screen[num_common_lines:] + + screeninfo = self.last_refresh_cache.screeninfo + del screeninfo[num_common_lines:] + + last_refresh_line_end_offsets = self.last_refresh_cache.line_end_offsets + del last_refresh_line_end_offsets[num_common_lines:] + + pos = self.pos + pos -= offset + + prompt_from_cache = (offset and self.buffer[offset - 1] != "\n") + + if self.can_colorize: + colors = list(gen_colors(self.get_unicode())) + else: + colors = None + trace("colors = {colors}", colors=colors) + lines = "".join(self.buffer[offset:]).split("\n") + cursor_found = False + lines_beyond_cursor = 0 + for ln, line in enumerate(lines, num_common_lines): + line_len = len(line) + if 0 <= pos <= line_len: + self.lxy = pos, ln + cursor_found = True + elif cursor_found: + lines_beyond_cursor += 1 + if lines_beyond_cursor > self.console.height: + # No need to keep formatting lines. + # The console can't show them. + break + if prompt_from_cache: + # Only the first line's prompt can come from the cache + prompt_from_cache = False + prompt = "" + else: + prompt = self.get_prompt(ln, line_len >= pos >= 0) + while "\n" in prompt: + pre_prompt, _, prompt = prompt.partition("\n") + last_refresh_line_end_offsets.append(offset) + screen.append(pre_prompt) + screeninfo.append((0, [])) + pos -= line_len + 1 + prompt, prompt_len = self.process_prompt(prompt) + chars, char_widths = disp_str(line, colors, offset) + wrapcount = (sum(char_widths) + prompt_len) // self.console.width + if wrapcount == 0 or not char_widths: + offset += line_len + 1 # Takes all of the line plus the newline + last_refresh_line_end_offsets.append(offset) + screen.append(prompt + "".join(chars)) + screeninfo.append((prompt_len, char_widths)) + else: + pre = prompt + prelen = prompt_len + for wrap in range(wrapcount + 1): + index_to_wrap_before = 0 + column = 0 + for char_width in char_widths: + if column + char_width + prelen >= self.console.width: + break + index_to_wrap_before += 1 + column += char_width + if len(chars) > index_to_wrap_before: + offset += index_to_wrap_before + post = "\\" + after = [1] + else: + offset += index_to_wrap_before + 1 # Takes the newline + post = "" + after = [] + last_refresh_line_end_offsets.append(offset) + render = pre + "".join(chars[:index_to_wrap_before]) + post + render_widths = char_widths[:index_to_wrap_before] + after + screen.append(render) + screeninfo.append((prelen, render_widths)) + chars = chars[index_to_wrap_before:] + char_widths = char_widths[index_to_wrap_before:] + pre = "" + prelen = 0 + self.screeninfo = screeninfo + self.cxy = self.pos2xy() + if self.msg: + for mline in self.msg.split("\n"): + screen.append(mline) + screeninfo.append((0, [])) + + self.last_refresh_cache.update_cache(self, screen, screeninfo) + return screen + + @staticmethod + def process_prompt(prompt: str) -> tuple[str, int]: + r"""Return a tuple with the prompt string and its visible length. + + The prompt string has the zero-width brackets recognized by shells + (\x01 and \x02) removed. The length ignores anything between those + brackets as well as any ANSI escape sequences. + """ + out_prompt = unbracket(prompt, including_content=False) + visible_prompt = unbracket(prompt, including_content=True) + return out_prompt, wlen(visible_prompt) + + def bow(self, p: int | None = None) -> int: + """Return the 0-based index of the word break preceding p most + immediately. + + p defaults to self.pos; word boundaries are determined using + self.syntax_table.""" + if p is None: + p = self.pos + st = self.syntax_table + b = self.buffer + p -= 1 + while p >= 0 and st.get(b[p], SYNTAX_WORD) != SYNTAX_WORD: + p -= 1 + while p >= 0 and st.get(b[p], SYNTAX_WORD) == SYNTAX_WORD: + p -= 1 + return p + 1 + + def eow(self, p: int | None = None) -> int: + """Return the 0-based index of the word break following p most + immediately. + + p defaults to self.pos; word boundaries are determined using + self.syntax_table.""" + if p is None: + p = self.pos + st = self.syntax_table + b = self.buffer + while p < len(b) and st.get(b[p], SYNTAX_WORD) != SYNTAX_WORD: + p += 1 + while p < len(b) and st.get(b[p], SYNTAX_WORD) == SYNTAX_WORD: + p += 1 + return p + + def bol(self, p: int | None = None) -> int: + """Return the 0-based index of the line break preceding p most + immediately. + + p defaults to self.pos.""" + if p is None: + p = self.pos + b = self.buffer + p -= 1 + while p >= 0 and b[p] != "\n": + p -= 1 + return p + 1 + + def eol(self, p: int | None = None) -> int: + """Return the 0-based index of the line break following p most + immediately. + + p defaults to self.pos.""" + if p is None: + p = self.pos + b = self.buffer + while p < len(b) and b[p] != "\n": + p += 1 + return p + + def max_column(self, y: int) -> int: + """Return the last x-offset for line y""" + return self.screeninfo[y][0] + sum(self.screeninfo[y][1]) + + def max_row(self) -> int: + return len(self.screeninfo) - 1 + + def get_arg(self, default: int = 1) -> int: + """Return any prefix argument that the user has supplied, + returning 'default' if there is None. Defaults to 1. + """ + if self.arg is None: + return default + return self.arg + + def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: + """Return what should be in the left-hand margin for line + 'lineno'.""" + if self.arg is not None and cursor_on_line: + prompt = f"(arg: {self.arg}) " + elif self.paste_mode: + prompt = "(paste) " + elif "\n" in self.buffer: + if lineno == 0: + prompt = self.ps2 + elif self.ps4 and lineno == self.buffer.count("\n"): + prompt = self.ps4 + else: + prompt = self.ps3 + else: + prompt = self.ps1 + + if self.can_colorize: + t = THEME() + prompt = f"{t.prompt}{prompt}{t.reset}" + return prompt + + def push_input_trans(self, itrans: input.KeymapTranslator) -> None: + self.input_trans_stack.append(self.input_trans) + self.input_trans = itrans + + def pop_input_trans(self) -> None: + self.input_trans = self.input_trans_stack.pop() + + def setpos_from_xy(self, x: int, y: int) -> None: + """Set pos according to coordinates x, y""" + pos = 0 + i = 0 + while i < y: + prompt_len, char_widths = self.screeninfo[i] + offset = len(char_widths) + in_wrapped_line = prompt_len + sum(char_widths) >= self.console.width + if in_wrapped_line: + pos += offset - 1 # -1 cause backslash is not in buffer + else: + pos += offset + 1 # +1 cause newline is in buffer + i += 1 + + j = 0 + cur_x = self.screeninfo[i][0] + while cur_x < x: + if self.screeninfo[i][1][j] == 0: + j += 1 # prevent potential future infinite loop + continue + cur_x += self.screeninfo[i][1][j] + j += 1 + pos += 1 + + self.pos = pos + + def pos2xy(self) -> tuple[int, int]: + """Return the x, y coordinates of position 'pos'.""" + + prompt_len, y = 0, 0 + char_widths: list[int] = [] + pos = self.pos + assert 0 <= pos <= len(self.buffer) + + # optimize for the common case: typing at the end of the buffer + if pos == len(self.buffer) and len(self.screeninfo) > 0: + y = len(self.screeninfo) - 1 + prompt_len, char_widths = self.screeninfo[y] + return prompt_len + sum(char_widths), y + + for prompt_len, char_widths in self.screeninfo: + offset = len(char_widths) + in_wrapped_line = prompt_len + sum(char_widths) >= self.console.width + if in_wrapped_line: + offset -= 1 # need to remove line-wrapping backslash + + if offset >= pos: + break + + if not in_wrapped_line: + offset += 1 # there's a newline in buffer + + pos -= offset + y += 1 + return prompt_len + sum(char_widths[:pos]), y + + def insert(self, text: str | list[str]) -> None: + """Insert 'text' at the insertion point.""" + self.buffer[self.pos : self.pos] = list(text) + self.pos += len(text) + self.dirty = True + + def update_cursor(self) -> None: + """Move the cursor to reflect changes in self.pos""" + self.cxy = self.pos2xy() + trace("update_cursor({pos}) = {cxy}", pos=self.pos, cxy=self.cxy) + self.console.move_cursor(*self.cxy) + + def after_command(self, cmd: Command) -> None: + """This function is called to allow post command cleanup.""" + if getattr(cmd, "kills_digit_arg", True): + if self.arg is not None: + self.dirty = True + self.arg = None + + def prepare(self) -> None: + """Get ready to run. Call restore when finished. You must not + write to the console in between the calls to prepare and + restore.""" + try: + self.console.prepare() + self.arg = None + self.finished = False + del self.buffer[:] + self.pos = 0 + self.dirty = True + self.last_command = None + self.calc_screen() + except BaseException: + self.restore() + raise + + while self.scheduled_commands: + cmd = self.scheduled_commands.pop() + self.do_cmd((cmd, [])) + + def last_command_is(self, cls: type) -> bool: + if not self.last_command: + return False + return issubclass(cls, self.last_command) + + def restore(self) -> None: + """Clean up after a run.""" + self.console.restore() + + @contextmanager + def suspend(self) -> SimpleContextManager: + """A context manager to delegate to another reader.""" + prev_state = {f.name: getattr(self, f.name) for f in fields(self)} + try: + self.restore() + yield + finally: + for arg in ("msg", "ps1", "ps2", "ps3", "ps4", "paste_mode"): + setattr(self, arg, prev_state[arg]) + self.prepare() + + @contextmanager + def suspend_colorization(self) -> SimpleContextManager: + try: + old_can_colorize = self.can_colorize + self.can_colorize = False + yield + finally: + self.can_colorize = old_can_colorize + + + def finish(self) -> None: + """Called when a command signals that we're finished.""" + pass + + def error(self, msg: str = "none") -> None: + self.msg = "! " + msg + " " + self.dirty = True + self.console.beep() + + def update_screen(self) -> None: + if self.dirty: + self.refresh() + + def refresh(self) -> None: + """Recalculate and refresh the screen.""" + self.console.height, self.console.width = self.console.getheightwidth() + # this call sets up self.cxy, so call it first. + self.screen = self.calc_screen() + self.console.refresh(self.screen, self.cxy) + self.dirty = False + + def do_cmd(self, cmd: tuple[str, list[str]]) -> None: + """`cmd` is a tuple of "event_name" and "event", which in the current + implementation is always just the "buffer" which happens to be a list + of single-character strings.""" + + trace("received command {cmd}", cmd=cmd) + if isinstance(cmd[0], str): + command_type = self.commands.get(cmd[0], commands.invalid_command) + elif isinstance(cmd[0], type): + command_type = cmd[0] + else: + return # nothing to do + + command = command_type(self, *cmd) # type: ignore[arg-type] + command.do() + + self.after_command(command) + + if self.dirty: + self.refresh() + else: + self.update_cursor() + + if not isinstance(cmd, commands.digit_arg): + self.last_command = command_type + + self.finished = bool(command.finish) + if self.finished: + self.console.finish() + self.finish() + + def run_hooks(self) -> None: + threading_hook = self.threading_hook + if threading_hook is None and 'threading' in sys.modules: + from ._threading_handler import install_threading_hook + install_threading_hook(self) + if threading_hook is not None: + try: + threading_hook() + except Exception: + pass + + input_hook = self.console.input_hook + if input_hook: + try: + input_hook() + except Exception: + pass + + def handle1(self, block: bool = True) -> bool: + """Handle a single event. Wait as long as it takes if block + is true (the default), otherwise return False if no event is + pending.""" + + if self.msg: + self.msg = "" + self.dirty = True + + while True: + # We use the same timeout as in readline.c: 100ms + self.run_hooks() + self.console.wait(100) + event = self.console.get_event(block=False) + if not event: + if block: + continue + return False + + translate = True + + if event.evt == "key": + self.input_trans.push(event) + elif event.evt == "scroll": + self.refresh() + elif event.evt == "resize": + self.refresh() + else: + translate = False + + if translate: + cmd = self.input_trans.get() + else: + cmd = [event.evt, event.data] + + if cmd is None: + if block: + continue + return False + + self.do_cmd(cmd) + return True + + def push_char(self, char: int | bytes) -> None: + self.console.push_char(char) + self.handle1(block=False) + + def readline(self, startup_hook: Callback | None = None) -> str: + """Read a line. The implementation of this method also shows + how to drive Reader if you want more control over the event + loop.""" + self.prepare() + try: + if startup_hook is not None: + startup_hook() + self.refresh() + while not self.finished: + self.handle1() + return self.get_unicode() + + finally: + self.restore() + + def bind(self, spec: KeySpec, command: CommandName) -> None: + self.keymap = self.keymap + ((spec, command),) + self.input_trans = input.KeymapTranslator( + self.keymap, invalid_cls="invalid-key", character_cls="self-insert" + ) + + def get_unicode(self) -> str: + """Return the current buffer as a unicode string.""" + return "".join(self.buffer) diff --git a/Lib/_pyrepl/readline.py b/Lib/_pyrepl/readline.py new file mode 100644 index 00000000000..23b8fa6b9c7 --- /dev/null +++ b/Lib/_pyrepl/readline.py @@ -0,0 +1,620 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Alex Gaynor +# Antonio Cuni +# Armin Rigo +# Holger Krekel +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +"""A compatibility wrapper reimplementing the 'readline' standard module +on top of pyrepl. Not all functionalities are supported. Contains +extensions for multiline input. +""" + +from __future__ import annotations + +import warnings +from dataclasses import dataclass, field + +import os +from site import gethistoryfile +import sys +from rlcompleter import Completer as RLCompleter + +from . import commands, historical_reader +from .completing_reader import CompletingReader +from .console import Console as ConsoleType +from ._module_completer import ModuleCompleter, make_default_module_completer + +Console: type[ConsoleType] +_error: tuple[type[Exception], ...] | type[Exception] + +if os.name == "nt": + from .windows_console import WindowsConsole as Console, _error +else: + from .unix_console import UnixConsole as Console, _error + +ENCODING = sys.getdefaultencoding() or "latin1" + + +# types +Command = commands.Command +from collections.abc import Callable, Collection +from .types import Callback, Completer, KeySpec, CommandName + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Any, Mapping + + +MoreLinesCallable = Callable[[str], bool] + + +__all__ = [ + "add_history", + "clear_history", + "get_begidx", + "get_completer", + "get_completer_delims", + "get_current_history_length", + "get_endidx", + "get_history_item", + "get_history_length", + "get_line_buffer", + "insert_text", + "parse_and_bind", + "read_history_file", + # "read_init_file", + # "redisplay", + "remove_history_item", + "replace_history_item", + "set_auto_history", + "set_completer", + "set_completer_delims", + "set_history_length", + # "set_pre_input_hook", + "set_startup_hook", + "write_history_file", + "append_history_file", + # ---- multiline extensions ---- + "multiline_input", +] + +# ____________________________________________________________ + +@dataclass +class ReadlineConfig: + readline_completer: Completer | None = None + completer_delims: frozenset[str] = frozenset(" \t\n`~!@#$%^&*()-=+[{]}\\|;:'\",<>/?") + module_completer: ModuleCompleter = field(default_factory=make_default_module_completer) + +@dataclass(kw_only=True) +class ReadlineAlikeReader(historical_reader.HistoricalReader, CompletingReader): + # Class fields + assume_immutable_completions = False + use_brackets = False + sort_in_column = True + + # Instance fields + config: ReadlineConfig + more_lines: MoreLinesCallable | None = None + last_used_indentation: str | None = None + + def __post_init__(self) -> None: + super().__post_init__() + self.commands["maybe_accept"] = maybe_accept + self.commands["maybe-accept"] = maybe_accept + self.commands["backspace_dedent"] = backspace_dedent + self.commands["backspace-dedent"] = backspace_dedent + + def error(self, msg: str = "none") -> None: + pass # don't show error messages by default + + def get_stem(self) -> str: + b = self.buffer + p = self.pos - 1 + completer_delims = self.config.completer_delims + while p >= 0 and b[p] not in completer_delims: + p -= 1 + return "".join(b[p + 1 : self.pos]) + + def get_completions(self, stem: str) -> list[str]: + module_completions = self.get_module_completions() + if module_completions is not None: + return module_completions + if len(stem) == 0 and self.more_lines is not None: + b = self.buffer + p = self.pos + while p > 0 and b[p - 1] != "\n": + p -= 1 + num_spaces = 4 - ((self.pos - p) % 4) + return [" " * num_spaces] + result = [] + function = self.config.readline_completer + if function is not None: + try: + stem = str(stem) # rlcompleter.py seems to not like unicode + except UnicodeEncodeError: + pass # but feed unicode anyway if we have no choice + state = 0 + while True: + try: + next = function(stem, state) + except Exception: + break + if not isinstance(next, str): + break + result.append(next) + state += 1 + # emulate the behavior of the standard readline that sorts + # the completions before displaying them. + result.sort() + return result + + def get_module_completions(self) -> list[str] | None: + line = self.get_line() + return self.config.module_completer.get_completions(line) + + def get_trimmed_history(self, maxlength: int) -> list[str]: + if maxlength >= 0: + cut = len(self.history) - maxlength + if cut < 0: + cut = 0 + else: + cut = 0 + return self.history[cut:] + + def update_last_used_indentation(self) -> None: + indentation = _get_first_indentation(self.buffer) + if indentation is not None: + self.last_used_indentation = indentation + + # --- simplified support for reading multiline Python statements --- + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return super().collect_keymap() + ( + (r"\n", "maybe-accept"), + (r"\", "backspace-dedent"), + ) + + def after_command(self, cmd: Command) -> None: + super().after_command(cmd) + if self.more_lines is None: + # Force single-line input if we are in raw_input() mode. + # Although there is no direct way to add a \n in this mode, + # multiline buffers can still show up using various + # commands, e.g. navigating the history. + try: + index = self.buffer.index("\n") + except ValueError: + pass + else: + self.buffer = self.buffer[:index] + if self.pos > len(self.buffer): + self.pos = len(self.buffer) + + +def set_auto_history(_should_auto_add_history: bool) -> None: + """Enable or disable automatic history""" + historical_reader.should_auto_add_history = bool(_should_auto_add_history) + + +def _get_this_line_indent(buffer: list[str], pos: int) -> int: + indent = 0 + while pos > 0 and buffer[pos - 1] in " \t": + indent += 1 + pos -= 1 + if pos > 0 and buffer[pos - 1] == "\n": + return indent + return 0 + + +def _get_previous_line_indent(buffer: list[str], pos: int) -> tuple[int, int | None]: + prevlinestart = pos + while prevlinestart > 0 and buffer[prevlinestart - 1] != "\n": + prevlinestart -= 1 + prevlinetext = prevlinestart + while prevlinetext < pos and buffer[prevlinetext] in " \t": + prevlinetext += 1 + if prevlinetext == pos: + indent = None + else: + indent = prevlinetext - prevlinestart + return prevlinestart, indent + + +def _get_first_indentation(buffer: list[str]) -> str | None: + indented_line_start = None + for i in range(len(buffer)): + if (i < len(buffer) - 1 + and buffer[i] == "\n" + and buffer[i + 1] in " \t" + ): + indented_line_start = i + 1 + elif indented_line_start is not None and buffer[i] not in " \t\n": + return ''.join(buffer[indented_line_start : i]) + return None + + +def _should_auto_indent(buffer: list[str], pos: int) -> bool: + # check if last character before "pos" is a colon, ignoring + # whitespaces and comments. + last_char = None + while pos > 0: + pos -= 1 + if last_char is None: + if buffer[pos] not in " \t\n#": # ignore whitespaces and comments + last_char = buffer[pos] + else: + # even if we found a non-whitespace character before + # original pos, we keep going back until newline is reached + # to make sure we ignore comments + if buffer[pos] == "\n": + break + if buffer[pos] == "#": + last_char = None + return last_char == ":" + + +class maybe_accept(commands.Command): + def do(self) -> None: + r: ReadlineAlikeReader + r = self.reader # type: ignore[assignment] + r.dirty = True # this is needed to hide the completion menu, if visible + + # if there are already several lines and the cursor + # is not on the last one, always insert a new \n. + text = r.get_unicode() + + if "\n" in r.buffer[r.pos :] or ( + r.more_lines is not None and r.more_lines(text) + ): + def _newline_before_pos(): + before_idx = r.pos - 1 + while before_idx > 0 and text[before_idx].isspace(): + before_idx -= 1 + return text[before_idx : r.pos].count("\n") > 0 + + # if there's already a new line before the cursor then + # even if the cursor is followed by whitespace, we assume + # the user is trying to terminate the block + if _newline_before_pos() and text[r.pos:].isspace(): + self.finish = True + return + + # auto-indent the next line like the previous line + prevlinestart, indent = _get_previous_line_indent(r.buffer, r.pos) + r.insert("\n") + if not self.reader.paste_mode: + if indent: + for i in range(prevlinestart, prevlinestart + indent): + r.insert(r.buffer[i]) + r.update_last_used_indentation() + if _should_auto_indent(r.buffer, r.pos): + if r.last_used_indentation is not None: + indentation = r.last_used_indentation + else: + # default + indentation = " " * 4 + r.insert(indentation) + elif not self.reader.paste_mode: + self.finish = True + else: + r.insert("\n") + + +class backspace_dedent(commands.Command): + def do(self) -> None: + r = self.reader + b = r.buffer + if r.pos > 0: + repeat = 1 + if b[r.pos - 1] != "\n": + indent = _get_this_line_indent(b, r.pos) + if indent > 0: + ls = r.pos - indent + while ls > 0: + ls, pi = _get_previous_line_indent(b, ls - 1) + if pi is not None and pi < indent: + repeat = indent - pi + break + r.pos -= repeat + del b[r.pos : r.pos + repeat] + r.dirty = True + else: + self.reader.error("can't backspace at start") + + +# ____________________________________________________________ + + +@dataclass(slots=True) +class _ReadlineWrapper: + f_in: int = -1 + f_out: int = -1 + reader: ReadlineAlikeReader | None = field(default=None, repr=False) + saved_history_length: int = -1 + startup_hook: Callback | None = None + config: ReadlineConfig = field(default_factory=ReadlineConfig, repr=False) + + def __post_init__(self) -> None: + if self.f_in == -1: + self.f_in = os.dup(0) + if self.f_out == -1: + self.f_out = os.dup(1) + + def get_reader(self) -> ReadlineAlikeReader: + if self.reader is None: + console = Console(self.f_in, self.f_out, encoding=ENCODING) + self.reader = ReadlineAlikeReader(console=console, config=self.config) + return self.reader + + def input(self, prompt: object = "") -> str: + try: + reader = self.get_reader() + except _error: + assert raw_input is not None + return raw_input(prompt) + prompt_str = str(prompt) + reader.ps1 = prompt_str + sys.audit("builtins.input", prompt_str) + result = reader.readline(startup_hook=self.startup_hook) + sys.audit("builtins.input/result", result) + return result + + def multiline_input(self, more_lines: MoreLinesCallable, ps1: str, ps2: str) -> str: + """Read an input on possibly multiple lines, asking for more + lines as long as 'more_lines(unicodetext)' returns an object whose + boolean value is true. + """ + reader = self.get_reader() + saved = reader.more_lines + try: + reader.more_lines = more_lines + reader.ps1 = ps1 + reader.ps2 = ps1 + reader.ps3 = ps2 + reader.ps4 = "" + with warnings.catch_warnings(action="ignore"): + return reader.readline() + finally: + reader.more_lines = saved + reader.paste_mode = False + + def parse_and_bind(self, string: str) -> None: + pass # XXX we don't support parsing GNU-readline-style init files + + def set_completer(self, function: Completer | None = None) -> None: + self.config.readline_completer = function + + def get_completer(self) -> Completer | None: + return self.config.readline_completer + + def set_completer_delims(self, delimiters: Collection[str]) -> None: + self.config.completer_delims = frozenset(delimiters) + + def get_completer_delims(self) -> str: + return "".join(sorted(self.config.completer_delims)) + + def _histline(self, line: str) -> str: + line = line.rstrip("\n") + return line + + def get_history_length(self) -> int: + return self.saved_history_length + + def set_history_length(self, length: int) -> None: + self.saved_history_length = length + + def get_current_history_length(self) -> int: + return len(self.get_reader().history) + + def read_history_file(self, filename: str = gethistoryfile()) -> None: + # multiline extension (really a hack) for the end of lines that + # are actually continuations inside a single multiline_input() + # history item: we use \r\n instead of just \n. If the history + # file is passed to GNU readline, the extra \r are just ignored. + history = self.get_reader().history + + with open(os.path.expanduser(filename), 'rb') as f: + is_editline = f.readline().startswith(b"_HiStOrY_V2_") + if is_editline: + encoding = "unicode-escape" + else: + f.seek(0) + encoding = "utf-8" + + lines = [line.decode(encoding, errors='replace') for line in f.read().split(b'\n')] + buffer = [] + for line in lines: + if line.endswith("\r"): + buffer.append(line+'\n') + else: + line = self._histline(line) + if buffer: + line = self._histline("".join(buffer).replace("\r", "") + line) + del buffer[:] + if line: + history.append(line) + self.set_history_length(self.get_current_history_length()) + + def write_history_file(self, filename: str = gethistoryfile()) -> None: + maxlength = self.saved_history_length + history = self.get_reader().get_trimmed_history(maxlength) + f = open(os.path.expanduser(filename), "w", + encoding="utf-8", newline="\n") + with f: + for entry in history: + entry = entry.replace("\n", "\r\n") # multiline history support + f.write(entry + "\n") + + def append_history_file(self, filename: str = gethistoryfile()) -> None: + reader = self.get_reader() + saved_length = self.get_history_length() + length = self.get_current_history_length() - saved_length + history = reader.get_trimmed_history(length) + f = open(os.path.expanduser(filename), "a", + encoding="utf-8", newline="\n") + with f: + for entry in history: + entry = entry.replace("\n", "\r\n") # multiline history support + f.write(entry + "\n") + self.set_history_length(saved_length + length) + + def clear_history(self) -> None: + del self.get_reader().history[:] + + def get_history_item(self, index: int) -> str | None: + history = self.get_reader().history + if 1 <= index <= len(history): + return history[index - 1] + else: + return None # like readline.c + + def remove_history_item(self, index: int) -> None: + history = self.get_reader().history + if 0 <= index < len(history): + del history[index] + else: + raise ValueError("No history item at position %d" % index) + # like readline.c + + def replace_history_item(self, index: int, line: str) -> None: + history = self.get_reader().history + if 0 <= index < len(history): + history[index] = self._histline(line) + else: + raise ValueError("No history item at position %d" % index) + # like readline.c + + def add_history(self, line: str) -> None: + self.get_reader().history.append(self._histline(line)) + + def set_startup_hook(self, function: Callback | None = None) -> None: + self.startup_hook = function + + def get_line_buffer(self) -> str: + return self.get_reader().get_unicode() + + def _get_idxs(self) -> tuple[int, int]: + start = cursor = self.get_reader().pos + buf = self.get_line_buffer() + for i in range(cursor - 1, -1, -1): + if buf[i] in self.get_completer_delims(): + break + start = i + return start, cursor + + def get_begidx(self) -> int: + return self._get_idxs()[0] + + def get_endidx(self) -> int: + return self._get_idxs()[1] + + def insert_text(self, text: str) -> None: + self.get_reader().insert(text) + + +_wrapper = _ReadlineWrapper() + +# ____________________________________________________________ +# Public API + +parse_and_bind = _wrapper.parse_and_bind +set_completer = _wrapper.set_completer +get_completer = _wrapper.get_completer +set_completer_delims = _wrapper.set_completer_delims +get_completer_delims = _wrapper.get_completer_delims +get_history_length = _wrapper.get_history_length +set_history_length = _wrapper.set_history_length +get_current_history_length = _wrapper.get_current_history_length +read_history_file = _wrapper.read_history_file +write_history_file = _wrapper.write_history_file +append_history_file = _wrapper.append_history_file +clear_history = _wrapper.clear_history +get_history_item = _wrapper.get_history_item +remove_history_item = _wrapper.remove_history_item +replace_history_item = _wrapper.replace_history_item +add_history = _wrapper.add_history +set_startup_hook = _wrapper.set_startup_hook +get_line_buffer = _wrapper.get_line_buffer +get_begidx = _wrapper.get_begidx +get_endidx = _wrapper.get_endidx +insert_text = _wrapper.insert_text + +# Extension +multiline_input = _wrapper.multiline_input + +# Internal hook +_get_reader = _wrapper.get_reader + +# ____________________________________________________________ +# Stubs + + +def _make_stub(_name: str, _ret: object) -> None: + def stub(*args: object, **kwds: object) -> None: + import warnings + + warnings.warn("readline.%s() not implemented" % _name, stacklevel=2) + + stub.__name__ = _name + globals()[_name] = stub + + +for _name, _ret in [ + ("read_init_file", None), + ("redisplay", None), + ("set_pre_input_hook", None), +]: + assert _name not in globals(), _name + _make_stub(_name, _ret) + +# ____________________________________________________________ + + +def _setup(namespace: Mapping[str, Any]) -> None: + global raw_input + if raw_input is not None: + return # don't run _setup twice + + try: + f_in = sys.stdin.fileno() + f_out = sys.stdout.fileno() + except (AttributeError, ValueError): + return + if not os.isatty(f_in) or not os.isatty(f_out): + return + + _wrapper.f_in = f_in + _wrapper.f_out = f_out + + # set up namespace in rlcompleter, which requires it to be a bona fide dict + if not isinstance(namespace, dict): + namespace = dict(namespace) + _wrapper.config.module_completer = ModuleCompleter(namespace) + _wrapper.config.readline_completer = RLCompleter(namespace).complete + + # this is not really what readline.c does. Better than nothing I guess + import builtins + raw_input = builtins.input + builtins.input = _wrapper.input + + +raw_input: Callable[[object], str] | None = None diff --git a/Lib/_pyrepl/simple_interact.py b/Lib/_pyrepl/simple_interact.py new file mode 100644 index 00000000000..6508f0233b9 --- /dev/null +++ b/Lib/_pyrepl/simple_interact.py @@ -0,0 +1,181 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +"""This is an alternative to python_reader which tries to emulate +the CPython prompt as closely as possible, with the exception of +allowing multiline input and multiline history entries. +""" + +from __future__ import annotations + +import _sitebuiltins +import functools +import os +import sys +import code +import warnings +import errno + +from .readline import _get_reader, multiline_input, append_history_file + + +_error: tuple[type[Exception], ...] | type[Exception] +try: + from .unix_console import _error +except ModuleNotFoundError: + from .windows_console import _error + +def check() -> str: + """Returns the error message if there is a problem initializing the state.""" + try: + _get_reader() + except _error as e: + if term := os.environ.get("TERM", ""): + term = f"; TERM={term}" + return str(str(e) or repr(e) or "unknown error") + term + return "" + + +def _strip_final_indent(text: str) -> str: + # kill spaces and tabs at the end, but only if they follow '\n'. + # meant to remove the auto-indentation only (although it would of + # course also remove explicitly-added indentation). + short = text.rstrip(" \t") + n = len(short) + if n > 0 and text[n - 1] == "\n": + return short + return text + + +def _clear_screen(): + reader = _get_reader() + reader.scheduled_commands.append("clear_screen") + + +REPL_COMMANDS = { + "exit": _sitebuiltins.Quitter('exit', ''), + "quit": _sitebuiltins.Quitter('quit' ,''), + "copyright": _sitebuiltins._Printer('copyright', sys.copyright), + "help": _sitebuiltins._Helper(), + "clear": _clear_screen, + "\x1a": _sitebuiltins.Quitter('\x1a', ''), +} + + +def _more_lines(console: code.InteractiveConsole, unicodetext: str) -> bool: + # ooh, look at the hack: + src = _strip_final_indent(unicodetext) + try: + code = console.compile(src, "", "single") + except (OverflowError, SyntaxError, ValueError): + lines = src.splitlines(keepends=True) + if len(lines) == 1: + return False + + last_line = lines[-1] + was_indented = last_line.startswith((" ", "\t")) + not_empty = last_line.strip() != "" + incomplete = not last_line.endswith("\n") + return (was_indented or not_empty) and incomplete + else: + return code is None + + +def run_multiline_interactive_console( + console: code.InteractiveConsole, + *, + future_flags: int = 0, +) -> None: + from .readline import _setup + _setup(console.locals) + if future_flags: + console.compile.compiler.flags |= future_flags + + more_lines = functools.partial(_more_lines, console) + input_n = 0 + + _is_x_showrefcount_set = sys._xoptions.get("showrefcount") + _is_pydebug_build = hasattr(sys, "gettotalrefcount") + show_ref_count = _is_x_showrefcount_set and _is_pydebug_build + + def maybe_run_command(statement: str) -> bool: + statement = statement.strip() + if statement in console.locals or statement not in REPL_COMMANDS: + return False + + reader = _get_reader() + reader.history.pop() # skip internal commands in history + command = REPL_COMMANDS[statement] + if callable(command): + # Make sure that history does not change because of commands + with reader.suspend_history(), reader.suspend_colorization(): + command() + return True + return False + + while True: + try: + try: + sys.stdout.flush() + except Exception: + pass + + ps1 = getattr(sys, "ps1", ">>> ") + ps2 = getattr(sys, "ps2", "... ") + try: + statement = multiline_input(more_lines, ps1, ps2) + except EOFError: + break + + if maybe_run_command(statement): + continue + + input_name = f"" + more = console.push(_strip_final_indent(statement), filename=input_name, _symbol="single") # type: ignore[call-arg] + assert not more + try: + append_history_file() + except (FileNotFoundError, PermissionError, OSError) as e: + warnings.warn(f"failed to open the history file for writing: {e}") + + input_n += 1 + except KeyboardInterrupt: + r = _get_reader() + r.cmpltn_reset() + if r.input_trans is r.isearch_trans: + r.do_cmd(("isearch-end", [""])) + r.pos = len(r.get_unicode()) + r.dirty = True + r.refresh() + console.write("\nKeyboardInterrupt\n") + console.resetbuffer() + except MemoryError: + console.write("\nMemoryError\n") + console.resetbuffer() + except SystemExit: + raise + except: + console.showtraceback() + console.resetbuffer() + if show_ref_count: + console.write( + f"[{sys.gettotalrefcount()} refs," + f" {sys.getallocatedblocks()} blocks]\n" + ) diff --git a/Lib/_pyrepl/terminfo.py b/Lib/_pyrepl/terminfo.py new file mode 100644 index 00000000000..d02ef69cce0 --- /dev/null +++ b/Lib/_pyrepl/terminfo.py @@ -0,0 +1,488 @@ +"""Pure Python curses-like terminal capability queries.""" + +from dataclasses import dataclass, field +import errno +import os +from pathlib import Path +import re +import struct + + +# Terminfo constants +MAGIC16 = 0o432 # Magic number for 16-bit terminfo format +MAGIC32 = 0o1036 # Magic number for 32-bit terminfo format + +# Special values for absent/cancelled capabilities +ABSENT_BOOLEAN = -1 +ABSENT_NUMERIC = -1 +CANCELLED_NUMERIC = -2 +ABSENT_STRING = None +CANCELLED_STRING = None + + +# Standard string capability names from ncurses Caps file +# This matches the order used by ncurses when compiling terminfo +# fmt: off +_STRING_NAMES: tuple[str, ...] = ( + "cbt", "bel", "cr", "csr", "tbc", "clear", "el", "ed", "hpa", "cmdch", + "cup", "cud1", "home", "civis", "cub1", "mrcup", "cnorm", "cuf1", "ll", + "cuu1", "cvvis", "dch1", "dl1", "dsl", "hd", "smacs", "blink", "bold", + "smcup", "smdc", "dim", "smir", "invis", "prot", "rev", "smso", "smul", + "ech", "rmacs", "sgr0", "rmcup", "rmdc", "rmir", "rmso", "rmul", "flash", + "ff", "fsl", "is1", "is2", "is3", "if", "ich1", "il1", "ip", "kbs", "ktbc", + "kclr", "kctab", "kdch1", "kdl1", "kcud1", "krmir", "kel", "ked", "kf0", + "kf1", "kf10", "kf2", "kf3", "kf4", "kf5", "kf6", "kf7", "kf8", "kf9", + "khome", "kich1", "kil1", "kcub1", "kll", "knp", "kpp", "kcuf1", "kind", + "kri", "khts", "kcuu1", "rmkx", "smkx", "lf0", "lf1", "lf10", "lf2", "lf3", + "lf4", "lf5", "lf6", "lf7", "lf8", "lf9", "rmm", "smm", "nel", "pad", "dch", + "dl", "cud", "ich", "indn", "il", "cub", "cuf", "rin", "cuu", "pfkey", + "pfloc", "pfx", "mc0", "mc4", "mc5", "rep", "rs1", "rs2", "rs3", "rf", "rc", + "vpa", "sc", "ind", "ri", "sgr", "hts", "wind", "ht", "tsl", "uc", "hu", + "iprog", "ka1", "ka3", "kb2", "kc1", "kc3", "mc5p", "rmp", "acsc", "pln", + "kcbt", "smxon", "rmxon", "smam", "rmam", "xonc", "xoffc", "enacs", "smln", + "rmln", "kbeg", "kcan", "kclo", "kcmd", "kcpy", "kcrt", "kend", "kent", + "kext", "kfnd", "khlp", "kmrk", "kmsg", "kmov", "knxt", "kopn", "kopt", + "kprv", "kprt", "krdo", "kref", "krfr", "krpl", "krst", "kres", "ksav", + "kspd", "kund", "kBEG", "kCAN", "kCMD", "kCPY", "kCRT", "kDC", "kDL", + "kslt", "kEND", "kEOL", "kEXT", "kFND", "kHLP", "kHOM", "kIC", "kLFT", + "kMSG", "kMOV", "kNXT", "kOPT", "kPRV", "kPRT", "kRDO", "kRPL", "kRIT", + "kRES", "kSAV", "kSPD", "kUND", "rfi", "kf11", "kf12", "kf13", "kf14", + "kf15", "kf16", "kf17", "kf18", "kf19", "kf20", "kf21", "kf22", "kf23", + "kf24", "kf25", "kf26", "kf27", "kf28", "kf29", "kf30", "kf31", "kf32", + "kf33", "kf34", "kf35", "kf36", "kf37", "kf38", "kf39", "kf40", "kf41", + "kf42", "kf43", "kf44", "kf45", "kf46", "kf47", "kf48", "kf49", "kf50", + "kf51", "kf52", "kf53", "kf54", "kf55", "kf56", "kf57", "kf58", "kf59", + "kf60", "kf61", "kf62", "kf63", "el1", "mgc", "smgl", "smgr", "fln", "sclk", + "dclk", "rmclk", "cwin", "wingo", "hup","dial", "qdial", "tone", "pulse", + "hook", "pause", "wait", "u0", "u1", "u2", "u3", "u4", "u5", "u6", "u7", + "u8", "u9", "op", "oc", "initc", "initp", "scp", "setf", "setb", "cpi", + "lpi", "chr", "cvr", "defc", "swidm", "sdrfq", "sitm", "slm", "smicm", + "snlq", "snrmq", "sshm", "ssubm", "ssupm", "sum", "rwidm", "ritm", "rlm", + "rmicm", "rshm", "rsubm", "rsupm", "rum", "mhpa", "mcud1", "mcub1", "mcuf1", + "mvpa", "mcuu1", "porder", "mcud", "mcub", "mcuf", "mcuu", "scs", "smgb", + "smgbp", "smglp", "smgrp", "smgt", "smgtp", "sbim", "scsd", "rbim", "rcsd", + "subcs", "supcs", "docr", "zerom", "csnm", "kmous", "minfo", "reqmp", + "getm", "setaf", "setab", "pfxl", "devt", "csin", "s0ds", "s1ds", "s2ds", + "s3ds", "smglr", "smgtb", "birep", "binel", "bicr", "colornm", "defbi", + "endbi", "setcolor", "slines", "dispc", "smpch", "rmpch", "smsc", "rmsc", + "pctrm", "scesc", "scesa", "ehhlm", "elhlm", "elohlm", "erhlm", "ethlm", + "evhlm", "sgr1", "slength", "OTi2", "OTrs", "OTnl", "OTbc", "OTko", "OTma", + "OTG2", "OTG3", "OTG1", "OTG4", "OTGR", "OTGL", "OTGU", "OTGD", "OTGH", + "OTGV", "OTGC","meml", "memu", "box1" +) +# fmt: on + + +def _get_terminfo_dirs() -> list[Path]: + """Get list of directories to search for terminfo files. + + Based on ncurses behavior in: + - ncurses/tinfo/db_iterator.c:_nc_next_db() + - ncurses/tinfo/read_entry.c:_nc_read_entry() + """ + dirs = [] + + terminfo = os.environ.get("TERMINFO") + if terminfo: + dirs.append(terminfo) + + try: + home = Path.home() + dirs.append(str(home / ".terminfo")) + except RuntimeError: + pass + + # Check TERMINFO_DIRS + terminfo_dirs = os.environ.get("TERMINFO_DIRS", "") + if terminfo_dirs: + for d in terminfo_dirs.split(":"): + if d: + dirs.append(d) + + dirs.extend( + [ + "/etc/terminfo", + "/lib/terminfo", + "/usr/lib/terminfo", + "/usr/share/terminfo", + "/usr/share/lib/terminfo", + "/usr/share/misc/terminfo", + "/usr/local/lib/terminfo", + "/usr/local/share/terminfo", + ] + ) + + return [Path(d) for d in dirs if Path(d).is_dir()] + + +def _validate_terminal_name_or_raise(terminal_name: str) -> None: + if not isinstance(terminal_name, str): + raise TypeError("`terminal_name` must be a string") + + if not terminal_name: + raise ValueError("`terminal_name` cannot be empty") + + if "\x00" in terminal_name: + raise ValueError("NUL character found in `terminal_name`") + + t = Path(terminal_name) + if len(t.parts) > 1: + raise ValueError("`terminal_name` cannot contain path separators") + + +def _read_terminfo_file(terminal_name: str) -> bytes: + """Find and read terminfo file for given terminal name. + + Terminfo files are stored in directories using the first character + of the terminal name as a subdirectory. + """ + _validate_terminal_name_or_raise(terminal_name) + first_char = terminal_name[0].lower() + filename = terminal_name + + for directory in _get_terminfo_dirs(): + path = directory / first_char / filename + if path.is_file(): + return path.read_bytes() + + # Try with hex encoding of first char (for special chars) + hex_dir = "%02x" % ord(first_char) + path = directory / hex_dir / filename + if path.is_file(): + return path.read_bytes() + + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filename) + + +# Hard-coded terminal capabilities for common terminals +# This is a minimal subset needed by PyREPL +_TERMINAL_CAPABILITIES = { + # ANSI/xterm-compatible terminals + "ansi": { + # Bell + "bel": b"\x07", + # Cursor movement + "cub": b"\x1b[%p1%dD", # Move cursor left N columns + "cud": b"\x1b[%p1%dB", # Move cursor down N rows + "cuf": b"\x1b[%p1%dC", # Move cursor right N columns + "cuu": b"\x1b[%p1%dA", # Move cursor up N rows + "cub1": b"\x08", # Move cursor left 1 column + "cud1": b"\n", # Move cursor down 1 row + "cuf1": b"\x1b[C", # Move cursor right 1 column + "cuu1": b"\x1b[A", # Move cursor up 1 row + "cup": b"\x1b[%i%p1%d;%p2%dH", # Move cursor to row, column + "hpa": b"\x1b[%i%p1%dG", # Move cursor to column + # Clear operations + "clear": b"\x1b[H\x1b[2J", # Clear screen and home cursor + "el": b"\x1b[K", # Clear to end of line + # Insert/delete + "dch": b"\x1b[%p1%dP", # Delete N characters + "dch1": b"\x1b[P", # Delete 1 character + "ich": b"\x1b[%p1%d@", # Insert N characters + "ich1": b"", # Insert 1 character + # Cursor visibility + "civis": b"\x1b[?25l", # Make cursor invisible + "cnorm": b"\x1b[?12l\x1b[?25h", # Make cursor normal (visible) + # Scrolling + "ind": b"\n", # Scroll up one line + "ri": b"\x1bM", # Scroll down one line + # Keypad mode + "smkx": b"\x1b[?1h\x1b=", # Enable keypad mode + "rmkx": b"\x1b[?1l\x1b>", # Disable keypad mode + # Padding (not used in modern terminals) + "pad": b"", + # Function keys and special keys + "kdch1": b"\x1b[3~", # Delete key + "kcud1": b"\x1bOB", # Down arrow + "kend": b"\x1bOF", # End key + "kent": b"\x1bOM", # Enter key + "khome": b"\x1bOH", # Home key + "kich1": b"\x1b[2~", # Insert key + "kcub1": b"\x1bOD", # Left arrow + "knp": b"\x1b[6~", # Page down + "kpp": b"\x1b[5~", # Page up + "kcuf1": b"\x1bOC", # Right arrow + "kcuu1": b"\x1bOA", # Up arrow + # Function keys F1-F20 + "kf1": b"\x1bOP", + "kf2": b"\x1bOQ", + "kf3": b"\x1bOR", + "kf4": b"\x1bOS", + "kf5": b"\x1b[15~", + "kf6": b"\x1b[17~", + "kf7": b"\x1b[18~", + "kf8": b"\x1b[19~", + "kf9": b"\x1b[20~", + "kf10": b"\x1b[21~", + "kf11": b"\x1b[23~", + "kf12": b"\x1b[24~", + "kf13": b"\x1b[1;2P", + "kf14": b"\x1b[1;2Q", + "kf15": b"\x1b[1;2R", + "kf16": b"\x1b[1;2S", + "kf17": b"\x1b[15;2~", + "kf18": b"\x1b[17;2~", + "kf19": b"\x1b[18;2~", + "kf20": b"\x1b[19;2~", + }, + # Dumb terminal - minimal capabilities + "dumb": { + "bel": b"\x07", # Bell + "cud1": b"\n", # Move down 1 row (newline) + "ind": b"\n", # Scroll up one line (newline) + }, + # Linux console + "linux": { + # Bell + "bel": b"\x07", + # Cursor movement + "cub": b"\x1b[%p1%dD", # Move cursor left N columns + "cud": b"\x1b[%p1%dB", # Move cursor down N rows + "cuf": b"\x1b[%p1%dC", # Move cursor right N columns + "cuu": b"\x1b[%p1%dA", # Move cursor up N rows + "cub1": b"\x08", # Move cursor left 1 column (backspace) + "cud1": b"\n", # Move cursor down 1 row (newline) + "cuf1": b"\x1b[C", # Move cursor right 1 column + "cuu1": b"\x1b[A", # Move cursor up 1 row + "cup": b"\x1b[%i%p1%d;%p2%dH", # Move cursor to row, column + "hpa": b"\x1b[%i%p1%dG", # Move cursor to column + # Clear operations + "clear": b"\x1b[H\x1b[J", # Clear screen and home cursor (different from ansi!) + "el": b"\x1b[K", # Clear to end of line + # Insert/delete + "dch": b"\x1b[%p1%dP", # Delete N characters + "dch1": b"\x1b[P", # Delete 1 character + "ich": b"\x1b[%p1%d@", # Insert N characters + "ich1": b"\x1b[@", # Insert 1 character + # Cursor visibility + "civis": b"\x1b[?25l\x1b[?1c", # Make cursor invisible + "cnorm": b"\x1b[?25h\x1b[?0c", # Make cursor normal + # Scrolling + "ind": b"\n", # Scroll up one line + "ri": b"\x1bM", # Scroll down one line + # Keypad mode + "smkx": b"\x1b[?1h\x1b=", # Enable keypad mode + "rmkx": b"\x1b[?1l\x1b>", # Disable keypad mode + # Function keys and special keys + "kdch1": b"\x1b[3~", # Delete key + "kcud1": b"\x1b[B", # Down arrow + "kend": b"\x1b[4~", # End key (different from ansi!) + "khome": b"\x1b[1~", # Home key (different from ansi!) + "kich1": b"\x1b[2~", # Insert key + "kcub1": b"\x1b[D", # Left arrow + "knp": b"\x1b[6~", # Page down + "kpp": b"\x1b[5~", # Page up + "kcuf1": b"\x1b[C", # Right arrow + "kcuu1": b"\x1b[A", # Up arrow + # Function keys + "kf1": b"\x1b[[A", + "kf2": b"\x1b[[B", + "kf3": b"\x1b[[C", + "kf4": b"\x1b[[D", + "kf5": b"\x1b[[E", + "kf6": b"\x1b[17~", + "kf7": b"\x1b[18~", + "kf8": b"\x1b[19~", + "kf9": b"\x1b[20~", + "kf10": b"\x1b[21~", + "kf11": b"\x1b[23~", + "kf12": b"\x1b[24~", + "kf13": b"\x1b[25~", + "kf14": b"\x1b[26~", + "kf15": b"\x1b[28~", + "kf16": b"\x1b[29~", + "kf17": b"\x1b[31~", + "kf18": b"\x1b[32~", + "kf19": b"\x1b[33~", + "kf20": b"\x1b[34~", + }, +} + +# Map common TERM values to capability sets +_TERM_ALIASES = { + "xterm": "ansi", + "xterm-color": "ansi", + "xterm-256color": "ansi", + "screen": "ansi", + "screen-256color": "ansi", + "tmux": "ansi", + "tmux-256color": "ansi", + "vt100": "ansi", + "vt220": "ansi", + "rxvt": "ansi", + "rxvt-unicode": "ansi", + "rxvt-unicode-256color": "ansi", + "unknown": "dumb", +} + + +@dataclass +class TermInfo: + terminal_name: str | bytes | None + fallback: bool = True + + _capabilities: dict[str, bytes] = field(default_factory=dict) + + def __post_init__(self) -> None: + """Initialize terminal capabilities for the given terminal type. + + Based on ncurses implementation in: + - ncurses/tinfo/lib_setup.c:setupterm() and _nc_setupterm() + - ncurses/tinfo/lib_setup.c:TINFO_SETUP_TERM() + + This version first attempts to read terminfo database files like ncurses, + then, if `fallback` is True, falls back to hardcoded capabilities for + common terminal types. + """ + # If termstr is None or empty, try to get from environment + if not self.terminal_name: + self.terminal_name = os.environ.get("TERM") or "ANSI" + + if isinstance(self.terminal_name, bytes): + self.terminal_name = self.terminal_name.decode("ascii") + + try: + self._parse_terminfo_file(self.terminal_name) + except (OSError, ValueError): + if not self.fallback: + raise + + term_type = _TERM_ALIASES.get( + self.terminal_name, self.terminal_name + ) + if term_type not in _TERMINAL_CAPABILITIES: + term_type = "dumb" + self._capabilities = _TERMINAL_CAPABILITIES[term_type].copy() + + def _parse_terminfo_file(self, terminal_name: str) -> None: + """Parse a terminfo file. + + Populate the _capabilities dict for easy retrieval + + Based on ncurses implementation in: + - ncurses/tinfo/read_entry.c:_nc_read_termtype() + - ncurses/tinfo/read_entry.c:_nc_read_file_entry() + - ncurses/tinfo/lib_ti.c:tigetstr() + """ + data = _read_terminfo_file(terminal_name) + too_short = f"TermInfo file for {terminal_name!r} too short" + offset = 12 + if len(data) < offset: + raise ValueError(too_short) + + magic, name_size, bool_count, num_count, str_count, str_size = ( + struct.unpack(" len(data): + raise ValueError(too_short) + + # Read string offsets + end_offset = offset + 2 * str_count + if offset > len(data): + raise ValueError(too_short) + string_offset_data = data[offset:end_offset] + string_offsets = [ + off for [off] in struct.iter_unpack(" len(data): + raise ValueError(too_short) + string_table = data[offset : offset + str_size] + + # Extract strings from string table + capabilities = {} + for cap, off in zip(_STRING_NAMES, string_offsets): + if off < 0: + # CANCELLED_STRING; we do not store those + continue + elif off < len(string_table): + # Find null terminator + end = string_table.find(0, off) + if end >= 0: + capabilities[cap] = string_table[off:end] + # in other cases this is ABSENT_STRING; we don't store those. + + # Note: we don't support extended capabilities since PyREPL doesn't + # need them. + + self._capabilities = capabilities + + def get(self, cap: str) -> bytes | None: + """Get terminal capability string by name. + """ + if not isinstance(cap, str): + raise TypeError(f"`cap` must be a string, not {type(cap)}") + + return self._capabilities.get(cap) + + +def tparm(cap_bytes: bytes, *params: int) -> bytes: + """Parameterize a terminal capability string. + + Based on ncurses implementation in: + - ncurses/tinfo/lib_tparm.c:tparm() + - ncurses/tinfo/lib_tparm.c:tparam_internal() + + The ncurses version implements a full stack-based interpreter for + terminfo parameter strings. This pure Python version implements only + the subset of parameter substitution operations needed by PyREPL: + - %i (increment parameters for 1-based indexing) + - %p[1-9]%d (parameter substitution) + - %p[1-9]%{n}%+%d (parameter plus constant) + """ + if not isinstance(cap_bytes, bytes): + raise TypeError(f"`cap` must be bytes, not {type(cap_bytes)}") + + result = cap_bytes + + # %i - increment parameters (1-based instead of 0-based) + increment = b"%i" in result + if increment: + result = result.replace(b"%i", b"") + + # Replace %p1%d, %p2%d, etc. with actual parameter values + for i in range(len(params)): + pattern = b"%%p%d%%d" % (i + 1) + if pattern in result: + value = params[i] + if increment: + value += 1 + result = result.replace(pattern, str(value).encode("ascii")) + + # Handle %p1%{1}%+%d (parameter plus constant) + # Used in some cursor positioning sequences + pattern_re = re.compile(rb"%p(\d)%\{(\d+)\}%\+%d") + matches = list(pattern_re.finditer(result)) + for match in reversed(matches): # reversed to maintain positions + param_idx = int(match.group(1)) + constant = int(match.group(2)) + value = params[param_idx] + constant + result = ( + result[: match.start()] + + str(value).encode("ascii") + + result[match.end() :] + ) + + return result diff --git a/Lib/_pyrepl/trace.py b/Lib/_pyrepl/trace.py new file mode 100644 index 00000000000..943ee12f964 --- /dev/null +++ b/Lib/_pyrepl/trace.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import os +import sys + +# types +if False: + from typing import IO + + +trace_file: IO[str] | None = None +if trace_filename := os.environ.get("PYREPL_TRACE"): + trace_file = open(trace_filename, "a") + + + +if sys.platform == "emscripten": + from posix import _emscripten_log + + def trace(line: str, *k: object, **kw: object) -> None: + if "PYREPL_TRACE" not in os.environ: + return + if k or kw: + line = line.format(*k, **kw) + _emscripten_log(line) + +else: + def trace(line: str, *k: object, **kw: object) -> None: + if trace_file is None: + return + if k or kw: + line = line.format(*k, **kw) + trace_file.write(line + "\n") + trace_file.flush() diff --git a/Lib/_pyrepl/types.py b/Lib/_pyrepl/types.py new file mode 100644 index 00000000000..c5b7ebc1a40 --- /dev/null +++ b/Lib/_pyrepl/types.py @@ -0,0 +1,10 @@ +from collections.abc import Callable, Iterator + +type Callback = Callable[[], object] +type SimpleContextManager = Iterator[None] +type KeySpec = str # like r"\C-c" +type CommandName = str # like "interrupt" +type EventTuple = tuple[CommandName, str] +type Completer = Callable[[str, int], str | None] +type CharBuffer = list[str] +type CharWidths = list[int] diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py new file mode 100644 index 00000000000..639d16db3f8 --- /dev/null +++ b/Lib/_pyrepl/unix_console.py @@ -0,0 +1,842 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import errno +import os +import re +import select +import signal +import struct +import termios +import time +import types +import platform +from fcntl import ioctl + +from . import terminfo +from .console import Console, Event +from .fancy_termios import tcgetattr, tcsetattr, TermState +from .trace import trace +from .unix_eventqueue import EventQueue +from .utils import wlen + +# declare posix optional to allow None assignment on other platforms +posix: types.ModuleType | None +try: + import posix +except ImportError: + posix = None + +TYPE_CHECKING = False + +# types +if TYPE_CHECKING: + from typing import AbstractSet, IO, Literal, overload, cast +else: + overload = lambda func: None + cast = lambda typ, val: val + + +class InvalidTerminal(RuntimeError): + def __init__(self, message: str) -> None: + super().__init__(errno.EIO, message) + + +_error = (termios.error, InvalidTerminal) +_error_codes_to_ignore = frozenset([errno.EIO, errno.ENXIO, errno.EPERM]) + +SIGWINCH_EVENT = "repaint" + +FIONREAD = getattr(termios, "FIONREAD", None) +TIOCGWINSZ = getattr(termios, "TIOCGWINSZ", None) + +# ------------ start of baudrate definitions ------------ + +# Add (possibly) missing baudrates (check termios man page) to termios + + +def add_baudrate_if_supported(dictionary: dict[int, int], rate: int) -> None: + baudrate_name = "B%d" % rate + if hasattr(termios, baudrate_name): + dictionary[getattr(termios, baudrate_name)] = rate + + +# Check the termios man page (Line speed) to know where these +# values come from. +potential_baudrates = [ + 0, + 110, + 115200, + 1200, + 134, + 150, + 1800, + 19200, + 200, + 230400, + 2400, + 300, + 38400, + 460800, + 4800, + 50, + 57600, + 600, + 75, + 9600, +] + +ratedict: dict[int, int] = {} +for rate in potential_baudrates: + add_baudrate_if_supported(ratedict, rate) + +# Clean up variables to avoid unintended usage +del rate, add_baudrate_if_supported + +# ------------ end of baudrate definitions ------------ + +delayprog = re.compile(b"\\$<([0-9]+)((?:/|\\*){0,2})>") + +try: + poll: type[select.poll] = select.poll +except AttributeError: + # this is exactly the minimum necessary to support what we + # do with poll objects + class MinimalPoll: + def __init__(self): + pass + + def register(self, fd, flag): + self.fd = fd + + # note: The 'timeout' argument is received as *milliseconds* + def poll(self, timeout: float | None = None) -> list[int]: + if timeout is None: + r, w, e = select.select([self.fd], [], []) + else: + r, w, e = select.select([self.fd], [], [], timeout / 1000) + return r + + poll = MinimalPoll # type: ignore[assignment] + + +class UnixConsole(Console): + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + """ + Initialize the UnixConsole. + + Parameters: + - f_in (int or file-like object): Input file descriptor or object. + - f_out (int or file-like object): Output file descriptor or object. + - term (str): Terminal name. + - encoding (str): Encoding to use for I/O operations. + """ + super().__init__(f_in, f_out, term, encoding) + + self.pollob = poll() + self.pollob.register(self.input_fd, select.POLLIN) + self.terminfo = terminfo.TermInfo(term or None) + self.term = term + self.is_apple_terminal = ( + platform.system() == "Darwin" + and os.getenv("TERM_PROGRAM") == "Apple_Terminal" + ) + + try: + self.__input_fd_set(tcgetattr(self.input_fd), ignore=frozenset()) + except _error as e: + raise RuntimeError(f"termios failure ({e.args[1]})") + + @overload + def _my_getstr( + cap: str, optional: Literal[False] = False + ) -> bytes: ... + + @overload + def _my_getstr(cap: str, optional: bool) -> bytes | None: ... + + def _my_getstr(cap: str, optional: bool = False) -> bytes | None: + r = self.terminfo.get(cap) + if not optional and r is None: + raise InvalidTerminal( + f"terminal doesn't have the required {cap} capability" + ) + return r + + self._bel = _my_getstr("bel") + self._civis = _my_getstr("civis", optional=True) + self._clear = _my_getstr("clear") + self._cnorm = _my_getstr("cnorm", optional=True) + self._cub = _my_getstr("cub", optional=True) + self._cub1 = _my_getstr("cub1", optional=True) + self._cud = _my_getstr("cud", optional=True) + self._cud1 = _my_getstr("cud1", optional=True) + self._cuf = _my_getstr("cuf", optional=True) + self._cuf1 = _my_getstr("cuf1", optional=True) + self._cup = _my_getstr("cup") + self._cuu = _my_getstr("cuu", optional=True) + self._cuu1 = _my_getstr("cuu1", optional=True) + self._dch1 = _my_getstr("dch1", optional=True) + self._dch = _my_getstr("dch", optional=True) + self._el = _my_getstr("el") + self._hpa = _my_getstr("hpa", optional=True) + self._ich = _my_getstr("ich", optional=True) + self._ich1 = _my_getstr("ich1", optional=True) + self._ind = _my_getstr("ind", optional=True) + self._pad = _my_getstr("pad", optional=True) + self._ri = _my_getstr("ri", optional=True) + self._rmkx = _my_getstr("rmkx", optional=True) + self._smkx = _my_getstr("smkx", optional=True) + + self.__setup_movement() + + self.event_queue = EventQueue( + self.input_fd, self.encoding, self.terminfo + ) + self.cursor_visible = 1 + + signal.signal(signal.SIGCONT, self._sigcont_handler) + + def _sigcont_handler(self, signum, frame): + self.restore() + self.prepare() + + def __read(self, n: int) -> bytes: + return os.read(self.input_fd, n) + + def change_encoding(self, encoding: str) -> None: + """ + Change the encoding used for I/O operations. + + Parameters: + - encoding (str): New encoding to use. + """ + self.encoding = encoding + + def refresh(self, screen, c_xy): + """ + Refresh the console screen. + + Parameters: + - screen (list): List of strings representing the screen contents. + - c_xy (tuple): Cursor position (x, y) on the screen. + """ + cx, cy = c_xy + if not self.__gone_tall: + while len(self.screen) < min(len(screen), self.height): + self.__hide_cursor() + if self.screen: + self.__move(0, len(self.screen) - 1) + self.__write("\n") + self.posxy = 0, len(self.screen) + self.screen.append("") + else: + while len(self.screen) < len(screen): + self.screen.append("") + + if len(screen) > self.height: + self.__gone_tall = 1 + self.__move = self.__move_tall + + px, py = self.posxy + old_offset = offset = self.__offset + height = self.height + + # we make sure the cursor is on the screen, and that we're + # using all of the screen if we can + if cy < offset: + offset = cy + elif cy >= offset + height: + offset = cy - height + 1 + elif offset > 0 and len(screen) < offset + height: + offset = max(len(screen) - height, 0) + screen.append("") + + oldscr = self.screen[old_offset : old_offset + height] + newscr = screen[offset : offset + height] + + # use hardware scrolling if we have it. + if old_offset > offset and self._ri: + self.__hide_cursor() + self.__write_code(self._cup, 0, 0) + self.posxy = 0, old_offset + for i in range(old_offset - offset): + self.__write_code(self._ri) + oldscr.pop(-1) + oldscr.insert(0, "") + elif old_offset < offset and self._ind: + self.__hide_cursor() + self.__write_code(self._cup, self.height - 1, 0) + self.posxy = 0, old_offset + self.height - 1 + for i in range(offset - old_offset): + self.__write_code(self._ind) + oldscr.pop(0) + oldscr.append("") + + self.__offset = offset + + for ( + y, + oldline, + newline, + ) in zip(range(offset, offset + height), oldscr, newscr): + if oldline != newline: + self.__write_changed_line(y, oldline, newline, px) + + y = len(newscr) + while y < len(oldscr): + self.__hide_cursor() + self.__move(0, y) + self.posxy = 0, y + self.__write_code(self._el) + y += 1 + + self.__show_cursor() + + self.screen = screen.copy() + self.move_cursor(cx, cy) + self.flushoutput() + + def move_cursor(self, x, y): + """ + Move the cursor to the specified position on the screen. + + Parameters: + - x (int): X coordinate. + - y (int): Y coordinate. + """ + if y < self.__offset or y >= self.__offset + self.height: + self.event_queue.insert(Event("scroll", None)) + else: + self.__move(x, y) + self.posxy = x, y + self.flushoutput() + + def prepare(self): + """ + Prepare the console for input/output operations. + """ + self.__buffer = [] + + self.__svtermstate = tcgetattr(self.input_fd) + raw = self.__svtermstate.copy() + raw.iflag &= ~(termios.INPCK | termios.ISTRIP | termios.IXON) + raw.oflag &= ~(termios.OPOST) + raw.cflag &= ~(termios.CSIZE | termios.PARENB) + raw.cflag |= termios.CS8 + raw.iflag |= termios.BRKINT + raw.lflag &= ~(termios.ICANON | termios.ECHO | termios.IEXTEN) + raw.lflag |= termios.ISIG + raw.cc[termios.VMIN] = 1 + raw.cc[termios.VTIME] = 0 + self.__input_fd_set(raw) + + # In macOS terminal we need to deactivate line wrap via ANSI escape code + if self.is_apple_terminal: + os.write(self.output_fd, b"\033[?7l") + + self.screen = [] + self.height, self.width = self.getheightwidth() + + self.posxy = 0, 0 + self.__gone_tall = 0 + self.__move = self.__move_short + self.__offset = 0 + + self.__maybe_write_code(self._smkx) + + try: + self.old_sigwinch = signal.signal(signal.SIGWINCH, self.__sigwinch) + except ValueError: + pass + + self.__enable_bracketed_paste() + + def restore(self): + """ + Restore the console to the default state + """ + self.__disable_bracketed_paste() + self.__maybe_write_code(self._rmkx) + self.flushoutput() + self.__input_fd_set(self.__svtermstate) + + if self.is_apple_terminal: + os.write(self.output_fd, b"\033[?7h") + + if hasattr(self, "old_sigwinch"): + try: + signal.signal(signal.SIGWINCH, self.old_sigwinch) + except ValueError as e: + import threading + if threading.current_thread() is threading.main_thread(): + raise e + del self.old_sigwinch + + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + trace("push char {char!r}", char=char) + self.event_queue.push(char) + + def get_event(self, block: bool = True) -> Event | None: + """ + Get an event from the console event queue. + + Parameters: + - block (bool): Whether to block until an event is available. + + Returns: + - Event: Event object from the event queue. + """ + if not block and not self.wait(timeout=0): + return None + + while self.event_queue.empty(): + while True: + try: + self.push_char(self.__read(1)) + except OSError as err: + if err.errno == errno.EINTR: + if not self.event_queue.empty(): + return self.event_queue.get() + else: + continue + elif err.errno == errno.EIO: + raise SystemExit(errno.EIO) + else: + raise + else: + break + return self.event_queue.get() + + def wait(self, timeout: float | None = None) -> bool: + """ + Wait for events on the console. + """ + return ( + not self.event_queue.empty() + or bool(self.pollob.poll(timeout)) + ) + + def set_cursor_vis(self, visible): + """ + Set the visibility of the cursor. + + Parameters: + - visible (bool): Visibility flag. + """ + if visible: + self.__show_cursor() + else: + self.__hide_cursor() + + if TIOCGWINSZ: + + def getheightwidth(self): + """ + Get the height and width of the console. + + Returns: + - tuple: Height and width of the console. + """ + try: + return int(os.environ["LINES"]), int(os.environ["COLUMNS"]) + except (KeyError, TypeError, ValueError): + try: + size = ioctl(self.input_fd, TIOCGWINSZ, b"\000" * 8) + except OSError: + return 25, 80 + height, width = struct.unpack("hhhh", size)[0:2] + if not height: + return 25, 80 + return height, width + + else: + + def getheightwidth(self): + """ + Get the height and width of the console. + + Returns: + - tuple: Height and width of the console. + """ + try: + return int(os.environ["LINES"]), int(os.environ["COLUMNS"]) + except (KeyError, TypeError, ValueError): + return 25, 80 + + def forgetinput(self): + """ + Discard any pending input on the console. + """ + termios.tcflush(self.input_fd, termios.TCIFLUSH) + + def flushoutput(self): + """ + Flush the output buffer. + """ + for text, iscode in self.__buffer: + if iscode: + self.__tputs(text) + else: + os.write(self.output_fd, text.encode(self.encoding, "replace")) + del self.__buffer[:] + + def finish(self): + """ + Finish console operations and flush the output buffer. + """ + y = len(self.screen) - 1 + while y >= 0 and not self.screen[y]: + y -= 1 + self.__move(0, min(y, self.height + self.__offset - 1)) + self.__write("\n\r") + self.flushoutput() + + def beep(self): + """ + Emit a beep sound. + """ + self.__maybe_write_code(self._bel) + self.flushoutput() + + if FIONREAD: + + def getpending(self): + """ + Get pending events from the console event queue. + + Returns: + - Event: Pending event from the event queue. + """ + e = Event("key", "", b"") + + while not self.event_queue.empty(): + e2 = self.event_queue.get() + e.data += e2.data + e.raw += e.raw + + amount = struct.unpack("i", ioctl(self.input_fd, FIONREAD, b"\0\0\0\0"))[0] + trace("getpending({a})", a=amount) + raw = self.__read(amount) + data = str(raw, self.encoding, "replace") + e.data += data + e.raw += raw + return e + + else: + + def getpending(self): + """ + Get pending events from the console event queue. + + Returns: + - Event: Pending event from the event queue. + """ + e = Event("key", "", b"") + + while not self.event_queue.empty(): + e2 = self.event_queue.get() + e.data += e2.data + e.raw += e.raw + + amount = 10000 + raw = self.__read(amount) + data = str(raw, self.encoding, "replace") + e.data += data + e.raw += raw + return e + + def clear(self): + """ + Clear the console screen. + """ + self.__write_code(self._clear) + self.__gone_tall = 1 + self.__move = self.__move_tall + self.posxy = 0, 0 + self.screen = [] + + @property + def input_hook(self): + # avoid inline imports here so the repl doesn't get flooded + # with import logging from -X importtime=2 + if posix is not None and posix._is_inputhook_installed(): + return posix._inputhook + + def __enable_bracketed_paste(self) -> None: + os.write(self.output_fd, b"\x1b[?2004h") + + def __disable_bracketed_paste(self) -> None: + os.write(self.output_fd, b"\x1b[?2004l") + + def __setup_movement(self): + """ + Set up the movement functions based on the terminal capabilities. + """ + if 0 and self._hpa: # hpa don't work in windows telnet :-( + self.__move_x = self.__move_x_hpa + elif self._cub and self._cuf: + self.__move_x = self.__move_x_cub_cuf + elif self._cub1 and self._cuf1: + self.__move_x = self.__move_x_cub1_cuf1 + else: + raise RuntimeError("insufficient terminal (horizontal)") + + if self._cuu and self._cud: + self.__move_y = self.__move_y_cuu_cud + elif self._cuu1 and self._cud1: + self.__move_y = self.__move_y_cuu1_cud1 + else: + raise RuntimeError("insufficient terminal (vertical)") + + if self._dch1: + self.dch1 = self._dch1 + elif self._dch: + self.dch1 = terminfo.tparm(self._dch, 1) + else: + self.dch1 = None + + if self._ich1: + self.ich1 = self._ich1 + elif self._ich: + self.ich1 = terminfo.tparm(self._ich, 1) + else: + self.ich1 = None + + self.__move = self.__move_short + + def __write_changed_line(self, y, oldline, newline, px_coord): + # this is frustrating; there's no reason to test (say) + # self.dch1 inside the loop -- but alternative ways of + # structuring this function are equally painful (I'm trying to + # avoid writing code generators these days...) + minlen = min(wlen(oldline), wlen(newline)) + x_pos = 0 + x_coord = 0 + + px_pos = 0 + j = 0 + for c in oldline: + if j >= px_coord: + break + j += wlen(c) + px_pos += 1 + + # reuse the oldline as much as possible, but stop as soon as we + # encounter an ESCAPE, because it might be the start of an escape + # sequence + while ( + x_coord < minlen + and oldline[x_pos] == newline[x_pos] + and newline[x_pos] != "\x1b" + ): + x_coord += wlen(newline[x_pos]) + x_pos += 1 + + # if we need to insert a single character right after the first detected change + if oldline[x_pos:] == newline[x_pos + 1 :] and self.ich1: + if ( + y == self.posxy[1] + and x_coord > self.posxy[0] + and oldline[px_pos:x_pos] == newline[px_pos + 1 : x_pos + 1] + ): + x_pos = px_pos + x_coord = px_coord + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write_code(self.ich1) + self.__write(newline[x_pos]) + self.posxy = x_coord + character_width, y + + # if it's a single character change in the middle of the line + elif ( + x_coord < minlen + and oldline[x_pos + 1 :] == newline[x_pos + 1 :] + and wlen(oldline[x_pos]) == wlen(newline[x_pos]) + ): + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write(newline[x_pos]) + self.posxy = x_coord + character_width, y + + # if this is the last character to fit in the line and we edit in the middle of the line + elif ( + self.dch1 + and self.ich1 + and wlen(newline) == self.width + and x_coord < wlen(newline) - 2 + and newline[x_pos + 1 : -1] == oldline[x_pos:-2] + ): + self.__hide_cursor() + self.__move(self.width - 2, y) + self.posxy = self.width - 2, y + self.__write_code(self.dch1) + + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write_code(self.ich1) + self.__write(newline[x_pos]) + self.posxy = character_width + 1, y + + else: + self.__hide_cursor() + self.__move(x_coord, y) + if wlen(oldline) > wlen(newline): + self.__write_code(self._el) + self.__write(newline[x_pos:]) + self.posxy = wlen(newline), y + + if "\x1b" in newline: + # ANSI escape characters are present, so we can't assume + # anything about the position of the cursor. Moving the cursor + # to the left margin should work to get to a known position. + self.move_cursor(0, y) + + def __write(self, text): + self.__buffer.append((text, 0)) + + def __write_code(self, fmt, *args): + self.__buffer.append((terminfo.tparm(fmt, *args), 1)) + + def __maybe_write_code(self, fmt, *args): + if fmt: + self.__write_code(fmt, *args) + + def __move_y_cuu1_cud1(self, y): + assert self._cud1 is not None + assert self._cuu1 is not None + dy = y - self.posxy[1] + if dy > 0: + self.__write_code(dy * self._cud1) + elif dy < 0: + self.__write_code((-dy) * self._cuu1) + + def __move_y_cuu_cud(self, y): + dy = y - self.posxy[1] + if dy > 0: + self.__write_code(self._cud, dy) + elif dy < 0: + self.__write_code(self._cuu, -dy) + + def __move_x_hpa(self, x: int) -> None: + if x != self.posxy[0]: + self.__write_code(self._hpa, x) + + def __move_x_cub1_cuf1(self, x: int) -> None: + assert self._cuf1 is not None + assert self._cub1 is not None + dx = x - self.posxy[0] + if dx > 0: + self.__write_code(self._cuf1 * dx) + elif dx < 0: + self.__write_code(self._cub1 * (-dx)) + + def __move_x_cub_cuf(self, x: int) -> None: + dx = x - self.posxy[0] + if dx > 0: + self.__write_code(self._cuf, dx) + elif dx < 0: + self.__write_code(self._cub, -dx) + + def __move_short(self, x, y): + self.__move_x(x) + self.__move_y(y) + + def __move_tall(self, x, y): + assert 0 <= y - self.__offset < self.height, y - self.__offset + self.__write_code(self._cup, y - self.__offset, x) + + def __sigwinch(self, signum, frame): + self.event_queue.insert(Event("resize", None)) + + def __hide_cursor(self): + if self.cursor_visible: + self.__maybe_write_code(self._civis) + self.cursor_visible = 0 + + def __show_cursor(self): + if not self.cursor_visible: + self.__maybe_write_code(self._cnorm) + self.cursor_visible = 1 + + def repaint(self): + if not self.__gone_tall: + self.posxy = 0, self.posxy[1] + self.__write("\r") + ns = len(self.screen) * ["\000" * self.width] + self.screen = ns + else: + self.posxy = 0, self.__offset + self.__move(0, self.__offset) + ns = self.height * ["\000" * self.width] + self.screen = ns + + def __tputs(self, fmt, prog=delayprog): + """A Python implementation of the curses tputs function; the + curses one can't really be wrapped in a sane manner. + + I have the strong suspicion that this is complexity that + will never do anyone any good.""" + # using .get() means that things will blow up + # only if the bps is actually needed (which I'm + # betting is pretty unlikely) + bps = ratedict.get(self.__svtermstate.ospeed) + while True: + m = prog.search(fmt) + if not m: + os.write(self.output_fd, fmt) + break + x, y = m.span() + os.write(self.output_fd, fmt[:x]) + fmt = fmt[y:] + delay = int(m.group(1)) + if b"*" in m.group(2): + delay *= self.height + if self._pad and bps is not None: + nchars = (bps * delay) / 1000 + os.write(self.output_fd, self._pad * nchars) + else: + time.sleep(float(delay) / 1000.0) + + def __input_fd_set( + self, + state: TermState, + ignore: AbstractSet[int] = _error_codes_to_ignore, + ) -> bool: + try: + tcsetattr(self.input_fd, termios.TCSADRAIN, state) + except termios.error as te: + if te.args[0] not in ignore: + raise + return False + else: + return True diff --git a/Lib/_pyrepl/unix_eventqueue.py b/Lib/_pyrepl/unix_eventqueue.py new file mode 100644 index 00000000000..2a9cca59e74 --- /dev/null +++ b/Lib/_pyrepl/unix_eventqueue.py @@ -0,0 +1,77 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from .terminfo import TermInfo +from .trace import trace +from .base_eventqueue import BaseEventQueue +from termios import tcgetattr, VERASE +import os + + +# Mapping of human-readable key names to their terminal-specific codes +TERMINAL_KEYNAMES = { + "delete": "kdch1", + "down": "kcud1", + "end": "kend", + "enter": "kent", + "home": "khome", + "insert": "kich1", + "left": "kcub1", + "page down": "knp", + "page up": "kpp", + "right": "kcuf1", + "up": "kcuu1", +} + + +# Function keys F1-F20 mapping +TERMINAL_KEYNAMES.update(("f%d" % i, "kf%d" % i) for i in range(1, 21)) + +# Known CTRL-arrow keycodes +CTRL_ARROW_KEYCODES= { + # for xterm, gnome-terminal, xfce terminal, etc. + b'\033[1;5D': 'ctrl left', + b'\033[1;5C': 'ctrl right', + # for rxvt + b'\033Od': 'ctrl left', + b'\033Oc': 'ctrl right', +} + +def get_terminal_keycodes(ti: TermInfo) -> dict[bytes, str]: + """ + Generates a dictionary mapping terminal keycodes to human-readable names. + """ + keycodes = {} + for key, terminal_code in TERMINAL_KEYNAMES.items(): + keycode = ti.get(terminal_code) + trace('key {key} tiname {terminal_code} keycode {keycode!r}', **locals()) + if keycode: + keycodes[keycode] = key + keycodes.update(CTRL_ARROW_KEYCODES) + return keycodes + + +class EventQueue(BaseEventQueue): + def __init__(self, fd: int, encoding: str, ti: TermInfo) -> None: + keycodes = get_terminal_keycodes(ti) + if os.isatty(fd): + backspace = tcgetattr(fd)[6][VERASE] + keycodes[backspace] = "backspace" + BaseEventQueue.__init__(self, encoding, keycodes) diff --git a/Lib/_pyrepl/utils.py b/Lib/_pyrepl/utils.py new file mode 100644 index 00000000000..06cddef851b --- /dev/null +++ b/Lib/_pyrepl/utils.py @@ -0,0 +1,391 @@ +from __future__ import annotations +import builtins +import functools +import keyword +import re +import token as T +import tokenize +import unicodedata +import _colorize + +from collections import deque +from io import StringIO +from tokenize import TokenInfo as TI +from typing import Iterable, Iterator, Match, NamedTuple, Self + +from .types import CharBuffer, CharWidths +from .trace import trace + +ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") +ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") +ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) +IDENTIFIERS_AFTER = {"def", "class"} +KEYWORD_CONSTANTS = {"True", "False", "None"} +BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')} + + +def THEME(**kwargs): + # Not cached: the user can modify the theme inside the interactive session. + return _colorize.get_theme(**kwargs).syntax + + +class Span(NamedTuple): + """Span indexing that's inclusive on both ends.""" + + start: int + end: int + + @classmethod + def from_re(cls, m: Match[str], group: int | str) -> Self: + re_span = m.span(group) + return cls(re_span[0], re_span[1] - 1) + + @classmethod + def from_token(cls, token: TI, line_len: list[int]) -> Self: + end_offset = -1 + if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE} + and token.string.endswith(("{", "}"))): + # gh-134158: a visible trailing brace comes from a double brace in input + end_offset += 1 + + return cls( + line_len[token.start[0] - 1] + token.start[1], + line_len[token.end[0] - 1] + token.end[1] + end_offset, + ) + + +class ColorSpan(NamedTuple): + span: Span + tag: str + + +@functools.cache +def str_width(c: str) -> int: + if ord(c) < 128: + return 1 + # gh-139246 for zero-width joiner and combining characters + if unicodedata.combining(c): + return 0 + category = unicodedata.category(c) + if category == "Cf" and c != "\u00ad": + return 0 + w = unicodedata.east_asian_width(c) + if w in ("N", "Na", "H", "A"): + return 1 + return 2 + + +def wlen(s: str) -> int: + if len(s) == 1 and s != "\x1a": + return str_width(s) + length = sum(str_width(i) for i in s) + # remove lengths of any escape sequences + sequence = ANSI_ESCAPE_SEQUENCE.findall(s) + ctrl_z_cnt = s.count("\x1a") + return length - sum(len(i) for i in sequence) + ctrl_z_cnt + + +def unbracket(s: str, including_content: bool = False) -> str: + r"""Return `s` with \001 and \002 characters removed. + + If `including_content` is True, content between \001 and \002 is also + stripped. + """ + if including_content: + return ZERO_WIDTH_BRACKET.sub("", s) + return s.translate(ZERO_WIDTH_TRANS) + + +def gen_colors(buffer: str) -> Iterator[ColorSpan]: + """Returns a list of index spans to color using the given color tag. + + The input `buffer` should be a valid start of a Python code block, i.e. + it cannot be a block starting in the middle of a multiline string. + """ + sio = StringIO(buffer) + line_lengths = [0] + [len(line) for line in sio.readlines()] + # make line_lengths cumulative + for i in range(1, len(line_lengths)): + line_lengths[i] += line_lengths[i-1] + + sio.seek(0) + gen = tokenize.generate_tokens(sio.readline) + last_emitted: ColorSpan | None = None + try: + for color in gen_colors_from_token_stream(gen, line_lengths): + yield color + last_emitted = color + except SyntaxError: + return + except tokenize.TokenError as te: + yield from recover_unterminated_string( + te, line_lengths, last_emitted, buffer + ) + + +def recover_unterminated_string( + exc: tokenize.TokenError, + line_lengths: list[int], + last_emitted: ColorSpan | None, + buffer: str, +) -> Iterator[ColorSpan]: + msg, loc = exc.args + if loc is None: + return + + line_no, column = loc + + if msg.startswith( + ( + "unterminated string literal", + "unterminated f-string literal", + "unterminated t-string literal", + "EOF in multi-line string", + "unterminated triple-quoted f-string literal", + "unterminated triple-quoted t-string literal", + ) + ): + start = line_lengths[line_no - 1] + column - 1 + end = line_lengths[-1] - 1 + + # in case FSTRING_START was already emitted + if last_emitted and start <= last_emitted.span.start: + trace("before last emitted = {s}", s=start) + start = last_emitted.span.end + 1 + + span = Span(start, end) + trace("yielding span {a} -> {b}", a=span.start, b=span.end) + yield ColorSpan(span, "string") + else: + trace( + "unhandled token error({buffer}) = {te}", + buffer=repr(buffer), + te=str(exc), + ) + + +def gen_colors_from_token_stream( + token_generator: Iterator[TI], + line_lengths: list[int], +) -> Iterator[ColorSpan]: + token_window = prev_next_window(token_generator) + + is_def_name = False + bracket_level = 0 + for prev_token, token, next_token in token_window: + assert token is not None + if token.start == token.end: + continue + + match token.type: + case ( + T.STRING + | T.FSTRING_START | T.FSTRING_MIDDLE | T.FSTRING_END + | T.TSTRING_START | T.TSTRING_MIDDLE | T.TSTRING_END + ): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "string") + case T.COMMENT: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "comment") + case T.NUMBER: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "number") + case T.OP: + if token.string in "([{": + bracket_level += 1 + elif token.string in ")]}": + bracket_level -= 1 + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "op") + case T.NAME: + if is_def_name: + is_def_name = False + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "definition") + elif keyword.iskeyword(token.string): + span_cls = "keyword" + if token.string in KEYWORD_CONSTANTS: + span_cls = "keyword_constant" + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, span_cls) + if token.string in IDENTIFIERS_AFTER: + is_def_name = True + elif ( + keyword.issoftkeyword(token.string) + and bracket_level == 0 + and is_soft_keyword_used(prev_token, token, next_token) + ): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "soft_keyword") + elif ( + token.string in BUILTINS + and not (prev_token and prev_token.exact_type == T.DOT) + ): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "builtin") + + +keyword_first_sets_match = {"False", "None", "True", "await", "lambda", "not"} +keyword_first_sets_case = {"False", "None", "True"} + + +def is_soft_keyword_used(*tokens: TI | None) -> bool: + """Returns True if the current token is a keyword in this context. + + For the `*tokens` to match anything, they have to be a three-tuple of + (previous, current, next). + """ + trace("is_soft_keyword_used{t}", t=tokens) + match tokens: + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + TI(string="match"), + TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) + | TI(T.OP, string="(" | "*" | "[" | "{" | "~" | "...") + ): + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + TI(string="match"), + TI(T.NAME, string=s) + ): + if keyword.iskeyword(s): + return s in keyword_first_sets_match + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), + TI(string="case"), + TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) + | TI(T.OP, string="(" | "*" | "-" | "[" | "{") + ): + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), + TI(string="case"), + TI(T.NAME, string=s) + ): + if keyword.iskeyword(s): + return s in keyword_first_sets_case + return True + case (TI(string="case"), TI(string="_"), TI(string=":")): + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), + TI(string="type"), + TI(T.NAME, string=s) + ): + return not keyword.iskeyword(s) + case _: + return False + + +def disp_str( + buffer: str, + colors: list[ColorSpan] | None = None, + start_index: int = 0, + force_color: bool = False, +) -> tuple[CharBuffer, CharWidths]: + r"""Decompose the input buffer into a printable variant with applied colors. + + Returns a tuple of two lists: + - the first list is the input buffer, character by character, with color + escape codes added (while those codes contain multiple ASCII characters, + each code is considered atomic *and is attached for the corresponding + visible character*); + - the second list is the visible width of each character in the input + buffer. + + Note on colors: + - The `colors` list, if provided, is partially consumed within. We're using + a list and not a generator since we need to hold onto the current + unfinished span between calls to disp_str in case of multiline strings. + - The `colors` list is computed from the start of the input block. `buffer` + is only a subset of that input block, a single line within. This is why + we need `start_index` to inform us which position is the start of `buffer` + actually within user input. This allows us to match color spans correctly. + + Examples: + >>> utils.disp_str("a = 9") + (['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1]) + + >>> line = "while 1:" + >>> colors = list(utils.gen_colors(line)) + >>> utils.disp_str(line, colors=colors) + (['\x1b[1;34mw', 'h', 'i', 'l', 'e\x1b[0m', ' ', '1', ':'], [1, 1, 1, 1, 1, 1, 1, 1]) + + """ + chars: CharBuffer = [] + char_widths: CharWidths = [] + + if not buffer: + return chars, char_widths + + while colors and colors[0].span.end < start_index: + # move past irrelevant spans + colors.pop(0) + + theme = THEME(force_color=force_color) + pre_color = "" + post_color = "" + if colors and colors[0].span.start < start_index: + # looks like we're continuing a previous color (e.g. a multiline str) + pre_color = theme[colors[0].tag] + + for i, c in enumerate(buffer, start_index): + if colors and colors[0].span.start == i: # new color starts now + pre_color = theme[colors[0].tag] + + if c == "\x1a": # CTRL-Z on Windows + chars.append(c) + char_widths.append(2) + elif ord(c) < 128: + chars.append(c) + char_widths.append(1) + elif unicodedata.category(c).startswith("C"): + c = r"\u%04x" % ord(c) + chars.append(c) + char_widths.append(len(c)) + else: + chars.append(c) + char_widths.append(str_width(c)) + + if colors and colors[0].span.end == i: # current color ends now + post_color = theme.reset + colors.pop(0) + + chars[-1] = pre_color + chars[-1] + post_color + pre_color = "" + post_color = "" + + if colors and colors[0].span.start < i and colors[0].span.end > i: + # even though the current color should be continued, reset it for now. + # the next call to `disp_str()` will revive it. + chars[-1] += theme.reset + + return chars, char_widths + + +def prev_next_window[T]( + iterable: Iterable[T] +) -> Iterator[tuple[T | None, ...]]: + """Generates three-tuples of (previous, current, next) items. + + On the first iteration previous is None. On the last iteration next + is None. In case of exception next is None and the exception is re-raised + on a subsequent next() call. + + Inspired by `sliding_window` from `itertools` recipes. + """ + + iterator = iter(iterable) + window = deque((None, next(iterator)), maxlen=3) + try: + for x in iterator: + window.append(x) + yield tuple(window) + except Exception: + raise + finally: + window.append(None) + yield tuple(window) diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py new file mode 100644 index 00000000000..46c6030748b --- /dev/null +++ b/Lib/_pyrepl/windows_console.py @@ -0,0 +1,730 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import io +import os +import sys + +import ctypes +import types +from ctypes.wintypes import ( + _COORD, + WORD, + SMALL_RECT, + BOOL, + HANDLE, + CHAR, + DWORD, + WCHAR, + SHORT, +) +from ctypes import Structure, POINTER, Union +from .console import Event, Console +from .trace import trace +from .utils import wlen +from .windows_eventqueue import EventQueue + +try: + from ctypes import get_last_error, GetLastError, WinDLL, windll, WinError # type: ignore[attr-defined] +except: + # Keep MyPy happy off Windows + from ctypes import CDLL as WinDLL, cdll as windll + + def GetLastError() -> int: + return 42 + + def get_last_error() -> int: + return 42 + + class WinError(OSError): # type: ignore[no-redef] + def __init__(self, err: int | None, descr: str | None = None) -> None: + self.err = err + self.descr = descr + +# declare nt optional to allow None assignment on other platforms +nt: types.ModuleType | None +try: + import nt +except ImportError: + nt = None + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import IO + +# Virtual-Key Codes: https://learn.microsoft.com/en-us/windows/win32/inputdev/virtual-key-codes +VK_MAP: dict[int, str] = { + 0x23: "end", # VK_END + 0x24: "home", # VK_HOME + 0x25: "left", # VK_LEFT + 0x26: "up", # VK_UP + 0x27: "right", # VK_RIGHT + 0x28: "down", # VK_DOWN + 0x2E: "delete", # VK_DELETE + 0x70: "f1", # VK_F1 + 0x71: "f2", # VK_F2 + 0x72: "f3", # VK_F3 + 0x73: "f4", # VK_F4 + 0x74: "f5", # VK_F5 + 0x75: "f6", # VK_F6 + 0x76: "f7", # VK_F7 + 0x77: "f8", # VK_F8 + 0x78: "f9", # VK_F9 + 0x79: "f10", # VK_F10 + 0x7A: "f11", # VK_F11 + 0x7B: "f12", # VK_F12 + 0x7C: "f13", # VK_F13 + 0x7D: "f14", # VK_F14 + 0x7E: "f15", # VK_F15 + 0x7F: "f16", # VK_F16 + 0x80: "f17", # VK_F17 + 0x81: "f18", # VK_F18 + 0x82: "f19", # VK_F19 + 0x83: "f20", # VK_F20 +} + +# Virtual terminal output sequences +# Reference: https://learn.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences#output-sequences +# Check `windows_eventqueue.py` for input sequences +ERASE_IN_LINE = "\x1b[K" +MOVE_LEFT = "\x1b[{}D" +MOVE_RIGHT = "\x1b[{}C" +MOVE_UP = "\x1b[{}A" +MOVE_DOWN = "\x1b[{}B" +CLEAR = "\x1b[H\x1b[J" + +# State of control keys: https://learn.microsoft.com/en-us/windows/console/key-event-record-str +ALT_ACTIVE = 0x01 | 0x02 +CTRL_ACTIVE = 0x04 | 0x08 + +WAIT_TIMEOUT = 0x102 +WAIT_FAILED = 0xFFFFFFFF + +# from winbase.h +INFINITE = 0xFFFFFFFF + + +class _error(Exception): + pass + +def _supports_vt(): + try: + return nt._supports_virtual_terminal() + except AttributeError: + return False + +class WindowsConsole(Console): + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + super().__init__(f_in, f_out, term, encoding) + + self.__vt_support = _supports_vt() + + if self.__vt_support: + trace('console supports virtual terminal') + + # Save original console modes so we can recover on cleanup. + original_input_mode = DWORD() + GetConsoleMode(InHandle, original_input_mode) + trace(f'saved original input mode 0x{original_input_mode.value:x}') + self.__original_input_mode = original_input_mode.value + + SetConsoleMode( + OutHandle, + ENABLE_WRAP_AT_EOL_OUTPUT + | ENABLE_PROCESSED_OUTPUT + | ENABLE_VIRTUAL_TERMINAL_PROCESSING, + ) + + self.screen: list[str] = [] + self.width = 80 + self.height = 25 + self.__offset = 0 + self.event_queue = EventQueue(encoding) + try: + self.out = io._WindowsConsoleIO(self.output_fd, "w") # type: ignore[attr-defined] + except ValueError: + # Console I/O is redirected, fallback... + self.out = None + + def refresh(self, screen: list[str], c_xy: tuple[int, int]) -> None: + """ + Refresh the console screen. + + Parameters: + - screen (list): List of strings representing the screen contents. + - c_xy (tuple): Cursor position (x, y) on the screen. + """ + cx, cy = c_xy + + while len(self.screen) < min(len(screen), self.height): + self._hide_cursor() + if self.screen: + self._move_relative(0, len(self.screen) - 1) + self.__write("\n") + self.posxy = 0, len(self.screen) + self.screen.append("") + + px, py = self.posxy + old_offset = offset = self.__offset + height = self.height + + # we make sure the cursor is on the screen, and that we're + # using all of the screen if we can + if cy < offset: + offset = cy + elif cy >= offset + height: + offset = cy - height + 1 + scroll_lines = offset - old_offset + + # Scrolling the buffer as the current input is greater than the visible + # portion of the window. We need to scroll the visible portion and the + # entire history + self._scroll(scroll_lines, self._getscrollbacksize()) + self.posxy = self.posxy[0], self.posxy[1] + scroll_lines + self.__offset += scroll_lines + + for i in range(scroll_lines): + self.screen.append("") + elif offset > 0 and len(screen) < offset + height: + offset = max(len(screen) - height, 0) + screen.append("") + + oldscr = self.screen[old_offset : old_offset + height] + newscr = screen[offset : offset + height] + + self.__offset = offset + + self._hide_cursor() + for ( + y, + oldline, + newline, + ) in zip(range(offset, offset + height), oldscr, newscr): + if oldline != newline: + self.__write_changed_line(y, oldline, newline, px) + + y = len(newscr) + while y < len(oldscr): + self._move_relative(0, y) + self.posxy = 0, y + self._erase_to_end() + y += 1 + + self._show_cursor() + + self.screen = screen + self.move_cursor(cx, cy) + + @property + def input_hook(self): + # avoid inline imports here so the repl doesn't get flooded + # with import logging from -X importtime=2 + if nt is not None and nt._is_inputhook_installed(): + return nt._inputhook + + def __write_changed_line( + self, y: int, oldline: str, newline: str, px_coord: int + ) -> None: + # this is frustrating; there's no reason to test (say) + # self.dch1 inside the loop -- but alternative ways of + # structuring this function are equally painful (I'm trying to + # avoid writing code generators these days...) + minlen = min(wlen(oldline), wlen(newline)) + x_pos = 0 + x_coord = 0 + + px_pos = 0 + j = 0 + for c in oldline: + if j >= px_coord: + break + j += wlen(c) + px_pos += 1 + + # reuse the oldline as much as possible, but stop as soon as we + # encounter an ESCAPE, because it might be the start of an escape + # sequence + while ( + x_coord < minlen + and oldline[x_pos] == newline[x_pos] + and newline[x_pos] != "\x1b" + ): + x_coord += wlen(newline[x_pos]) + x_pos += 1 + + self._hide_cursor() + self._move_relative(x_coord, y) + if wlen(oldline) > wlen(newline): + self._erase_to_end() + + self.__write(newline[x_pos:]) + self.posxy = min(wlen(newline), self.width - 1), y + + if "\x1b" in newline or y != self.posxy[1] or '\x1a' in newline: + # ANSI escape characters are present, so we can't assume + # anything about the position of the cursor. Moving the cursor + # to the left margin should work to get to a known position. + self.move_cursor(0, y) + + def _scroll( + self, top: int, bottom: int, left: int | None = None, right: int | None = None + ) -> None: + scroll_rect = SMALL_RECT() + scroll_rect.Top = SHORT(top) + scroll_rect.Bottom = SHORT(bottom) + scroll_rect.Left = SHORT(0 if left is None else left) + scroll_rect.Right = SHORT( + self.getheightwidth()[1] - 1 if right is None else right + ) + destination_origin = _COORD() + fill_info = CHAR_INFO() + fill_info.UnicodeChar = " " + + if not ScrollConsoleScreenBuffer( + OutHandle, scroll_rect, None, destination_origin, fill_info + ): + raise WinError(GetLastError()) + + def _hide_cursor(self): + self.__write("\x1b[?25l") + + def _show_cursor(self): + self.__write("\x1b[?25h") + + def _enable_blinking(self): + self.__write("\x1b[?12h") + + def _disable_blinking(self): + self.__write("\x1b[?12l") + + def _enable_bracketed_paste(self) -> None: + self.__write("\x1b[?2004h") + + def _disable_bracketed_paste(self) -> None: + self.__write("\x1b[?2004l") + + def __write(self, text: str) -> None: + if "\x1a" in text: + text = ''.join(["^Z" if x == '\x1a' else x for x in text]) + + if self.out is not None: + self.out.write(text.encode(self.encoding, "replace")) + self.out.flush() + else: + os.write(self.output_fd, text.encode(self.encoding, "replace")) + + @property + def screen_xy(self) -> tuple[int, int]: + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + return info.dwCursorPosition.X, info.dwCursorPosition.Y + + def _erase_to_end(self) -> None: + self.__write(ERASE_IN_LINE) + + def prepare(self) -> None: + trace("prepare") + self.screen = [] + self.height, self.width = self.getheightwidth() + + self.posxy = 0, 0 + self.__gone_tall = 0 + self.__offset = 0 + + if self.__vt_support: + SetConsoleMode(InHandle, self.__original_input_mode | ENABLE_VIRTUAL_TERMINAL_INPUT) + self._enable_bracketed_paste() + + def restore(self) -> None: + if self.__vt_support: + # Recover to original mode before running REPL + self._disable_bracketed_paste() + SetConsoleMode(InHandle, self.__original_input_mode) + + def _move_relative(self, x: int, y: int) -> None: + """Moves relative to the current posxy""" + dx = x - self.posxy[0] + dy = y - self.posxy[1] + if dx < 0: + self.__write(MOVE_LEFT.format(-dx)) + elif dx > 0: + self.__write(MOVE_RIGHT.format(dx)) + + if dy < 0: + self.__write(MOVE_UP.format(-dy)) + elif dy > 0: + self.__write(MOVE_DOWN.format(dy)) + + def move_cursor(self, x: int, y: int) -> None: + if x < 0 or y < 0: + raise ValueError(f"Bad cursor position {x}, {y}") + + if y < self.__offset or y >= self.__offset + self.height: + self.event_queue.insert(Event("scroll", "")) + else: + self._move_relative(x, y) + self.posxy = x, y + + def set_cursor_vis(self, visible: bool) -> None: + if visible: + self._show_cursor() + else: + self._hide_cursor() + + def getheightwidth(self) -> tuple[int, int]: + """Return (height, width) where height and width are the height + and width of the terminal window in characters.""" + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + return ( + info.srWindow.Bottom - info.srWindow.Top + 1, + info.srWindow.Right - info.srWindow.Left + 1, + ) + + def _getscrollbacksize(self) -> int: + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + + return info.srWindow.Bottom # type: ignore[no-any-return] + + def _read_input(self) -> INPUT_RECORD | None: + rec = INPUT_RECORD() + read = DWORD() + if not ReadConsoleInput(InHandle, rec, 1, read): + raise WinError(GetLastError()) + + return rec + + def _read_input_bulk( + self, n: int + ) -> tuple[ctypes.Array[INPUT_RECORD], int]: + rec = (n * INPUT_RECORD)() + read = DWORD() + if not ReadConsoleInput(InHandle, rec, n, read): + raise WinError(GetLastError()) + + return rec, read.value + + def get_event(self, block: bool = True) -> Event | None: + """Return an Event instance. Returns None if |block| is false + and there is no event pending, otherwise waits for the + completion of an event.""" + + if not block and not self.wait(timeout=0): + return None + + while self.event_queue.empty(): + rec = self._read_input() + if rec is None: + return None + + if rec.EventType == WINDOW_BUFFER_SIZE_EVENT: + return Event("resize", "") + + if rec.EventType != KEY_EVENT or not rec.Event.KeyEvent.bKeyDown: + # Only process keys and keydown events + if block: + continue + return None + + key_event = rec.Event.KeyEvent + raw_key = key = key_event.uChar.UnicodeChar + + if key == "\r": + # Make enter unix-like + return Event(evt="key", data="\n") + elif key_event.wVirtualKeyCode == 8: + # Turn backspace directly into the command + key = "backspace" + elif key == "\x00": + # Handle special keys like arrow keys and translate them into the appropriate command + key = VK_MAP.get(key_event.wVirtualKeyCode) + if key: + if key_event.dwControlKeyState & CTRL_ACTIVE: + key = f"ctrl {key}" + elif key_event.dwControlKeyState & ALT_ACTIVE: + # queue the key, return the meta command + self.event_queue.insert(Event(evt="key", data=key)) + return Event(evt="key", data="\033") # keymap.py uses this for meta + return Event(evt="key", data=key) + if block: + continue + + return None + elif self.__vt_support: + # If virtual terminal is enabled, scanning VT sequences + for char in raw_key.encode(self.event_queue.encoding, "replace"): + self.event_queue.push(char) + continue + + if key_event.dwControlKeyState & ALT_ACTIVE: + # Do not swallow characters that have been entered via AltGr: + # Windows internally converts AltGr to CTRL+ALT, see + # https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-vkkeyscanw + if not key_event.dwControlKeyState & CTRL_ACTIVE: + # queue the key, return the meta command + self.event_queue.insert(Event(evt="key", data=key)) + return Event(evt="key", data="\033") # keymap.py uses this for meta + + return Event(evt="key", data=key) + return self.event_queue.get() + + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + raise NotImplementedError("push_char not supported on Windows") + + def beep(self) -> None: + self.__write("\x07") + + def clear(self) -> None: + """Wipe the screen""" + self.__write(CLEAR) + self.posxy = 0, 0 + self.screen = [] + + def finish(self) -> None: + """Move the cursor to the end of the display and otherwise get + ready for end. XXX could be merged with restore? Hmm.""" + y = len(self.screen) - 1 + while y >= 0 and not self.screen[y]: + y -= 1 + self._move_relative(0, min(y, self.height + self.__offset - 1)) + self.__write("\r\n") + + def flushoutput(self) -> None: + """Flush all output to the screen (assuming there's some + buffering going on somewhere). + + All output on Windows is unbuffered so this is a nop""" + pass + + def forgetinput(self) -> None: + """Forget all pending, but not yet processed input.""" + if not FlushConsoleInputBuffer(InHandle): + raise WinError(GetLastError()) + + def getpending(self) -> Event: + """Return the characters that have been typed but not yet + processed.""" + e = Event("key", "", b"") + + while not self.event_queue.empty(): + e2 = self.event_queue.get() + if e2: + e.data += e2.data + + recs, rec_count = self._read_input_bulk(1024) + for i in range(rec_count): + rec = recs[i] + # In case of a legacy console, we do not only receive a keydown + # event, but also a keyup event - and for uppercase letters + # an additional SHIFT_PRESSED event. + if rec and rec.EventType == KEY_EVENT: + key_event = rec.Event.KeyEvent + if not key_event.bKeyDown: + continue + ch = key_event.uChar.UnicodeChar + if ch == "\x00": + # ignore SHIFT_PRESSED and special keys + continue + if ch == "\r": + ch += "\n" + e.data += ch + return e + + def wait_for_event(self, timeout: float | None) -> bool: + """Wait for an event.""" + if timeout is None: + timeout = INFINITE + else: + timeout = int(timeout) + ret = WaitForSingleObject(InHandle, timeout) + if ret == WAIT_FAILED: + raise WinError(get_last_error()) + elif ret == WAIT_TIMEOUT: + return False + return True + + def wait(self, timeout: float | None) -> bool: + """ + Wait for events on the console. + """ + return ( + not self.event_queue.empty() + or self.wait_for_event(timeout) + ) + + def repaint(self) -> None: + raise NotImplementedError("No repaint support") + + +# Windows interop +class CONSOLE_SCREEN_BUFFER_INFO(Structure): + _fields_ = [ + ("dwSize", _COORD), + ("dwCursorPosition", _COORD), + ("wAttributes", WORD), + ("srWindow", SMALL_RECT), + ("dwMaximumWindowSize", _COORD), + ] + + +class CONSOLE_CURSOR_INFO(Structure): + _fields_ = [ + ("dwSize", DWORD), + ("bVisible", BOOL), + ] + + +class CHAR_INFO(Structure): + _fields_ = [ + ("UnicodeChar", WCHAR), + ("Attributes", WORD), + ] + + +class Char(Union): + _fields_ = [ + ("UnicodeChar", WCHAR), + ("Char", CHAR), + ] + + +class KeyEvent(ctypes.Structure): + _fields_ = [ + ("bKeyDown", BOOL), + ("wRepeatCount", WORD), + ("wVirtualKeyCode", WORD), + ("wVirtualScanCode", WORD), + ("uChar", Char), + ("dwControlKeyState", DWORD), + ] + + +class WindowsBufferSizeEvent(ctypes.Structure): + _fields_ = [("dwSize", _COORD)] + + +class ConsoleEvent(ctypes.Union): + _fields_ = [ + ("KeyEvent", KeyEvent), + ("WindowsBufferSizeEvent", WindowsBufferSizeEvent), + ] + + +class INPUT_RECORD(Structure): + _fields_ = [("EventType", WORD), ("Event", ConsoleEvent)] + + +KEY_EVENT = 0x01 +FOCUS_EVENT = 0x10 +MENU_EVENT = 0x08 +MOUSE_EVENT = 0x02 +WINDOW_BUFFER_SIZE_EVENT = 0x04 + +ENABLE_PROCESSED_INPUT = 0x0001 +ENABLE_LINE_INPUT = 0x0002 +ENABLE_ECHO_INPUT = 0x0004 +ENABLE_MOUSE_INPUT = 0x0010 +ENABLE_INSERT_MODE = 0x0020 +ENABLE_VIRTUAL_TERMINAL_INPUT = 0x0200 + +ENABLE_PROCESSED_OUTPUT = 0x01 +ENABLE_WRAP_AT_EOL_OUTPUT = 0x02 +ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x04 + +STD_INPUT_HANDLE = -10 +STD_OUTPUT_HANDLE = -11 + +if sys.platform == "win32": + _KERNEL32 = WinDLL("kernel32", use_last_error=True) + + GetStdHandle = windll.kernel32.GetStdHandle + GetStdHandle.argtypes = [DWORD] + GetStdHandle.restype = HANDLE + + GetConsoleScreenBufferInfo = _KERNEL32.GetConsoleScreenBufferInfo + GetConsoleScreenBufferInfo.argtypes = [ + HANDLE, + ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO), + ] + GetConsoleScreenBufferInfo.restype = BOOL + + ScrollConsoleScreenBuffer = _KERNEL32.ScrollConsoleScreenBufferW + ScrollConsoleScreenBuffer.argtypes = [ + HANDLE, + POINTER(SMALL_RECT), + POINTER(SMALL_RECT), + _COORD, + POINTER(CHAR_INFO), + ] + ScrollConsoleScreenBuffer.restype = BOOL + + GetConsoleMode = _KERNEL32.GetConsoleMode + GetConsoleMode.argtypes = [HANDLE, POINTER(DWORD)] + GetConsoleMode.restype = BOOL + + SetConsoleMode = _KERNEL32.SetConsoleMode + SetConsoleMode.argtypes = [HANDLE, DWORD] + SetConsoleMode.restype = BOOL + + ReadConsoleInput = _KERNEL32.ReadConsoleInputW + ReadConsoleInput.argtypes = [HANDLE, POINTER(INPUT_RECORD), DWORD, POINTER(DWORD)] + ReadConsoleInput.restype = BOOL + + + FlushConsoleInputBuffer = _KERNEL32.FlushConsoleInputBuffer + FlushConsoleInputBuffer.argtypes = [HANDLE] + FlushConsoleInputBuffer.restype = BOOL + + WaitForSingleObject = _KERNEL32.WaitForSingleObject + WaitForSingleObject.argtypes = [HANDLE, DWORD] + WaitForSingleObject.restype = DWORD + + OutHandle = GetStdHandle(STD_OUTPUT_HANDLE) + InHandle = GetStdHandle(STD_INPUT_HANDLE) +else: + + def _win_only(*args, **kwargs): + raise NotImplementedError("Windows only") + + GetStdHandle = _win_only + GetConsoleScreenBufferInfo = _win_only + ScrollConsoleScreenBuffer = _win_only + GetConsoleMode = _win_only + SetConsoleMode = _win_only + ReadConsoleInput = _win_only + FlushConsoleInputBuffer = _win_only + WaitForSingleObject = _win_only + OutHandle = 0 + InHandle = 0 diff --git a/Lib/_pyrepl/windows_eventqueue.py b/Lib/_pyrepl/windows_eventqueue.py new file mode 100644 index 00000000000..d99722f9a16 --- /dev/null +++ b/Lib/_pyrepl/windows_eventqueue.py @@ -0,0 +1,42 @@ +""" +Windows event and VT sequence scanner +""" + +from .base_eventqueue import BaseEventQueue + + +# Reference: https://learn.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences#input-sequences +VT_MAP: dict[bytes, str] = { + b'\x1b[A': 'up', + b'\x1b[B': 'down', + b'\x1b[C': 'right', + b'\x1b[D': 'left', + b'\x1b[1;5D': 'ctrl left', + b'\x1b[1;5C': 'ctrl right', + + b'\x1b[H': 'home', + b'\x1b[F': 'end', + + b'\x7f': 'backspace', + b'\x1b[2~': 'insert', + b'\x1b[3~': 'delete', + b'\x1b[5~': 'page up', + b'\x1b[6~': 'page down', + + b'\x1bOP': 'f1', + b'\x1bOQ': 'f2', + b'\x1bOR': 'f3', + b'\x1bOS': 'f4', + b'\x1b[15~': 'f5', + b'\x1b[17~': 'f6', + b'\x1b[18~': 'f7', + b'\x1b[19~': 'f8', + b'\x1b[20~': 'f9', + b'\x1b[21~': 'f10', + b'\x1b[23~': 'f11', + b'\x1b[24~': 'f12', +} + +class EventQueue(BaseEventQueue): + def __init__(self, encoding: str) -> None: + BaseEventQueue.__init__(self, encoding, VT_MAP) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 798cf9f9d3f..fc7e369c3d1 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -10,10 +10,13 @@ strptime -- Calculates the time struct represented by the passed-in string """ +import os import time import locale import calendar +import re from re import compile as re_compile +from re import sub as re_sub from re import IGNORECASE from re import escape as re_escape from datetime import (date as datetime_date, @@ -27,6 +30,41 @@ def _getlang(): # Figure out what the current language is set to. return locale.getlocale(locale.LC_TIME) +def _findall(haystack, needle): + # Find all positions of needle in haystack. + if not needle: + return + i = 0 + while True: + i = haystack.find(needle, i) + if i < 0: + break + yield i + i += len(needle) + +def _fixmonths(months): + yield from months + # The lower case of 'İ' ('\u0130') is 'i\u0307'. + # The re module only supports 1-to-1 character matching in + # case-insensitive mode. + for s in months: + if 'i\u0307' in s: + yield s.replace('i\u0307', '\u0130') + +lzh_TW_alt_digits = ( + # 〇:一:二:三:四:五:六:七:八:九 + '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db', + '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d', + # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九 + '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db', + '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d', + # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九 + '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db', + '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d', + # 卅:卅一 + '\u5345', '\u5345\u4e00') + + class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -70,6 +108,7 @@ def __init__(self): self.__calc_weekday() self.__calc_month() self.__calc_am_pm() + self.__calc_alt_digits() self.__calc_timezone() self.__calc_date_time() if _getlang() != self.lang: @@ -101,53 +140,184 @@ def __calc_am_pm(self): am_pm = [] for hour in (1, 22): time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0)) - am_pm.append(time.strftime("%p", time_tuple).lower()) + # br_FR has AM/PM info (' ',' '). + am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm + def __calc_alt_digits(self): + # Set self.LC_alt_digits by using time.strftime(). + + # The magic data should contain all decimal digits. + time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0)) + s = time.strftime("%x%X", time_tuple) + if s.isascii(): + # Fast path -- all digits are ASCII. + self.LC_alt_digits = () + return + + digits = ''.join(sorted(set(re.findall(r'\d', s)))) + if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9: + # All 10 decimal digits from the same set. + if digits.isascii(): + # All digits are ASCII. + self.LC_alt_digits = () + return + + self.LC_alt_digits = [a + b for a in digits for b in digits] + # Test whether the numbers contain leading zero. + time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0)) + if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2): + self.LC_alt_digits[:10] = digits + return + + # Either non-Gregorian calendar or non-decimal numbers. + if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s): + # lzh_TW + self.LC_alt_digits = lzh_TW_alt_digits + return + + self.LC_alt_digits = None + def __calc_date_time(self): - # Set self.date_time, self.date, & self.time by using - # time.strftime(). + # Set self.LC_date_time, self.LC_date, self.LC_time and + # self.LC_time_ampm by using time.strftime(). # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of # overloaded numbers is minimized. The order in which searches for # values within the format string is very important; it eliminates # possible ambiguity for what something represents. time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) - date_time = [None, None, None] - date_time[0] = time.strftime("%c", time_tuple).lower() - date_time[1] = time.strftime("%x", time_tuple).lower() - date_time[2] = time.strftime("%X", time_tuple).lower() - replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - (self.f_month[3], '%B'), (self.a_weekday[2], '%a'), - (self.a_month[3], '%b'), (self.am_pm[1], '%p'), - ('1999', '%Y'), ('99', '%y'), ('22', '%H'), - ('44', '%M'), ('55', '%S'), ('76', '%j'), - ('17', '%d'), ('03', '%m'), ('3', '%m'), - # '3' needed for when no leading zero. - ('2', '%w'), ('10', '%I')] - replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone - for tz in tz_values]) - for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')): - current_format = date_time[offset] - for old, new in replacement_pairs: + time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) + replacement_pairs = [] + + # Non-ASCII digits + if self.LC_alt_digits or self.LC_alt_digits is None: + for n, d in [(19, '%OC'), (99, '%Oy'), (22, '%OH'), + (44, '%OM'), (55, '%OS'), (17, '%Od'), + (3, '%Om'), (2, '%Ow'), (10, '%OI')]: + if self.LC_alt_digits is None: + s = chr(0x660 + n // 10) + chr(0x660 + n % 10) + replacement_pairs.append((s, d)) + if n < 10: + replacement_pairs.append((s[1], d)) + elif len(self.LC_alt_digits) > n: + replacement_pairs.append((self.LC_alt_digits[n], d)) + else: + replacement_pairs.append((time.strftime(d, time_tuple), d)) + replacement_pairs += [ + ('1999', '%Y'), ('99', '%y'), ('22', '%H'), + ('44', '%M'), ('55', '%S'), ('76', '%j'), + ('17', '%d'), ('03', '%m'), ('3', '%m'), + # '3' needed for when no leading zero. + ('2', '%w'), ('10', '%I'), + ] + + date_time = [] + for directive in ('%c', '%x', '%X', '%r'): + current_format = time.strftime(directive, time_tuple).lower() + current_format = current_format.replace('%', '%%') + # The month and the day of the week formats are treated specially + # because of a possible ambiguity in some locales where the full + # and abbreviated names are equal or names of different types + # are equal. See doc of __find_month_format for more details. + lst, fmt = self.__find_weekday_format(directive) + if lst: + current_format = current_format.replace(lst[2], fmt, 1) + lst, fmt = self.__find_month_format(directive) + if lst: + current_format = current_format.replace(lst[3], fmt, 1) + if self.am_pm[1]: # Must deal with possible lack of locale info # manifesting itself as the empty string (e.g., Swedish's # lack of AM/PM info) or a platform returning a tuple of empty # strings (e.g., MacOS 9 having timezone as ('','')). - if old: - current_format = current_format.replace(old, new) + current_format = current_format.replace(self.am_pm[1], '%p') + for tz_values in self.timezone: + for tz in tz_values: + if tz: + current_format = current_format.replace(tz, "%Z") + # Transform all non-ASCII digits to digits in range U+0660 to U+0669. + if not current_format.isascii() and self.LC_alt_digits is None: + current_format = re_sub(r'\d(?3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P[0-9]{1,6})", - 'H': r"(?P2[0-3]|[0-1]\d|\d)", - 'I': r"(?P1[0-2]|0[1-9]|[1-9])", + 'H': r"(?P2[0-3]|[0-1]\d|\d| \d)", + 'k': r"(?P2[0-3]|[0-1]\d|\d| \d)", + 'I': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", + 'l': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", 'G': r"(?P\d\d\d\d)", 'j': r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 'm': r"(?P1[0-2]|0[1-9]|[1-9])", @@ -198,25 +370,60 @@ def __init__(self, locale_time=None): 'V': r"(?P5[0-3]|0[1-9]|[1-4]\d|\d)", # W is set below by using 'U' 'y': r"(?P\d\d)", - #XXX: Does 'Y' need to worry about having less or more than - # 4 digits? 'Y': r"(?P\d\d\d\d)", 'z': r"(?P[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), - 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), - 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), + 'B': self.__seqToRE(_fixmonths(self.locale_time.f_month[1:]), 'B'), + 'b': self.__seqToRE(_fixmonths(self.locale_time.a_month[1:]), 'b'), 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone for tz in tz_names), 'Z'), - '%': '%'}) - base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) - base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + '%': '%'} + if self.locale_time.LC_alt_digits is None: + for d in 'dmyCHIMS': + mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d + mapping['Ow'] = r'(?P\d)' + else: + mapping.update({ + 'Od': self.__seqToRE(self.locale_time.LC_alt_digits[1:32], 'd', + '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]'), + 'Om': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'm', + '1[0-2]|0[1-9]|[1-9]'), + 'Ow': self.__seqToRE(self.locale_time.LC_alt_digits[:7], 'w', + '[0-6]'), + 'Oy': self.__seqToRE(self.locale_time.LC_alt_digits, 'y', + '[0-9][0-9]'), + 'OC': self.__seqToRE(self.locale_time.LC_alt_digits, 'C', + '[0-9][0-9]'), + 'OH': self.__seqToRE(self.locale_time.LC_alt_digits[:24], 'H', + '2[0-3]|[0-1][0-9]|[0-9]'), + 'OI': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'I', + '1[0-2]|0[1-9]|[1-9]'), + 'OM': self.__seqToRE(self.locale_time.LC_alt_digits[:60], 'M', + '[0-5][0-9]|[0-9]'), + 'OS': self.__seqToRE(self.locale_time.LC_alt_digits[:62], 'S', + '6[0-1]|[0-5][0-9]|[0-9]'), + }) + mapping.update({ + 'e': mapping['d'], + 'Oe': mapping['Od'], + 'P': mapping['p'], + 'Op': mapping['p'], + 'W': mapping['U'].replace('U', 'W'), + }) + mapping['W'] = mapping['U'].replace('U', 'W') + + base.__init__(mapping) + base.__setitem__('T', self.pattern('%H:%M:%S')) + base.__setitem__('R', self.pattern('%H:%M')) + base.__setitem__('r', self.pattern(self.locale_time.LC_time_ampm)) base.__setitem__('X', self.pattern(self.locale_time.LC_time)) + base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - def __seqToRE(self, to_convert, directive): + def __seqToRE(self, to_convert, directive, altregex=None): """Convert a list to a regex string for matching a directive. Want possible matching values to be from longest to shortest. This @@ -232,8 +439,9 @@ def __seqToRE(self, to_convert, directive): else: return '' regex = '|'.join(re_escape(stuff) for stuff in to_convert) - regex = '(?P<%s>%s' % (directive, regex) - return '%s)' % regex + if altregex is not None: + regex += '|' + altregex + return '(?P<%s>%s)' % (directive, regex) def pattern(self, format): """Return regex pattern for the format string. @@ -242,21 +450,36 @@ def pattern(self, format): regex syntax are escaped. """ - processed_format = '' # The sub() call escapes all characters that might be misconstrued # as regex syntax. Cannot use re.escape since we have to deal with # format directives (%m, etc.). - regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") - format = regex_chars.sub(r"\\\1", format) - whitespace_replacement = re_compile(r'\s+') - format = whitespace_replacement.sub(r'\\s+', format) - while '%' in format: - directive_index = format.index('%')+1 - processed_format = "%s%s%s" % (processed_format, - format[:directive_index-1], - self[format[directive_index]]) - format = format[directive_index+1:] - return "%s%s" % (processed_format, format) + format = re_sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", format) + format = re_sub(r'\s+', r'\\s+', format) + format = re_sub(r"'", "['\u02bc]", format) # needed for br_FR + year_in_format = False + day_of_month_in_format = False + def repl(m): + format_char = m[1] + match format_char: + case 'Y' | 'y' | 'G': + nonlocal year_in_format + year_in_format = True + case 'd': + nonlocal day_of_month_in_format + day_of_month_in_format = True + return self[format_char] + format = re_sub(r'%[-_0^#]*[0-9]*([OE]?\\?.?)', repl, format) + if day_of_month_in_format and not year_in_format: + import warnings + warnings.warn("""\ +Parsing dates involving a day of month without a year specified is ambiguous +and fails to parse leap day. The default behavior will change in Python 3.15 +to either always raise an exception or to use a different default year (TBD). +To avoid trouble, add a specific year to the input & format. +See https://github.com/python/cpython/issues/70647.""", + DeprecationWarning, + skip_file_prefixes=(os.path.dirname(__file__),)) + return format def compile(self, format): """Return a compiled re object for the format string.""" @@ -319,14 +542,13 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # \\, in which case it was a stray % but with a space after it except KeyError as err: bad_directive = err.args[0] - if bad_directive == "\\": - bad_directive = "%" del err + bad_directive = bad_directive.replace('\\s', '') + if not bad_directive: + raise ValueError("stray %% in format '%s'" % format) from None + bad_directive = bad_directive.replace('\\', '', 1) raise ValueError("'%s' is a bad directive in format '%s'" % (bad_directive, format)) from None - # IndexError only occurs when the format string is "%" - except IndexError: - raise ValueError("stray %% in format '%s'" % format) from None _regex_cache[format] = format_regex found = format_regex.match(data_string) if not found: @@ -348,6 +570,15 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # values weekday = julian = None found_dict = found.groupdict() + if locale_time.LC_alt_digits: + def parse_int(s): + try: + return locale_time.LC_alt_digits.index(s) + except ValueError: + return int(s) + else: + parse_int = int + for group_key in found_dict.keys(): # Directives not explicitly handled below: # c, x, X @@ -355,30 +586,34 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # U, W # worthless without day of the week if group_key == 'y': - year = int(found_dict['y']) - # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 - if year <= 68: - year += 2000 + year = parse_int(found_dict['y']) + if 'C' in found_dict: + century = parse_int(found_dict['C']) + year += century * 100 else: - year += 1900 + # Open Group specification for strptime() states that a %y + #value in the range of [00, 68] is in the century 2000, while + #[69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 elif group_key == 'Y': year = int(found_dict['Y']) elif group_key == 'G': iso_year = int(found_dict['G']) elif group_key == 'm': - month = int(found_dict['m']) + month = parse_int(found_dict['m']) elif group_key == 'B': month = locale_time.f_month.index(found_dict['B'].lower()) elif group_key == 'b': month = locale_time.a_month.index(found_dict['b'].lower()) elif group_key == 'd': - day = int(found_dict['d']) + day = parse_int(found_dict['d']) elif group_key == 'H': - hour = int(found_dict['H']) + hour = parse_int(found_dict['H']) elif group_key == 'I': - hour = int(found_dict['I']) + hour = parse_int(found_dict['I']) ampm = found_dict.get('p', '').lower() # If there was no AM/PM indicator, we'll treat this like AM if ampm in ('', locale_time.am_pm[0]): @@ -394,9 +629,9 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): if hour != 12: hour += 12 elif group_key == 'M': - minute = int(found_dict['M']) + minute = parse_int(found_dict['M']) elif group_key == 'S': - second = int(found_dict['S']) + second = parse_int(found_dict['S']) elif group_key == 'f': s = found_dict['f'] # Pad to always return microseconds. @@ -548,18 +783,40 @@ def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"): tt = _strptime(data_string, format)[0] return time.struct_time(tt[:time._STRUCT_TM_ITEMS]) -def _strptime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"): - """Return a class cls instance based on the input string and the +def _strptime_datetime_date(cls, data_string, format="%a %b %d %Y"): + """Return a date instance based on the input string and the + format string.""" + tt, _, _ = _strptime(data_string, format) + args = tt[:3] + return cls(*args) + +def _parse_tz(tzname, gmtoff, gmtoff_fraction): + tzdelta = datetime_timedelta(seconds=gmtoff, microseconds=gmtoff_fraction) + if tzname: + return datetime_timezone(tzdelta, tzname) + else: + return datetime_timezone(tzdelta) + +def _strptime_datetime_time(cls, data_string, format="%H:%M:%S"): + """Return a time instance based on the input string and the format string.""" tt, fraction, gmtoff_fraction = _strptime(data_string, format) tzname, gmtoff = tt[-2:] - args = tt[:6] + (fraction,) - if gmtoff is not None: - tzdelta = datetime_timedelta(seconds=gmtoff, microseconds=gmtoff_fraction) - if tzname: - tz = datetime_timezone(tzdelta, tzname) - else: - tz = datetime_timezone(tzdelta) - args += (tz,) + args = tt[3:6] + (fraction,) + if gmtoff is None: + return cls(*args) + else: + tz = _parse_tz(tzname, gmtoff, gmtoff_fraction) + return cls(*args, tz) - return cls(*args) +def _strptime_datetime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"): + """Return a datetime instance based on the input string and the + format string.""" + tt, fraction, gmtoff_fraction = _strptime(data_string, format) + tzname, gmtoff = tt[-2:] + args = tt[:6] + (fraction,) + if gmtoff is None: + return cls(*args) + else: + tz = _parse_tz(tzname, gmtoff, gmtoff_fraction) + return cls(*args, tz) diff --git a/Lib/_threading_local.py b/Lib/_threading_local.py index e5204339988..0b9e5d3bbf6 100644 --- a/Lib/_threading_local.py +++ b/Lib/_threading_local.py @@ -4,133 +4,6 @@ class. Depending on the version of Python you're using, there may be a faster one available. You should always import the `local` class from `threading`.) - -Thread-local objects support the management of thread-local data. -If you have data that you want to be local to a thread, simply create -a thread-local object and use its attributes: - - >>> mydata = local() - >>> mydata.number = 42 - >>> mydata.number - 42 - -You can also access the local-object's dictionary: - - >>> mydata.__dict__ - {'number': 42} - >>> mydata.__dict__.setdefault('widgets', []) - [] - >>> mydata.widgets - [] - -What's important about thread-local objects is that their data are -local to a thread. If we access the data in a different thread: - - >>> log = [] - >>> def f(): - ... items = sorted(mydata.__dict__.items()) - ... log.append(items) - ... mydata.number = 11 - ... log.append(mydata.number) - - >>> import threading - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[], 11] - -we get different data. Furthermore, changes made in the other thread -don't affect data seen in this thread: - - >>> mydata.number - 42 - -Of course, values you get from a local object, including a __dict__ -attribute, are for whatever thread was current at the time the -attribute was read. For that reason, you generally don't want to save -these values across threads, as they apply only to the thread they -came from. - -You can create custom local objects by subclassing the local class: - - >>> class MyLocal(local): - ... number = 2 - ... initialized = False - ... def __init__(self, **kw): - ... if self.initialized: - ... raise SystemError('__init__ called too many times') - ... self.initialized = True - ... self.__dict__.update(kw) - ... def squared(self): - ... return self.number ** 2 - -This can be useful to support default values, methods and -initialization. Note that if you define an __init__ method, it will be -called each time the local object is used in a separate thread. This -is necessary to initialize each thread's dictionary. - -Now if we create a local object: - - >>> mydata = MyLocal(color='red') - -Now we have a default number: - - >>> mydata.number - 2 - -an initial color: - - >>> mydata.color - 'red' - >>> del mydata.color - -And a method that operates on the data: - - >>> mydata.squared() - 4 - -As before, we can access the data in a separate thread: - - >>> log = [] - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[('color', 'red'), ('initialized', True)], 11] - -without affecting this thread's data: - - >>> mydata.number - 2 - >>> mydata.color - Traceback (most recent call last): - ... - AttributeError: 'MyLocal' object has no attribute 'color' - -Note that subclasses can define slots, but they are not thread -local. They are shared across threads: - - >>> class MyLocal(local): - ... __slots__ = 'number' - - >>> mydata = MyLocal() - >>> mydata.number = 42 - >>> mydata.color = 'red' - -So, the separate thread: - - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - -affects what we see: - - >>> # TODO: RUSTPYTHON, __slots__ - >>> mydata.number #doctest: +SKIP - 11 - ->>> del mydata """ from weakref import ref @@ -194,7 +67,6 @@ def thread_deleted(_, idt=idt): @contextmanager def _patch(self): - old = object.__getattribute__(self, '__dict__') impl = object.__getattribute__(self, '_local__impl') try: dct = impl.get_dict() @@ -205,13 +77,12 @@ def _patch(self): with impl.locallock: object.__setattr__(self, '__dict__', dct) yield - object.__setattr__(self, '__dict__', old) class local: __slots__ = '_local__impl', '__dict__' - def __new__(cls, *args, **kw): + def __new__(cls, /, *args, **kw): if (args or kw) and (cls.__init__ is object.__init__): raise TypeError("Initialization arguments are not supported") self = object.__new__(cls) diff --git a/Lib/_weakrefset.py b/Lib/_weakrefset.py index 2a27684324d..d1c7fcaeec9 100644 --- a/Lib/_weakrefset.py +++ b/Lib/_weakrefset.py @@ -8,69 +8,29 @@ __all__ = ['WeakSet'] -class _IterationGuard: - # This context manager registers itself in the current iterators of the - # weak container, such as to delay all removals until the context manager - # exits. - # This technique should be relatively thread-safe (since sets are). - - def __init__(self, weakcontainer): - # Don't create cycles - self.weakcontainer = ref(weakcontainer) - - def __enter__(self): - w = self.weakcontainer() - if w is not None: - w._iterating.add(self) - return self - - def __exit__(self, e, t, b): - w = self.weakcontainer() - if w is not None: - s = w._iterating - s.remove(self) - if not s: - w._commit_removals() - - class WeakSet: def __init__(self, data=None): self.data = set() + def _remove(item, selfref=ref(self)): self = selfref() if self is not None: - if self._iterating: - self._pending_removals.append(item) - else: - self.data.discard(item) + self.data.discard(item) + self._remove = _remove - # A list of keys to be removed - self._pending_removals = [] - self._iterating = set() if data is not None: self.update(data) - def _commit_removals(self): - pop = self._pending_removals.pop - discard = self.data.discard - while True: - try: - item = pop() - except IndexError: - return - discard(item) - def __iter__(self): - with _IterationGuard(self): - for itemref in self.data: - item = itemref() - if item is not None: - # Caveat: the iterator will keep a strong reference to - # `item` until it is resumed or closed. - yield item + for itemref in self.data.copy(): + item = itemref() + if item is not None: + # Caveat: the iterator will keep a strong reference to + # `item` until it is resumed or closed. + yield item def __len__(self): - return len(self.data) - len(self._pending_removals) + return len(self.data) def __contains__(self, item): try: @@ -80,25 +40,18 @@ def __contains__(self, item): return wr in self.data def __reduce__(self): - return (self.__class__, (list(self),), - getattr(self, '__dict__', None)) + return self.__class__, (list(self),), self.__getstate__() def add(self, item): - if self._pending_removals: - self._commit_removals() self.data.add(ref(item, self._remove)) def clear(self): - if self._pending_removals: - self._commit_removals() self.data.clear() def copy(self): return self.__class__(self) def pop(self): - if self._pending_removals: - self._commit_removals() while True: try: itemref = self.data.pop() @@ -109,18 +62,12 @@ def pop(self): return item def remove(self, item): - if self._pending_removals: - self._commit_removals() self.data.remove(ref(item)) def discard(self, item): - if self._pending_removals: - self._commit_removals() self.data.discard(ref(item)) def update(self, other): - if self._pending_removals: - self._commit_removals() for element in other: self.add(element) @@ -137,8 +84,6 @@ def difference(self, other): def difference_update(self, other): self.__isub__(other) def __isub__(self, other): - if self._pending_removals: - self._commit_removals() if self is other: self.data.clear() else: @@ -152,8 +97,6 @@ def intersection(self, other): def intersection_update(self, other): self.__iand__(other) def __iand__(self, other): - if self._pending_removals: - self._commit_removals() self.data.intersection_update(ref(item) for item in other) return self @@ -185,8 +128,6 @@ def symmetric_difference(self, other): def symmetric_difference_update(self, other): self.__ixor__(other) def __ixor__(self, other): - if self._pending_removals: - self._commit_removals() if self is other: self.data.clear() else: diff --git a/Lib/abc.py b/Lib/abc.py index 1ecff5e2146..f8a4e11ce9c 100644 --- a/Lib/abc.py +++ b/Lib/abc.py @@ -85,10 +85,6 @@ def my_abstract_property(self): from _abc import (get_cache_token, _abc_init, _abc_register, _abc_instancecheck, _abc_subclasscheck, _get_dump, _reset_registry, _reset_caches) -# TODO: RUSTPYTHON missing _abc module implementation. -except ModuleNotFoundError: - from _py_abc import ABCMeta, get_cache_token - ABCMeta.__module__ = 'abc' except ImportError: from _py_abc import ABCMeta, get_cache_token ABCMeta.__module__ = 'abc' diff --git a/Lib/aifc.py b/Lib/aifc.py deleted file mode 100644 index 5254987e22b..00000000000 --- a/Lib/aifc.py +++ /dev/null @@ -1,984 +0,0 @@ -"""Stuff to parse AIFF-C and AIFF files. - -Unless explicitly stated otherwise, the description below is true -both for AIFF-C files and AIFF files. - -An AIFF-C file has the following structure. - - +-----------------+ - | FORM | - +-----------------+ - | | - +----+------------+ - | | AIFC | - | +------------+ - | | | - | | . | - | | . | - | | . | - +----+------------+ - -An AIFF file has the string "AIFF" instead of "AIFC". - -A chunk consists of an identifier (4 bytes) followed by a size (4 bytes, -big endian order), followed by the data. The size field does not include -the size of the 8 byte header. - -The following chunk types are recognized. - - FVER - (AIFF-C only). - MARK - <# of markers> (2 bytes) - list of markers: - (2 bytes, must be > 0) - (4 bytes) - ("pstring") - COMM - <# of channels> (2 bytes) - <# of sound frames> (4 bytes) - (2 bytes) - (10 bytes, IEEE 80-bit extended - floating point) - in AIFF-C files only: - (4 bytes) - ("pstring") - SSND - (4 bytes, not used by this program) - (4 bytes, not used by this program) - - -A pstring consists of 1 byte length, a string of characters, and 0 or 1 -byte pad to make the total length even. - -Usage. - -Reading AIFF files: - f = aifc.open(file, 'r') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods read(), seek(), and close(). -In some types of audio files, if the setpos() method is not used, -the seek() method is not necessary. - -This returns an instance of a class with the following public methods: - getnchannels() -- returns number of audio channels (1 for - mono, 2 for stereo) - getsampwidth() -- returns sample width in bytes - getframerate() -- returns sampling frequency - getnframes() -- returns number of audio frames - getcomptype() -- returns compression type ('NONE' for AIFF files) - getcompname() -- returns human-readable version of - compression type ('not compressed' for AIFF files) - getparams() -- returns a namedtuple consisting of all of the - above in the above order - getmarkers() -- get the list of marks in the audio file or None - if there are no marks - getmark(id) -- get mark with the specified id (raises an error - if the mark does not exist) - readframes(n) -- returns at most n frames of audio - rewind() -- rewind to the beginning of the audio stream - setpos(pos) -- seek to the specified position - tell() -- return the current position - close() -- close the instance (make it unusable) -The position returned by tell(), the position given to setpos() and -the position of marks are all compatible and have nothing to do with -the actual position in the file. -The close() method is called automatically when the class instance -is destroyed. - -Writing AIFF files: - f = aifc.open(file, 'w') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods write(), tell(), seek(), and -close(). - -This returns an instance of a class with the following public methods: - aiff() -- create an AIFF file (AIFF-C default) - aifc() -- create an AIFF-C file - setnchannels(n) -- set the number of channels - setsampwidth(n) -- set the sample width - setframerate(n) -- set the frame rate - setnframes(n) -- set the number of frames - setcomptype(type, name) - -- set the compression type and the - human-readable compression type - setparams(tuple) - -- set all parameters at once - setmark(id, pos, name) - -- add specified mark to the list of marks - tell() -- return current position in output file (useful - in combination with setmark()) - writeframesraw(data) - -- write audio frames without pathing up the - file header - writeframes(data) - -- write audio frames and patch up the file header - close() -- patch up the file header and close the - output file -You should set the parameters before the first writeframesraw or -writeframes. The total number of frames does not need to be set, -but when it is set to the correct value, the header does not have to -be patched up. -It is best to first set all parameters, perhaps possibly the -compression type, and then write audio frames using writeframesraw. -When all frames have been written, either call writeframes(b'') or -close() to patch up the sizes in the header. -Marks can be added anytime. If there are any marks, you must call -close() after all frames have been written. -The close() method is called automatically when the class instance -is destroyed. - -When a file is opened with the extension '.aiff', an AIFF file is -written, otherwise an AIFF-C file is written. This default can be -changed by calling aiff() or aifc() before the first writeframes or -writeframesraw. -""" - -import struct -import builtins -import warnings - -__all__ = ["Error", "open"] - - -warnings._deprecated(__name__, remove=(3, 13)) - - -class Error(Exception): - pass - -_AIFC_version = 0xA2805140 # Version 1 of AIFF-C - -def _read_long(file): - try: - return struct.unpack('>l', file.read(4))[0] - except struct.error: - raise EOFError from None - -def _read_ulong(file): - try: - return struct.unpack('>L', file.read(4))[0] - except struct.error: - raise EOFError from None - -def _read_short(file): - try: - return struct.unpack('>h', file.read(2))[0] - except struct.error: - raise EOFError from None - -def _read_ushort(file): - try: - return struct.unpack('>H', file.read(2))[0] - except struct.error: - raise EOFError from None - -def _read_string(file): - length = ord(file.read(1)) - if length == 0: - data = b'' - else: - data = file.read(length) - if length & 1 == 0: - dummy = file.read(1) - return data - -_HUGE_VAL = 1.79769313486231e+308 # See - -def _read_float(f): # 10 bytes - expon = _read_short(f) # 2 bytes - sign = 1 - if expon < 0: - sign = -1 - expon = expon + 0x8000 - himant = _read_ulong(f) # 4 bytes - lomant = _read_ulong(f) # 4 bytes - if expon == himant == lomant == 0: - f = 0.0 - elif expon == 0x7FFF: - f = _HUGE_VAL - else: - expon = expon - 16383 - f = (himant * 0x100000000 + lomant) * pow(2.0, expon - 63) - return sign * f - -def _write_short(f, x): - f.write(struct.pack('>h', x)) - -def _write_ushort(f, x): - f.write(struct.pack('>H', x)) - -def _write_long(f, x): - f.write(struct.pack('>l', x)) - -def _write_ulong(f, x): - f.write(struct.pack('>L', x)) - -def _write_string(f, s): - if len(s) > 255: - raise ValueError("string exceeds maximum pstring length") - f.write(struct.pack('B', len(s))) - f.write(s) - if len(s) & 1 == 0: - f.write(b'\x00') - -def _write_float(f, x): - import math - if x < 0: - sign = 0x8000 - x = x * -1 - else: - sign = 0 - if x == 0: - expon = 0 - himant = 0 - lomant = 0 - else: - fmant, expon = math.frexp(x) - if expon > 16384 or fmant >= 1 or fmant != fmant: # Infinity or NaN - expon = sign|0x7FFF - himant = 0 - lomant = 0 - else: # Finite - expon = expon + 16382 - if expon < 0: # denormalized - fmant = math.ldexp(fmant, expon) - expon = 0 - expon = expon | sign - fmant = math.ldexp(fmant, 32) - fsmant = math.floor(fmant) - himant = int(fsmant) - fmant = math.ldexp(fmant - fsmant, 32) - fsmant = math.floor(fmant) - lomant = int(fsmant) - _write_ushort(f, expon) - _write_ulong(f, himant) - _write_ulong(f, lomant) - -with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - from chunk import Chunk -from collections import namedtuple - -_aifc_params = namedtuple('_aifc_params', - 'nchannels sampwidth framerate nframes comptype compname') - -_aifc_params.nchannels.__doc__ = 'Number of audio channels (1 for mono, 2 for stereo)' -_aifc_params.sampwidth.__doc__ = 'Sample width in bytes' -_aifc_params.framerate.__doc__ = 'Sampling frequency' -_aifc_params.nframes.__doc__ = 'Number of audio frames' -_aifc_params.comptype.__doc__ = 'Compression type ("NONE" for AIFF files)' -_aifc_params.compname.__doc__ = ("""\ -A human-readable version of the compression type -('not compressed' for AIFF files)""") - - -class Aifc_read: - # Variables used in this class: - # - # These variables are available to the user though appropriate - # methods of this class: - # _file -- the open file with methods read(), close(), and seek() - # set through the __init__() method - # _nchannels -- the number of audio channels - # available through the getnchannels() method - # _nframes -- the number of audio frames - # available through the getnframes() method - # _sampwidth -- the number of bytes per audio sample - # available through the getsampwidth() method - # _framerate -- the sampling frequency - # available through the getframerate() method - # _comptype -- the AIFF-C compression type ('NONE' if AIFF) - # available through the getcomptype() method - # _compname -- the human-readable AIFF-C compression type - # available through the getcomptype() method - # _markers -- the marks in the audio file - # available through the getmarkers() and getmark() - # methods - # _soundpos -- the position in the audio stream - # available through the tell() method, set through the - # setpos() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _decomp -- the decompressor from builtin module cl - # _comm_chunk_read -- 1 iff the COMM chunk has been read - # _aifc -- 1 iff reading an AIFF-C file - # _ssnd_seek_needed -- 1 iff positioned correctly in audio - # file for readframes() - # _ssnd_chunk -- instantiation of a chunk class for the SSND chunk - # _framesize -- size of one frame in the file - - _file = None # Set here since __del__ checks it - - def initfp(self, file): - self._version = 0 - self._convert = None - self._markers = [] - self._soundpos = 0 - self._file = file - chunk = Chunk(file) - if chunk.getname() != b'FORM': - raise Error('file does not start with FORM id') - formdata = chunk.read(4) - if formdata == b'AIFF': - self._aifc = 0 - elif formdata == b'AIFC': - self._aifc = 1 - else: - raise Error('not an AIFF or AIFF-C file') - self._comm_chunk_read = 0 - self._ssnd_chunk = None - while 1: - self._ssnd_seek_needed = 1 - try: - chunk = Chunk(self._file) - except EOFError: - break - chunkname = chunk.getname() - if chunkname == b'COMM': - self._read_comm_chunk(chunk) - self._comm_chunk_read = 1 - elif chunkname == b'SSND': - self._ssnd_chunk = chunk - dummy = chunk.read(8) - self._ssnd_seek_needed = 0 - elif chunkname == b'FVER': - self._version = _read_ulong(chunk) - elif chunkname == b'MARK': - self._readmark(chunk) - chunk.skip() - if not self._comm_chunk_read or not self._ssnd_chunk: - raise Error('COMM chunk and/or SSND chunk missing') - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'rb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - else: - # assume it is an open file object already - self.initfp(f) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. - # - def getfp(self): - return self._file - - def rewind(self): - self._ssnd_seek_needed = 1 - self._soundpos = 0 - - def close(self): - file = self._file - if file is not None: - self._file = None - file.close() - - def tell(self): - return self._soundpos - - def getnchannels(self): - return self._nchannels - - def getnframes(self): - return self._nframes - - def getsampwidth(self): - return self._sampwidth - - def getframerate(self): - return self._framerate - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def getversion(self): -## return self._version - - def getparams(self): - return _aifc_params(self.getnchannels(), self.getsampwidth(), - self.getframerate(), self.getnframes(), - self.getcomptype(), self.getcompname()) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def setpos(self, pos): - if pos < 0 or pos > self._nframes: - raise Error('position not in range') - self._soundpos = pos - self._ssnd_seek_needed = 1 - - def readframes(self, nframes): - if self._ssnd_seek_needed: - self._ssnd_chunk.seek(0) - dummy = self._ssnd_chunk.read(8) - pos = self._soundpos * self._framesize - if pos: - self._ssnd_chunk.seek(pos + 8) - self._ssnd_seek_needed = 0 - if nframes == 0: - return b'' - data = self._ssnd_chunk.read(nframes * self._framesize) - if self._convert and data: - data = self._convert(data) - self._soundpos = self._soundpos + len(data) // (self._nchannels - * self._sampwidth) - return data - - # - # Internal methods. - # - - def _alaw2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.alaw2lin(data, 2) - - def _ulaw2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.ulaw2lin(data, 2) - - def _adpcm2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - if not hasattr(self, '_adpcmstate'): - # first time - self._adpcmstate = None - data, self._adpcmstate = audioop.adpcm2lin(data, 2, self._adpcmstate) - return data - - def _sowt2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.byteswap(data, 2) - - def _read_comm_chunk(self, chunk): - self._nchannels = _read_short(chunk) - self._nframes = _read_long(chunk) - self._sampwidth = (_read_short(chunk) + 7) // 8 - self._framerate = int(_read_float(chunk)) - if self._sampwidth <= 0: - raise Error('bad sample width') - if self._nchannels <= 0: - raise Error('bad # of channels') - self._framesize = self._nchannels * self._sampwidth - if self._aifc: - #DEBUG: SGI's soundeditor produces a bad size :-( - kludge = 0 - if chunk.chunksize == 18: - kludge = 1 - warnings.warn('Warning: bad COMM chunk size') - chunk.chunksize = 23 - #DEBUG end - self._comptype = chunk.read(4) - #DEBUG start - if kludge: - length = ord(chunk.file.read(1)) - if length & 1 == 0: - length = length + 1 - chunk.chunksize = chunk.chunksize + length - chunk.file.seek(-1, 1) - #DEBUG end - self._compname = _read_string(chunk) - if self._comptype != b'NONE': - if self._comptype == b'G722': - self._convert = self._adpcm2lin - elif self._comptype in (b'ulaw', b'ULAW'): - self._convert = self._ulaw2lin - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._alaw2lin - elif self._comptype in (b'sowt', b'SOWT'): - self._convert = self._sowt2lin - else: - raise Error('unsupported compression type') - self._sampwidth = 2 - else: - self._comptype = b'NONE' - self._compname = b'not compressed' - - def _readmark(self, chunk): - nmarkers = _read_short(chunk) - # Some files appear to contain invalid counts. - # Cope with this by testing for EOF. - try: - for i in range(nmarkers): - id = _read_short(chunk) - pos = _read_long(chunk) - name = _read_string(chunk) - if pos or name: - # some files appear to have - # dummy markers consisting of - # a position 0 and name '' - self._markers.append((id, pos, name)) - except EOFError: - w = ('Warning: MARK chunk contains only %s marker%s instead of %s' % - (len(self._markers), '' if len(self._markers) == 1 else 's', - nmarkers)) - warnings.warn(w) - -class Aifc_write: - # Variables used in this class: - # - # These variables are user settable through appropriate methods - # of this class: - # _file -- the open file with methods write(), close(), tell(), seek() - # set through the __init__() method - # _comptype -- the AIFF-C compression type ('NONE' in AIFF) - # set through the setcomptype() or setparams() method - # _compname -- the human-readable AIFF-C compression type - # set through the setcomptype() or setparams() method - # _nchannels -- the number of audio channels - # set through the setnchannels() or setparams() method - # _sampwidth -- the number of bytes per audio sample - # set through the setsampwidth() or setparams() method - # _framerate -- the sampling frequency - # set through the setframerate() or setparams() method - # _nframes -- the number of audio frames written to the header - # set through the setnframes() or setparams() method - # _aifc -- whether we're writing an AIFF-C file or an AIFF file - # set through the aifc() method, reset through the - # aiff() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _comp -- the compressor from builtin module cl - # _nframeswritten -- the number of audio frames actually written - # _datalength -- the size of the audio samples written to the header - # _datawritten -- the size of the audio samples actually written - - _file = None # Set here since __del__ checks it - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'wb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - - # treat .aiff file extensions as non-compressed audio - if f.endswith('.aiff'): - self._aifc = 0 - else: - # assume it is an open file object already - self.initfp(f) - - def initfp(self, file): - self._file = file - self._version = _AIFC_version - self._comptype = b'NONE' - self._compname = b'not compressed' - self._convert = None - self._nchannels = 0 - self._sampwidth = 0 - self._framerate = 0 - self._nframes = 0 - self._nframeswritten = 0 - self._datawritten = 0 - self._datalength = 0 - self._markers = [] - self._marklength = 0 - self._aifc = 1 # AIFF-C is default - - def __del__(self): - self.close() - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. - # - def aiff(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 0 - - def aifc(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 1 - - def setnchannels(self, nchannels): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if nchannels < 1: - raise Error('bad # of channels') - self._nchannels = nchannels - - def getnchannels(self): - if not self._nchannels: - raise Error('number of channels not set') - return self._nchannels - - def setsampwidth(self, sampwidth): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if sampwidth < 1 or sampwidth > 4: - raise Error('bad sample width') - self._sampwidth = sampwidth - - def getsampwidth(self): - if not self._sampwidth: - raise Error('sample width not set') - return self._sampwidth - - def setframerate(self, framerate): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if framerate <= 0: - raise Error('bad frame rate') - self._framerate = framerate - - def getframerate(self): - if not self._framerate: - raise Error('frame rate not set') - return self._framerate - - def setnframes(self, nframes): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._nframes = nframes - - def getnframes(self): - return self._nframeswritten - - def setcomptype(self, comptype, compname): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722', b'sowt', b'SOWT'): - raise Error('unsupported compression type') - self._comptype = comptype - self._compname = compname - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def setversion(self, version): -## if self._nframeswritten: -## raise Error, 'cannot change parameters after starting to write' -## self._version = version - - def setparams(self, params): - nchannels, sampwidth, framerate, nframes, comptype, compname = params - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722', b'sowt', b'SOWT'): - raise Error('unsupported compression type') - self.setnchannels(nchannels) - self.setsampwidth(sampwidth) - self.setframerate(framerate) - self.setnframes(nframes) - self.setcomptype(comptype, compname) - - def getparams(self): - if not self._nchannels or not self._sampwidth or not self._framerate: - raise Error('not all parameters set') - return _aifc_params(self._nchannels, self._sampwidth, self._framerate, - self._nframes, self._comptype, self._compname) - - def setmark(self, id, pos, name): - if id <= 0: - raise Error('marker ID must be > 0') - if pos < 0: - raise Error('marker position must be >= 0') - if not isinstance(name, bytes): - raise Error('marker name must be bytes') - for i in range(len(self._markers)): - if id == self._markers[i][0]: - self._markers[i] = id, pos, name - return - self._markers.append((id, pos, name)) - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def tell(self): - return self._nframeswritten - - def writeframesraw(self, data): - if not isinstance(data, (bytes, bytearray)): - data = memoryview(data).cast('B') - self._ensure_header_written(len(data)) - nframes = len(data) // (self._sampwidth * self._nchannels) - if self._convert: - data = self._convert(data) - self._file.write(data) - self._nframeswritten = self._nframeswritten + nframes - self._datawritten = self._datawritten + len(data) - - def writeframes(self, data): - self.writeframesraw(data) - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - - def close(self): - if self._file is None: - return - try: - self._ensure_header_written(0) - if self._datawritten & 1: - # quick pad to even size - self._file.write(b'\x00') - self._datawritten = self._datawritten + 1 - self._writemarkers() - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten or \ - self._marklength: - self._patchheader() - finally: - # Prevent ref cycles - self._convert = None - f = self._file - self._file = None - f.close() - - # - # Internal methods. - # - - def _lin2alaw(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.lin2alaw(data, 2) - - def _lin2ulaw(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.lin2ulaw(data, 2) - - def _lin2adpcm(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - if not hasattr(self, '_adpcmstate'): - self._adpcmstate = None - data, self._adpcmstate = audioop.lin2adpcm(data, 2, self._adpcmstate) - return data - - def _lin2sowt(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.byteswap(data, 2) - - def _ensure_header_written(self, datasize): - if not self._nframeswritten: - if self._comptype in (b'ULAW', b'ulaw', - b'ALAW', b'alaw', b'G722', - b'sowt', b'SOWT'): - if not self._sampwidth: - self._sampwidth = 2 - if self._sampwidth != 2: - raise Error('sample width must be 2 when compressing ' - 'with ulaw/ULAW, alaw/ALAW, sowt/SOWT ' - 'or G7.22 (ADPCM)') - if not self._nchannels: - raise Error('# channels not specified') - if not self._sampwidth: - raise Error('sample width not specified') - if not self._framerate: - raise Error('sampling rate not specified') - self._write_header(datasize) - - def _init_compression(self): - if self._comptype == b'G722': - self._convert = self._lin2adpcm - elif self._comptype in (b'ulaw', b'ULAW'): - self._convert = self._lin2ulaw - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._lin2alaw - elif self._comptype in (b'sowt', b'SOWT'): - self._convert = self._lin2sowt - - def _write_header(self, initlength): - if self._aifc and self._comptype != b'NONE': - self._init_compression() - self._file.write(b'FORM') - if not self._nframes: - self._nframes = initlength // (self._nchannels * self._sampwidth) - self._datalength = self._nframes * self._nchannels * self._sampwidth - if self._datalength & 1: - self._datalength = self._datalength + 1 - if self._aifc: - if self._comptype in (b'ulaw', b'ULAW', b'alaw', b'ALAW'): - self._datalength = self._datalength // 2 - if self._datalength & 1: - self._datalength = self._datalength + 1 - elif self._comptype == b'G722': - self._datalength = (self._datalength + 3) // 4 - if self._datalength & 1: - self._datalength = self._datalength + 1 - try: - self._form_length_pos = self._file.tell() - except (AttributeError, OSError): - self._form_length_pos = None - commlength = self._write_form_length(self._datalength) - if self._aifc: - self._file.write(b'AIFC') - self._file.write(b'FVER') - _write_ulong(self._file, 4) - _write_ulong(self._file, self._version) - else: - self._file.write(b'AIFF') - self._file.write(b'COMM') - _write_ulong(self._file, commlength) - _write_short(self._file, self._nchannels) - if self._form_length_pos is not None: - self._nframes_pos = self._file.tell() - _write_ulong(self._file, self._nframes) - if self._comptype in (b'ULAW', b'ulaw', b'ALAW', b'alaw', b'G722'): - _write_short(self._file, 8) - else: - _write_short(self._file, self._sampwidth * 8) - _write_float(self._file, self._framerate) - if self._aifc: - self._file.write(self._comptype) - _write_string(self._file, self._compname) - self._file.write(b'SSND') - if self._form_length_pos is not None: - self._ssnd_length_pos = self._file.tell() - _write_ulong(self._file, self._datalength + 8) - _write_ulong(self._file, 0) - _write_ulong(self._file, 0) - - def _write_form_length(self, datalength): - if self._aifc: - commlength = 18 + 5 + len(self._compname) - if commlength & 1: - commlength = commlength + 1 - verslength = 12 - else: - commlength = 18 - verslength = 0 - _write_ulong(self._file, 4 + verslength + self._marklength + \ - 8 + commlength + 16 + datalength) - return commlength - - def _patchheader(self): - curpos = self._file.tell() - if self._datawritten & 1: - datalength = self._datawritten + 1 - self._file.write(b'\x00') - else: - datalength = self._datawritten - if datalength == self._datalength and \ - self._nframes == self._nframeswritten and \ - self._marklength == 0: - self._file.seek(curpos, 0) - return - self._file.seek(self._form_length_pos, 0) - dummy = self._write_form_length(datalength) - self._file.seek(self._nframes_pos, 0) - _write_ulong(self._file, self._nframeswritten) - self._file.seek(self._ssnd_length_pos, 0) - _write_ulong(self._file, datalength + 8) - self._file.seek(curpos, 0) - self._nframes = self._nframeswritten - self._datalength = datalength - - def _writemarkers(self): - if len(self._markers) == 0: - return - self._file.write(b'MARK') - length = 2 - for marker in self._markers: - id, pos, name = marker - length = length + len(name) + 1 + 6 - if len(name) & 1 == 0: - length = length + 1 - _write_ulong(self._file, length) - self._marklength = length + 8 - _write_short(self._file, len(self._markers)) - for marker in self._markers: - id, pos, name = marker - _write_short(self._file, id) - _write_ulong(self._file, pos) - _write_string(self._file, name) - -def open(f, mode=None): - if mode is None: - if hasattr(f, 'mode'): - mode = f.mode - else: - mode = 'rb' - if mode in ('r', 'rb'): - return Aifc_read(f) - elif mode in ('w', 'wb'): - return Aifc_write(f) - else: - raise Error("mode must be 'r', 'rb', 'w', or 'wb'") - - -if __name__ == '__main__': - import sys - if not sys.argv[1:]: - sys.argv.append('/usr/demos/data/audio/bach.aiff') - fn = sys.argv[1] - with open(fn, 'r') as f: - print("Reading", fn) - print("nchannels =", f.getnchannels()) - print("nframes =", f.getnframes()) - print("sampwidth =", f.getsampwidth()) - print("framerate =", f.getframerate()) - print("comptype =", f.getcomptype()) - print("compname =", f.getcompname()) - if sys.argv[2:]: - gn = sys.argv[2] - print("Writing", gn) - with open(gn, 'w') as g: - g.setparams(f.getparams()) - while 1: - data = f.readframes(1024) - if not data: - break - g.writeframes(data) - print("Done.") diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py new file mode 100644 index 00000000000..5c9a0812646 --- /dev/null +++ b/Lib/annotationlib.py @@ -0,0 +1,1197 @@ +"""Helpers for introspecting and wrapping annotations.""" + +import ast +import builtins +import enum +import keyword +import sys +import types + +__all__ = [ + "Format", + "ForwardRef", + "call_annotate_function", + "call_evaluate_function", + "get_annotate_from_class_namespace", + "get_annotations", + "annotations_to_string", + "type_repr", +] + + +class Format(enum.IntEnum): + VALUE = 1 + VALUE_WITH_FAKE_GLOBALS = 2 + FORWARDREF = 3 + STRING = 4 + + +_sentinel = object() +# Following `NAME_ERROR_MSG` in `ceval_macros.h`: +_NAME_ERROR_MSG = "name '{name:.200}' is not defined" + + +# Slots shared by ForwardRef and _Stringifier. The __forward__ names must be +# preserved for compatibility with the old typing.ForwardRef class. The remaining +# names are private. +_SLOTS = ( + "__forward_is_argument__", + "__forward_is_class__", + "__forward_module__", + "__weakref__", + "__arg__", + "__globals__", + "__extra_names__", + "__code__", + "__ast_node__", + "__cell__", + "__owner__", + "__stringifier_dict__", + "__resolved_str_cache__", +) + + +class ForwardRef: + """Wrapper that holds a forward reference. + + Constructor arguments: + * arg: a string representing the code to be evaluated. + * module: the module where the forward reference was created. + Must be a string, not a module object. + * owner: The owning object (module, class, or function). + * is_argument: Does nothing, retained for compatibility. + * is_class: True if the forward reference was created in class scope. + + """ + + __slots__ = _SLOTS + + def __init__( + self, + arg, + *, + module=None, + owner=None, + is_argument=True, + is_class=False, + ): + if not isinstance(arg, str): + raise TypeError(f"Forward reference must be a string -- got {arg!r}") + + self.__arg__ = arg + self.__forward_is_argument__ = is_argument + self.__forward_is_class__ = is_class + self.__forward_module__ = module + self.__owner__ = owner + # These are always set to None here but may be non-None if a ForwardRef + # is created through __class__ assignment on a _Stringifier object. + self.__globals__ = None + # This may be either a cell object (for a ForwardRef referring to a single name) + # or a dict mapping cell names to cell objects (for a ForwardRef containing references + # to multiple names). + self.__cell__ = None + self.__extra_names__ = None + # These are initially None but serve as a cache and may be set to a non-None + # value later. + self.__code__ = None + self.__ast_node__ = None + self.__resolved_str_cache__ = None + + def __init_subclass__(cls, /, *args, **kwds): + raise TypeError("Cannot subclass ForwardRef") + + def evaluate( + self, + *, + globals=None, + locals=None, + type_params=None, + owner=None, + format=Format.VALUE, + ): + """Evaluate the forward reference and return the value. + + If the forward reference cannot be evaluated, raise an exception. + """ + match format: + case Format.STRING: + return self.__resolved_str__ + case Format.VALUE: + is_forwardref_format = False + case Format.FORWARDREF: + is_forwardref_format = True + case _: + raise NotImplementedError(format) + if isinstance(self.__cell__, types.CellType): + try: + return self.__cell__.cell_contents + except ValueError: + pass + if owner is None: + owner = self.__owner__ + + if globals is None and self.__forward_module__ is not None: + globals = getattr( + sys.modules.get(self.__forward_module__, None), "__dict__", None + ) + if globals is None: + globals = self.__globals__ + if globals is None: + if isinstance(owner, type): + module_name = getattr(owner, "__module__", None) + if module_name: + module = sys.modules.get(module_name, None) + if module: + globals = getattr(module, "__dict__", None) + elif isinstance(owner, types.ModuleType): + globals = getattr(owner, "__dict__", None) + elif callable(owner): + globals = getattr(owner, "__globals__", None) + + # If we pass None to eval() below, the globals of this module are used. + if globals is None: + globals = {} + + if type_params is None and owner is not None: + type_params = getattr(owner, "__type_params__", None) + + if locals is None: + locals = {} + if isinstance(owner, type): + locals.update(vars(owner)) + elif ( + type_params is not None + or isinstance(self.__cell__, dict) + or self.__extra_names__ + ): + # Create a new locals dict if necessary, + # to avoid mutating the argument. + locals = dict(locals) + + # "Inject" type parameters into the local namespace + # (unless they are shadowed by assignments *in* the local namespace), + # as a way of emulating annotation scopes when calling `eval()` + if type_params is not None: + for param in type_params: + locals.setdefault(param.__name__, param) + + # Similar logic can be used for nonlocals, which should not + # override locals. + if isinstance(self.__cell__, dict): + for cell_name, cell in self.__cell__.items(): + try: + cell_value = cell.cell_contents + except ValueError: + pass + else: + locals.setdefault(cell_name, cell_value) + + if self.__extra_names__: + locals.update(self.__extra_names__) + + arg = self.__forward_arg__ + if arg.isidentifier() and not keyword.iskeyword(arg): + if arg in locals: + return locals[arg] + elif arg in globals: + return globals[arg] + elif hasattr(builtins, arg): + return getattr(builtins, arg) + elif is_forwardref_format: + return self + else: + raise NameError(_NAME_ERROR_MSG.format(name=arg), name=arg) + else: + code = self.__forward_code__ + try: + return eval(code, globals=globals, locals=locals) + except Exception: + if not is_forwardref_format: + raise + + # All variables, in scoping order, should be checked before + # triggering __missing__ to create a _Stringifier. + new_locals = _StringifierDict( + {**builtins.__dict__, **globals, **locals}, + globals=globals, + owner=owner, + is_class=self.__forward_is_class__, + format=format, + ) + try: + result = eval(code, globals=globals, locals=new_locals) + except Exception: + return self + else: + new_locals.transmogrify(self.__cell__) + return result + + def _evaluate(self, globalns, localns, type_params=_sentinel, *, recursive_guard): + import typing + import warnings + + if type_params is _sentinel: + typing._deprecation_warning_for_no_type_params_passed( + "typing.ForwardRef._evaluate" + ) + type_params = () + warnings._deprecated( + "ForwardRef._evaluate", + "{name} is a private API and is retained for compatibility, but will be removed" + " in Python 3.16. Use ForwardRef.evaluate() or typing.evaluate_forward_ref() instead.", + remove=(3, 16), + ) + return typing.evaluate_forward_ref( + self, + globals=globalns, + locals=localns, + type_params=type_params, + _recursive_guard=recursive_guard, + ) + + @property + def __forward_arg__(self): + if self.__arg__ is not None: + return self.__arg__ + if self.__ast_node__ is not None: + self.__arg__ = ast.unparse(self.__ast_node__) + return self.__arg__ + raise AssertionError( + "Attempted to access '__forward_arg__' on an uninitialized ForwardRef" + ) + + @property + def __resolved_str__(self): + # __forward_arg__ with any names from __extra_names__ replaced + # with the type_repr of the value they represent + if self.__resolved_str_cache__ is None: + resolved_str = self.__forward_arg__ + names = self.__extra_names__ + + if names: + visitor = _ExtraNameFixer(names) + ast_expr = ast.parse(resolved_str, mode="eval").body + node = visitor.visit(ast_expr) + resolved_str = ast.unparse(node) + + self.__resolved_str_cache__ = resolved_str + + return self.__resolved_str_cache__ + + @property + def __forward_code__(self): + if self.__code__ is not None: + return self.__code__ + arg = self.__forward_arg__ + try: + self.__code__ = compile(_rewrite_star_unpack(arg), "", "eval") + except SyntaxError: + raise SyntaxError(f"Forward reference must be an expression -- got {arg!r}") + return self.__code__ + + def __eq__(self, other): + if not isinstance(other, ForwardRef): + return NotImplemented + return ( + self.__forward_arg__ == other.__forward_arg__ + and self.__forward_module__ == other.__forward_module__ + # Use "is" here because we use id() for this in __hash__ + # because dictionaries are not hashable. + and self.__globals__ is other.__globals__ + and self.__forward_is_class__ == other.__forward_is_class__ + # Two separate cells are always considered unequal in forward refs. + and ( + {name: id(cell) for name, cell in self.__cell__.items()} + == {name: id(cell) for name, cell in other.__cell__.items()} + if isinstance(self.__cell__, dict) and isinstance(other.__cell__, dict) + else self.__cell__ is other.__cell__ + ) + and self.__owner__ == other.__owner__ + and ( + (tuple(sorted(self.__extra_names__.items())) if self.__extra_names__ else None) == + (tuple(sorted(other.__extra_names__.items())) if other.__extra_names__ else None) + ) + ) + + def __hash__(self): + return hash(( + self.__forward_arg__, + self.__forward_module__, + id(self.__globals__), # dictionaries are not hashable, so hash by identity + self.__forward_is_class__, + ( # cells are not hashable as well + tuple(sorted([(name, id(cell)) for name, cell in self.__cell__.items()])) + if isinstance(self.__cell__, dict) else id(self.__cell__), + ), + self.__owner__, + tuple(sorted(self.__extra_names__.items())) if self.__extra_names__ else None, + )) + + def __or__(self, other): + return types.UnionType[self, other] + + def __ror__(self, other): + return types.UnionType[other, self] + + def __repr__(self): + extra = [] + if self.__forward_module__ is not None: + extra.append(f", module={self.__forward_module__!r}") + if self.__forward_is_class__: + extra.append(", is_class=True") + if self.__owner__ is not None: + extra.append(f", owner={self.__owner__!r}") + return f"ForwardRef({self.__resolved_str__!r}{''.join(extra)})" + + +_Template = type(t"") + + +class _Stringifier: + # Must match the slots on ForwardRef, so we can turn an instance of one into an + # instance of the other in place. + __slots__ = _SLOTS + + def __init__( + self, + node, + globals=None, + owner=None, + is_class=False, + cell=None, + *, + stringifier_dict, + extra_names=None, + ): + # Either an AST node or a simple str (for the common case where a ForwardRef + # represent a single name). + assert isinstance(node, (ast.AST, str)) + self.__arg__ = None + self.__forward_is_argument__ = False + self.__forward_is_class__ = is_class + self.__forward_module__ = None + self.__code__ = None + self.__ast_node__ = node + self.__globals__ = globals + self.__extra_names__ = extra_names + self.__cell__ = cell + self.__owner__ = owner + self.__stringifier_dict__ = stringifier_dict + self.__resolved_str_cache__ = None # Needed for ForwardRef + + def __convert_to_ast(self, other): + if isinstance(other, _Stringifier): + if isinstance(other.__ast_node__, str): + return ast.Name(id=other.__ast_node__), other.__extra_names__ + return other.__ast_node__, other.__extra_names__ + elif type(other) is _Template: + return _template_to_ast(other), None + elif ( + # In STRING format we don't bother with the create_unique_name() dance; + # it's better to emit the repr() of the object instead of an opaque name. + self.__stringifier_dict__.format == Format.STRING + or other is None + or type(other) in (str, int, float, bool, complex) + ): + return ast.Constant(value=other), None + elif type(other) is dict: + extra_names = {} + keys = [] + values = [] + for key, value in other.items(): + new_key, new_extra_names = self.__convert_to_ast(key) + if new_extra_names is not None: + extra_names.update(new_extra_names) + keys.append(new_key) + new_value, new_extra_names = self.__convert_to_ast(value) + if new_extra_names is not None: + extra_names.update(new_extra_names) + values.append(new_value) + return ast.Dict(keys, values), extra_names + elif type(other) in (list, tuple, set): + extra_names = {} + elts = [] + for elt in other: + new_elt, new_extra_names = self.__convert_to_ast(elt) + if new_extra_names is not None: + extra_names.update(new_extra_names) + elts.append(new_elt) + ast_class = {list: ast.List, tuple: ast.Tuple, set: ast.Set}[type(other)] + return ast_class(elts), extra_names + else: + name = self.__stringifier_dict__.create_unique_name() + return ast.Name(id=name), {name: other} + + def __convert_to_ast_getitem(self, other): + if isinstance(other, slice): + extra_names = {} + + def conv(obj): + if obj is None: + return None + new_obj, new_extra_names = self.__convert_to_ast(obj) + if new_extra_names is not None: + extra_names.update(new_extra_names) + return new_obj + + return ast.Slice( + lower=conv(other.start), + upper=conv(other.stop), + step=conv(other.step), + ), extra_names + else: + return self.__convert_to_ast(other) + + def __get_ast(self): + node = self.__ast_node__ + if isinstance(node, str): + return ast.Name(id=node) + return node + + def __make_new(self, node, extra_names=None): + new_extra_names = {} + if self.__extra_names__ is not None: + new_extra_names.update(self.__extra_names__) + if extra_names is not None: + new_extra_names.update(extra_names) + stringifier = _Stringifier( + node, + self.__globals__, + self.__owner__, + self.__forward_is_class__, + stringifier_dict=self.__stringifier_dict__, + extra_names=new_extra_names or None, + ) + self.__stringifier_dict__.stringifiers.append(stringifier) + return stringifier + + # Must implement this since we set __eq__. We hash by identity so that + # stringifiers in dict keys are kept separate. + def __hash__(self): + return id(self) + + def __getitem__(self, other): + # Special case, to avoid stringifying references to class-scoped variables + # as '__classdict__["x"]'. + if self.__ast_node__ == "__classdict__": + raise KeyError + if isinstance(other, tuple): + extra_names = {} + elts = [] + for elt in other: + new_elt, new_extra_names = self.__convert_to_ast_getitem(elt) + if new_extra_names is not None: + extra_names.update(new_extra_names) + elts.append(new_elt) + other = ast.Tuple(elts) + else: + other, extra_names = self.__convert_to_ast_getitem(other) + assert isinstance(other, ast.AST), repr(other) + return self.__make_new(ast.Subscript(self.__get_ast(), other), extra_names) + + def __getattr__(self, attr): + return self.__make_new(ast.Attribute(self.__get_ast(), attr)) + + def __call__(self, *args, **kwargs): + extra_names = {} + ast_args = [] + for arg in args: + new_arg, new_extra_names = self.__convert_to_ast(arg) + if new_extra_names is not None: + extra_names.update(new_extra_names) + ast_args.append(new_arg) + ast_kwargs = [] + for key, value in kwargs.items(): + new_value, new_extra_names = self.__convert_to_ast(value) + if new_extra_names is not None: + extra_names.update(new_extra_names) + ast_kwargs.append(ast.keyword(key, new_value)) + return self.__make_new(ast.Call(self.__get_ast(), ast_args, ast_kwargs), extra_names) + + def __iter__(self): + yield self.__make_new(ast.Starred(self.__get_ast())) + + def __repr__(self): + if isinstance(self.__ast_node__, str): + return self.__ast_node__ + return ast.unparse(self.__ast_node__) + + def __format__(self, format_spec): + raise TypeError("Cannot stringify annotation containing string formatting") + + def _make_binop(op: ast.AST): + def binop(self, other): + rhs, extra_names = self.__convert_to_ast(other) + return self.__make_new( + ast.BinOp(self.__get_ast(), op, rhs), extra_names + ) + + return binop + + __add__ = _make_binop(ast.Add()) + __sub__ = _make_binop(ast.Sub()) + __mul__ = _make_binop(ast.Mult()) + __matmul__ = _make_binop(ast.MatMult()) + __truediv__ = _make_binop(ast.Div()) + __mod__ = _make_binop(ast.Mod()) + __lshift__ = _make_binop(ast.LShift()) + __rshift__ = _make_binop(ast.RShift()) + __or__ = _make_binop(ast.BitOr()) + __xor__ = _make_binop(ast.BitXor()) + __and__ = _make_binop(ast.BitAnd()) + __floordiv__ = _make_binop(ast.FloorDiv()) + __pow__ = _make_binop(ast.Pow()) + + del _make_binop + + def _make_rbinop(op: ast.AST): + def rbinop(self, other): + new_other, extra_names = self.__convert_to_ast(other) + return self.__make_new( + ast.BinOp(new_other, op, self.__get_ast()), extra_names + ) + + return rbinop + + __radd__ = _make_rbinop(ast.Add()) + __rsub__ = _make_rbinop(ast.Sub()) + __rmul__ = _make_rbinop(ast.Mult()) + __rmatmul__ = _make_rbinop(ast.MatMult()) + __rtruediv__ = _make_rbinop(ast.Div()) + __rmod__ = _make_rbinop(ast.Mod()) + __rlshift__ = _make_rbinop(ast.LShift()) + __rrshift__ = _make_rbinop(ast.RShift()) + __ror__ = _make_rbinop(ast.BitOr()) + __rxor__ = _make_rbinop(ast.BitXor()) + __rand__ = _make_rbinop(ast.BitAnd()) + __rfloordiv__ = _make_rbinop(ast.FloorDiv()) + __rpow__ = _make_rbinop(ast.Pow()) + + del _make_rbinop + + def _make_compare(op): + def compare(self, other): + rhs, extra_names = self.__convert_to_ast(other) + return self.__make_new( + ast.Compare( + left=self.__get_ast(), + ops=[op], + comparators=[rhs], + ), + extra_names, + ) + + return compare + + __lt__ = _make_compare(ast.Lt()) + __le__ = _make_compare(ast.LtE()) + __eq__ = _make_compare(ast.Eq()) + __ne__ = _make_compare(ast.NotEq()) + __gt__ = _make_compare(ast.Gt()) + __ge__ = _make_compare(ast.GtE()) + + del _make_compare + + def _make_unary_op(op): + def unary_op(self): + return self.__make_new(ast.UnaryOp(op, self.__get_ast())) + + return unary_op + + __invert__ = _make_unary_op(ast.Invert()) + __pos__ = _make_unary_op(ast.UAdd()) + __neg__ = _make_unary_op(ast.USub()) + + del _make_unary_op + + +def _template_to_ast_constructor(template): + """Convert a `template` instance to a non-literal AST.""" + args = [] + for part in template: + match part: + case str(): + args.append(ast.Constant(value=part)) + case _: + interp = ast.Call( + func=ast.Name(id="Interpolation"), + args=[ + ast.Constant(value=part.value), + ast.Constant(value=part.expression), + ast.Constant(value=part.conversion), + ast.Constant(value=part.format_spec), + ] + ) + args.append(interp) + return ast.Call(func=ast.Name(id="Template"), args=args, keywords=[]) + + +def _template_to_ast_literal(template, parsed): + """Convert a `template` instance to a t-string literal AST.""" + values = [] + interp_count = 0 + for part in template: + match part: + case str(): + values.append(ast.Constant(value=part)) + case _: + interp = ast.Interpolation( + str=part.expression, + value=parsed[interp_count], + conversion=ord(part.conversion) if part.conversion else -1, + format_spec=ast.Constant(value=part.format_spec) + if part.format_spec + else None, + ) + values.append(interp) + interp_count += 1 + return ast.TemplateStr(values=values) + + +def _template_to_ast(template): + """Make a best-effort conversion of a `template` instance to an AST.""" + # gh-138558: Not all Template instances can be represented as t-string + # literals. Return the most accurate AST we can. See issue for details. + + # If any expr is empty or whitespace only, we cannot convert to a literal. + if any(part.expression.strip() == "" for part in template.interpolations): + return _template_to_ast_constructor(template) + + try: + # Wrap in parens to allow whitespace inside interpolation curly braces + parsed = tuple( + ast.parse(f"({part.expression})", mode="eval").body + for part in template.interpolations + ) + except SyntaxError: + return _template_to_ast_constructor(template) + + return _template_to_ast_literal(template, parsed) + + +class _StringifierDict(dict): + def __init__(self, namespace, *, globals=None, owner=None, is_class=False, format): + super().__init__(namespace) + self.namespace = namespace + self.globals = globals + self.owner = owner + self.is_class = is_class + self.stringifiers = [] + self.next_id = 1 + self.format = format + + def __missing__(self, key): + fwdref = _Stringifier( + key, + globals=self.globals, + owner=self.owner, + is_class=self.is_class, + stringifier_dict=self, + ) + self.stringifiers.append(fwdref) + return fwdref + + def transmogrify(self, cell_dict): + for obj in self.stringifiers: + obj.__class__ = ForwardRef + obj.__stringifier_dict__ = None # not needed for ForwardRef + if isinstance(obj.__ast_node__, str): + obj.__arg__ = obj.__ast_node__ + obj.__ast_node__ = None + if cell_dict is not None and obj.__cell__ is None: + obj.__cell__ = cell_dict + + def create_unique_name(self): + name = f"__annotationlib_name_{self.next_id}__" + self.next_id += 1 + return name + + +def call_evaluate_function(evaluate, format, *, owner=None): + """Call an evaluate function. Evaluate functions are normally generated for + the value of type aliases and the bounds, constraints, and defaults of + type parameter objects. + """ + return call_annotate_function(evaluate, format, owner=owner, _is_evaluate=True) + + +def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): + """Call an __annotate__ function. __annotate__ functions are normally + generated by the compiler to defer the evaluation of annotations. They + can be called with any of the format arguments in the Format enum, but + compiler-generated __annotate__ functions only support the VALUE format. + This function provides additional functionality to call __annotate__ + functions with the FORWARDREF and STRING formats. + + *annotate* must be an __annotate__ function, which takes a single argument + and returns a dict of annotations. + + *format* must be a member of the Format enum or one of the corresponding + integer values. + + *owner* can be the object that owns the annotations (i.e., the module, + class, or function that the __annotate__ function derives from). With the + FORWARDREF format, it is used to provide better evaluation capabilities + on the generated ForwardRef objects. + + """ + if format == Format.VALUE_WITH_FAKE_GLOBALS: + raise ValueError("The VALUE_WITH_FAKE_GLOBALS format is for internal use only") + try: + return annotate(format) + except NotImplementedError: + pass + if format == Format.STRING: + # STRING is implemented by calling the annotate function in a special + # environment where every name lookup results in an instance of _Stringifier. + # _Stringifier supports every dunder operation and returns a new _Stringifier. + # At the end, we get a dictionary that mostly contains _Stringifier objects (or + # possibly constants if the annotate function uses them directly). We then + # convert each of those into a string to get an approximation of the + # original source. + + # Attempt to call with VALUE_WITH_FAKE_GLOBALS to check if it is implemented + # See: https://github.com/python/cpython/issues/138764 + # Only fail on NotImplementedError + try: + annotate(Format.VALUE_WITH_FAKE_GLOBALS) + except NotImplementedError: + # Both STRING and VALUE_WITH_FAKE_GLOBALS are not implemented: fallback to VALUE + return annotations_to_string(annotate(Format.VALUE)) + except Exception: + pass + + globals = _StringifierDict({}, format=format) + is_class = isinstance(owner, type) + closure, _ = _build_closure( + annotate, owner, is_class, globals, allow_evaluation=False + ) + func = types.FunctionType( + annotate.__code__, + globals, + closure=closure, + argdefs=annotate.__defaults__, + kwdefaults=annotate.__kwdefaults__, + ) + annos = func(Format.VALUE_WITH_FAKE_GLOBALS) + if _is_evaluate: + return _stringify_single(annos) + return { + key: _stringify_single(val) + for key, val in annos.items() + } + elif format == Format.FORWARDREF: + # FORWARDREF is implemented similarly to STRING, but there are two changes, + # at the beginning and the end of the process. + # First, while STRING uses an empty dictionary as the namespace, so that all + # name lookups result in _Stringifier objects, FORWARDREF uses the globals + # and builtins, so that defined names map to their real values. + # Second, instead of returning strings, we want to return either real values + # or ForwardRef objects. To do this, we keep track of all _Stringifier objects + # created while the annotation is being evaluated, and at the end we convert + # them all to ForwardRef objects by assigning to __class__. To make this + # technique work, we have to ensure that the _Stringifier and ForwardRef + # classes share the same attributes. + # We use this technique because while the annotations are being evaluated, + # we want to support all operations that the language allows, including even + # __getattr__ and __eq__, and return new _Stringifier objects so we can accurately + # reconstruct the source. But in the dictionary that we eventually return, we + # want to return objects with more user-friendly behavior, such as an __eq__ + # that returns a bool and an defined set of attributes. + namespace = {**annotate.__builtins__, **annotate.__globals__} + is_class = isinstance(owner, type) + globals = _StringifierDict( + namespace, + globals=annotate.__globals__, + owner=owner, + is_class=is_class, + format=format, + ) + closure, cell_dict = _build_closure( + annotate, owner, is_class, globals, allow_evaluation=True + ) + func = types.FunctionType( + annotate.__code__, + globals, + closure=closure, + argdefs=annotate.__defaults__, + kwdefaults=annotate.__kwdefaults__, + ) + try: + result = func(Format.VALUE_WITH_FAKE_GLOBALS) + except NotImplementedError: + # FORWARDREF and VALUE_WITH_FAKE_GLOBALS not supported, fall back to VALUE + return annotate(Format.VALUE) + except Exception: + pass + else: + globals.transmogrify(cell_dict) + return result + + # Try again, but do not provide any globals. This allows us to return + # a value in certain cases where an exception gets raised during evaluation. + globals = _StringifierDict( + {}, + globals=annotate.__globals__, + owner=owner, + is_class=is_class, + format=format, + ) + closure, cell_dict = _build_closure( + annotate, owner, is_class, globals, allow_evaluation=False + ) + func = types.FunctionType( + annotate.__code__, + globals, + closure=closure, + argdefs=annotate.__defaults__, + kwdefaults=annotate.__kwdefaults__, + ) + result = func(Format.VALUE_WITH_FAKE_GLOBALS) + globals.transmogrify(cell_dict) + if _is_evaluate: + if isinstance(result, ForwardRef): + return result.evaluate(format=Format.FORWARDREF) + else: + return result + else: + return { + key: ( + val.evaluate(format=Format.FORWARDREF) + if isinstance(val, ForwardRef) + else val + ) + for key, val in result.items() + } + elif format == Format.VALUE: + # Should be impossible because __annotate__ functions must not raise + # NotImplementedError for this format. + raise RuntimeError("annotate function does not support VALUE format") + else: + raise ValueError(f"Invalid format: {format!r}") + + +def _build_closure(annotate, owner, is_class, stringifier_dict, *, allow_evaluation): + if not annotate.__closure__: + return None, None + new_closure = [] + cell_dict = {} + for name, cell in zip(annotate.__code__.co_freevars, annotate.__closure__, strict=True): + cell_dict[name] = cell + new_cell = None + if allow_evaluation: + try: + cell.cell_contents + except ValueError: + pass + else: + new_cell = cell + if new_cell is None: + fwdref = _Stringifier( + name, + cell=cell, + owner=owner, + globals=annotate.__globals__, + is_class=is_class, + stringifier_dict=stringifier_dict, + ) + stringifier_dict.stringifiers.append(fwdref) + new_cell = types.CellType(fwdref) + new_closure.append(new_cell) + return tuple(new_closure), cell_dict + + +def _stringify_single(anno): + if anno is ...: + return "..." + # We have to handle str specially to support PEP 563 stringified annotations. + elif isinstance(anno, str): + return anno + elif isinstance(anno, _Template): + return ast.unparse(_template_to_ast(anno)) + else: + return repr(anno) + + +def get_annotate_from_class_namespace(obj): + """Retrieve the annotate function from a class namespace dictionary. + + Return None if the namespace does not contain an annotate function. + This is useful in metaclass ``__new__`` methods to retrieve the annotate function. + """ + try: + return obj["__annotate__"] + except KeyError: + return obj.get("__annotate_func__", None) + + +def get_annotations( + obj, *, globals=None, locals=None, eval_str=False, format=Format.VALUE +): + """Compute the annotations dict for an object. + + obj may be a callable, class, module, or other object with + __annotate__ or __annotations__ attributes. + Passing any other object raises TypeError. + + The *format* parameter controls the format in which annotations are returned, + and must be a member of the Format enum or its integer equivalent. + For the VALUE format, the __annotations__ is tried first; if it + does not exist, the __annotate__ function is called. The + FORWARDREF format uses __annotations__ if it exists and can be + evaluated, and otherwise falls back to calling the __annotate__ function. + The STRING format tries __annotate__ first, and falls back to + using __annotations__, stringified using annotations_to_string(). + + This function handles several details for you: + + * If eval_str is true, values of type str will + be un-stringized using eval(). This is intended + for use with stringized annotations + ("from __future__ import annotations"). + * If obj doesn't have an annotations dict, returns an + empty dict. (Functions and methods always have an + annotations dict; classes, modules, and other types of + callables may not.) + * Ignores inherited annotations on classes. If a class + doesn't have its own annotations dict, returns an empty dict. + * All accesses to object members and dict values are done + using getattr() and dict.get() for safety. + * Always, always, always returns a freshly-created dict. + + eval_str controls whether or not values of type str are replaced + with the result of calling eval() on those values: + + * If eval_str is true, eval() is called on values of type str. + * If eval_str is false (the default), values of type str are unchanged. + + globals and locals are passed in to eval(); see the documentation + for eval() for more information. If either globals or locals is + None, this function may replace that value with a context-specific + default, contingent on type(obj): + + * If obj is a module, globals defaults to obj.__dict__. + * If obj is a class, globals defaults to + sys.modules[obj.__module__].__dict__ and locals + defaults to the obj class namespace. + * If obj is a callable, globals defaults to obj.__globals__, + although if obj is a wrapped function (using + functools.update_wrapper()) it is first unwrapped. + """ + if eval_str and format != Format.VALUE: + raise ValueError("eval_str=True is only supported with format=Format.VALUE") + + match format: + case Format.VALUE: + # For VALUE, we first look at __annotations__ + ann = _get_dunder_annotations(obj) + + # If it's not there, try __annotate__ instead + if ann is None: + ann = _get_and_call_annotate(obj, format) + case Format.FORWARDREF: + # For FORWARDREF, we use __annotations__ if it exists + try: + ann = _get_dunder_annotations(obj) + except Exception: + pass + else: + if ann is not None: + return dict(ann) + + # But if __annotations__ threw a NameError, we try calling __annotate__ + ann = _get_and_call_annotate(obj, format) + if ann is None: + # If that didn't work either, we have a very weird object: evaluating + # __annotations__ threw NameError and there is no __annotate__. In that case, + # we fall back to trying __annotations__ again. + ann = _get_dunder_annotations(obj) + case Format.STRING: + # For STRING, we try to call __annotate__ + ann = _get_and_call_annotate(obj, format) + if ann is not None: + return dict(ann) + # But if we didn't get it, we use __annotations__ instead. + ann = _get_dunder_annotations(obj) + if ann is not None: + return annotations_to_string(ann) + case Format.VALUE_WITH_FAKE_GLOBALS: + raise ValueError("The VALUE_WITH_FAKE_GLOBALS format is for internal use only") + case _: + raise ValueError(f"Unsupported format {format!r}") + + if ann is None: + if isinstance(obj, type) or callable(obj): + return {} + raise TypeError(f"{obj!r} does not have annotations") + + if not ann: + return {} + + if not eval_str: + return dict(ann) + + if globals is None or locals is None: + if isinstance(obj, type): + # class + obj_globals = None + module_name = getattr(obj, "__module__", None) + if module_name: + module = sys.modules.get(module_name, None) + if module: + obj_globals = getattr(module, "__dict__", None) + obj_locals = dict(vars(obj)) + unwrap = obj + elif isinstance(obj, types.ModuleType): + # module + obj_globals = getattr(obj, "__dict__") + obj_locals = None + unwrap = None + elif callable(obj): + # this includes types.Function, types.BuiltinFunctionType, + # types.BuiltinMethodType, functools.partial, functools.singledispatch, + # "class funclike" from Lib/test/test_inspect... on and on it goes. + obj_globals = getattr(obj, "__globals__", None) + obj_locals = None + unwrap = obj + else: + obj_globals = obj_locals = unwrap = None + + if unwrap is not None: + # Use an id-based visited set to detect cycles in the __wrapped__ + # and functools.partial.func chain (e.g. f.__wrapped__ = f). + # On cycle detection we stop and use whatever __globals__ we have + # found so far, mirroring the approach of inspect.unwrap(). + _seen_ids = {id(unwrap)} + while True: + if hasattr(unwrap, "__wrapped__"): + candidate = unwrap.__wrapped__ + if id(candidate) in _seen_ids: + break + _seen_ids.add(id(candidate)) + unwrap = candidate + continue + if functools := sys.modules.get("functools"): + if isinstance(unwrap, functools.partial): + candidate = unwrap.func + if id(candidate) in _seen_ids: + break + _seen_ids.add(id(candidate)) + unwrap = candidate + continue + break + if hasattr(unwrap, "__globals__"): + obj_globals = unwrap.__globals__ + + if globals is None: + globals = obj_globals + if locals is None: + locals = obj_locals + + # "Inject" type parameters into the local namespace + # (unless they are shadowed by assignments *in* the local namespace), + # as a way of emulating annotation scopes when calling `eval()` + if type_params := getattr(obj, "__type_params__", ()): + if locals is None: + locals = {} + locals = {param.__name__: param for param in type_params} | locals + + return_value = { + key: value if not isinstance(value, str) + else eval(_rewrite_star_unpack(value), globals, locals) + for key, value in ann.items() + } + return return_value + + +def type_repr(value): + """Convert a Python value to a format suitable for use with the STRING format. + + This is intended as a helper for tools that support the STRING format but do + not have access to the code that originally produced the annotations. It uses + repr() for most objects. + + """ + if isinstance(value, (type, types.FunctionType, types.BuiltinFunctionType)): + if value.__module__ == "builtins": + return value.__qualname__ + return f"{value.__module__}.{value.__qualname__}" + elif isinstance(value, _Template): + tree = _template_to_ast(value) + return ast.unparse(tree) + if value is ...: + return "..." + return repr(value) + + +def annotations_to_string(annotations): + """Convert an annotation dict containing values to approximately the STRING format. + + Always returns a fresh a dictionary. + """ + return { + n: t if isinstance(t, str) else type_repr(t) + for n, t in annotations.items() + } + + +def _rewrite_star_unpack(arg): + """If the given argument annotation expression is a star unpack e.g. `'*Ts'` + rewrite it to a valid expression. + """ + if arg.lstrip().startswith("*"): + return f"({arg},)[0]" # E.g. (*Ts,)[0] or (*tuple[int, int],)[0] + else: + return arg + + +def _get_and_call_annotate(obj, format): + """Get the __annotate__ function and call it. + + May not return a fresh dictionary. + """ + annotate = getattr(obj, "__annotate__", None) + if annotate is not None: + ann = call_annotate_function(annotate, format, owner=obj) + if not isinstance(ann, dict): + raise ValueError(f"{obj!r}.__annotate__ returned a non-dict") + return ann + return None + + +_BASE_GET_ANNOTATIONS = type.__dict__["__annotations__"].__get__ + + +def _get_dunder_annotations(obj): + """Return the annotations for an object, checking that it is a dictionary. + + Does not return a fresh dictionary. + """ + # This special case is needed to support types defined under + # from __future__ import annotations, where accessing the __annotations__ + # attribute directly might return annotations for the wrong class. + if isinstance(obj, type): + try: + ann = _BASE_GET_ANNOTATIONS(obj) + except AttributeError: + # For static types, the descriptor raises AttributeError. + return None + else: + ann = getattr(obj, "__annotations__", None) + if ann is None: + return None + + if not isinstance(ann, dict): + raise ValueError(f"{obj!r}.__annotations__ is neither a dict nor None") + return ann + + +class _ExtraNameFixer(ast.NodeTransformer): + """Fixer for __extra_names__ items in ForwardRef __repr__ and string evaluation""" + def __init__(self, extra_names): + self.extra_names = extra_names + + def visit_Name(self, node: ast.Name): + if (new_name := self.extra_names.get(node.id, _sentinel)) is not _sentinel: + node = ast.Name(id=type_repr(new_name)) + return node diff --git a/Lib/argparse.py b/Lib/argparse.py index 543d9944f9e..24f4740562d 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -18,11 +18,12 @@ 'integers', metavar='int', nargs='+', type=int, help='an integer to be summed') parser.add_argument( - '--log', default=sys.stdout, type=argparse.FileType('w'), + '--log', help='the file where the sum should be written') args = parser.parse_args() - args.log.write('%s' % sum(args.integers)) - args.log.close() + with (open(args.log, 'w') if args.log is not None + else contextlib.nullcontext(sys.stdout)) as log: + log.write('%s' % sum(args.integers)) The module contains the following public classes: @@ -39,7 +40,8 @@ - FileType -- A factory for defining types of files to be created. As the example above shows, instances of FileType are typically passed as - the type= argument of add_argument() calls. + the type= argument of add_argument() calls. Deprecated since + Python 3.14. - Action -- The base class for parser actions. Typically actions are selected by passing strings like 'store_true' or 'append_const' to @@ -89,8 +91,6 @@ import re as _re import sys as _sys -import warnings - from gettext import gettext as _, ngettext SUPPRESS = '==SUPPRESS==' @@ -149,6 +149,10 @@ def _copy_items(items): return copy.copy(items) +def _identity(value): + return value + + # =============== # Formatting Help # =============== @@ -161,18 +165,21 @@ class HelpFormatter(object): provided by the class are considered an implementation detail. """ - def __init__(self, - prog, - indent_increment=2, - max_help_position=24, - width=None): - + def __init__( + self, + prog, + indent_increment=2, + max_help_position=24, + width=None, + color=True, + ): # default setting for width if width is None: import shutil width = shutil.get_terminal_size().columns width -= 2 + self._set_color(color) self._prog = prog self._indent_increment = indent_increment self._max_help_position = min(max_help_position, @@ -189,9 +196,20 @@ def __init__(self, self._whitespace_matcher = _re.compile(r'\s+', _re.ASCII) self._long_break_matcher = _re.compile(r'\n\n\n+') + def _set_color(self, color): + from _colorize import can_colorize, decolor, get_theme + + if color and can_colorize(): + self._theme = get_theme(force_color=True).argparse + self._decolor = decolor + else: + self._theme = get_theme(force_no_color=True).argparse + self._decolor = _identity + # =============================== # Section and indentation methods # =============================== + def _indent(self): self._current_indent += self._indent_increment self._level += 1 @@ -225,7 +243,12 @@ def format_help(self): # add the heading if the section was non-empty if self.heading is not SUPPRESS and self.heading is not None: current_indent = self.formatter._current_indent - heading = '%*s%s:\n' % (current_indent, '', self.heading) + heading_text = _('%(heading)s:') % dict(heading=self.heading) + t = self.formatter._theme + heading = ( + f'{" " * current_indent}' + f'{t.heading}{heading_text}{t.reset}\n' + ) else: heading = '' @@ -238,6 +261,7 @@ def _add_item(self, func, args): # ======================== # Message building methods # ======================== + def start_section(self, heading): self._indent() section = self._Section(self, self._current_section, heading) @@ -261,14 +285,13 @@ def add_argument(self, action): if action.help is not SUPPRESS: # find all invocations - get_invocation = self._format_action_invocation - invocations = [get_invocation(action)] + get_invocation = lambda x: self._decolor(self._format_action_invocation(x)) + invocation_lengths = [len(get_invocation(action)) + self._current_indent] for subaction in self._iter_indented_subactions(action): - invocations.append(get_invocation(subaction)) + invocation_lengths.append(len(get_invocation(subaction)) + self._current_indent) # update the maximum item length - invocation_length = max(map(len, invocations)) - action_length = invocation_length + self._current_indent + action_length = max(invocation_lengths) self._action_max_length = max(self._action_max_length, action_length) @@ -282,6 +305,7 @@ def add_arguments(self, actions): # ======================= # Help-formatting methods # ======================= + def format_help(self): help = self._root_section.format_help() if help: @@ -295,51 +319,39 @@ def _join_parts(self, part_strings): if part and part is not SUPPRESS]) def _format_usage(self, usage, actions, groups, prefix): + t = self._theme + if prefix is None: prefix = _('usage: ') # if usage is specified, use that if usage is not None: - usage = usage % dict(prog=self._prog) + usage = ( + t.prog_extra + + usage + % {"prog": f"{t.prog}{self._prog}{t.reset}{t.prog_extra}"} + + t.reset + ) # if no optionals or positionals are available, usage is just prog elif usage is None and not actions: - usage = '%(prog)s' % dict(prog=self._prog) + usage = f"{t.prog}{self._prog}{t.reset}" # if optionals and positionals are available, calculate usage elif usage is None: prog = '%(prog)s' % dict(prog=self._prog) - # split optionals from positionals - optionals = [] - positionals = [] - for action in actions: - if action.option_strings: - optionals.append(action) - else: - positionals.append(action) - + parts, pos_start = self._get_actions_usage_parts(actions, groups) # build full usage string - format = self._format_actions_usage - action_usage = format(optionals + positionals, groups) - usage = ' '.join([s for s in [prog, action_usage] if s]) + usage = ' '.join(filter(None, [prog, *parts])) # wrap the usage parts if it's too long text_width = self._width - self._current_indent - if len(prefix) + len(usage) > text_width: + if len(prefix) + len(self._decolor(usage)) > text_width: # break usage into wrappable parts - part_regexp = ( - r'\(.*?\)+(?=\s|$)|' - r'\[.*?\]+(?=\s|$)|' - r'\S+' - ) - opt_usage = format(optionals, groups) - pos_usage = format(positionals, groups) - opt_parts = _re.findall(part_regexp, opt_usage) - pos_parts = _re.findall(part_regexp, pos_usage) - assert ' '.join(opt_parts) == opt_usage - assert ' '.join(pos_parts) == pos_usage + opt_parts = parts[:pos_start] + pos_parts = parts[pos_start:] # helper for wrapping lines def get_lines(parts, indent, prefix=None): @@ -351,12 +363,13 @@ def get_lines(parts, indent, prefix=None): else: line_len = indent_length - 1 for part in parts: - if line_len + 1 + len(part) > text_width and line: + part_len = len(self._decolor(part)) + if line_len + 1 + part_len > text_width and line: lines.append(indent + ' '.join(line)) line = [] line_len = indent_length - 1 line.append(part) - line_len += len(part) + 1 + line_len += part_len + 1 if line: lines.append(indent + ' '.join(line)) if prefix is not None: @@ -364,8 +377,9 @@ def get_lines(parts, indent, prefix=None): return lines # if prog is short, follow it with optionals or positionals - if len(prefix) + len(prog) <= 0.75 * text_width: - indent = ' ' * (len(prefix) + len(prog) + 1) + prog_len = len(self._decolor(prog)) + if len(prefix) + prog_len <= 0.75 * text_width: + indent = ' ' * (len(prefix) + prog_len + 1) if opt_parts: lines = get_lines([prog] + opt_parts, indent, prefix) lines.extend(get_lines(pos_parts, indent)) @@ -388,121 +402,120 @@ def get_lines(parts, indent, prefix=None): # join lines into usage usage = '\n'.join(lines) - # prefix with 'usage:' - return '%s%s\n\n' % (prefix, usage) - - def _format_actions_usage(self, actions, groups): - # find group indices and identify actions in groups - group_actions = set() - inserts = {} - for group in groups: - if not group._group_actions: - raise ValueError(f'empty group {group}') + usage = usage.removeprefix(prog) + usage = f"{t.prog}{prog}{t.reset}{usage}" - try: - start = actions.index(group._group_actions[0]) - except ValueError: - continue - else: - group_action_count = len(group._group_actions) - end = start + group_action_count - if actions[start:end] == group._group_actions: + # prefix with 'usage:' + return f'{t.usage}{prefix}{t.reset}{usage}\n\n' - suppressed_actions_count = 0 - for action in group._group_actions: - group_actions.add(action) - if action.help is SUPPRESS: - suppressed_actions_count += 1 + def _is_long_option(self, string): + return len(string) > 2 - exposed_actions_count = group_action_count - suppressed_actions_count + def _get_actions_usage_parts(self, actions, groups): + """Get usage parts with split index for optionals/positionals. - if not group.required: - if start in inserts: - inserts[start] += ' [' - else: - inserts[start] = '[' - if end in inserts: - inserts[end] += ']' - else: - inserts[end] = ']' - elif exposed_actions_count > 1: - if start in inserts: - inserts[start] += ' (' - else: - inserts[start] = '(' - if end in inserts: - inserts[end] += ')' - else: - inserts[end] = ')' - for i in range(start + 1, end): - inserts[i] = '|' + Returns (parts, pos_start) where pos_start is the index in parts + where positionals begin. + This preserves mutually exclusive group formatting across the + optionals/positionals boundary (gh-75949). + """ + actions = [action for action in actions if action.help is not SUPPRESS] + # group actions by mutually exclusive groups + action_groups = dict.fromkeys(actions) + for group in groups: + for action in group._group_actions: + if action in action_groups: + action_groups[action] = group + # positional arguments keep their position + positionals = [] + for action in actions: + if not action.option_strings: + group = action_groups.pop(action) + if group: + group_actions = [ + action2 for action2 in group._group_actions + if action2.option_strings and + action_groups.pop(action2, None) + ] + [action] + positionals.append((group.required, group_actions)) + else: + positionals.append((None, [action])) + # the remaining optional arguments are sorted by the position of + # the first option in the group + optionals = [] + for action in actions: + if action.option_strings and action in action_groups: + group = action_groups.pop(action) + if group: + group_actions = [action] + [ + action2 for action2 in group._group_actions + if action2.option_strings and + action_groups.pop(action2, None) + ] + optionals.append((group.required, group_actions)) + else: + optionals.append((None, [action])) # collect all actions format strings parts = [] - for i, action in enumerate(actions): - - # suppressed arguments are marked with None - # remove | separators for suppressed arguments - if action.help is SUPPRESS: - parts.append(None) - if inserts.get(i) == '|': - inserts.pop(i) - elif inserts.get(i + 1) == '|': - inserts.pop(i + 1) - - # produce all arg strings - elif not action.option_strings: - default = self._get_default_metavar_for_positional(action) - part = self._format_args(action, default) - - # if it's in a group, strip the outer [] - if action in group_actions: - if part[0] == '[' and part[-1] == ']': - part = part[1:-1] - - # add the action string to the list - parts.append(part) - - # produce the first way to invoke the option in brackets - else: - option_string = action.option_strings[0] + t = self._theme + pos_start = None + for i, (required, group) in enumerate(optionals + positionals): + start = len(parts) + if i == len(optionals): + pos_start = start + in_group = len(group) > 1 + for action in group: + # produce all arg strings + if not action.option_strings: + default = self._get_default_metavar_for_positional(action) + part = self._format_args(action, default) + # if it's in a group, strip the outer [] + if in_group: + if part[0] == '[' and part[-1] == ']': + part = part[1:-1] + part = t.summary_action + part + t.reset + + # produce the first way to invoke the option in brackets + else: + option_string = action.option_strings[0] + if self._is_long_option(option_string): + option_color = t.summary_long_option + else: + option_color = t.summary_short_option - # if the Optional doesn't take a value, format is: - # -s or --long - if action.nargs == 0: - part = action.format_usage() + # if the Optional doesn't take a value, format is: + # -s or --long + if action.nargs == 0: + part = action.format_usage() + part = f"{option_color}{part}{t.reset}" - # if the Optional takes a value, format is: - # -s ARGS or --long ARGS - else: - default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - part = '%s %s' % (option_string, args_string) + # if the Optional takes a value, format is: + # -s ARGS or --long ARGS + else: + default = self._get_default_metavar_for_optional(action) + args_string = self._format_args(action, default) + part = ( + f"{option_color}{option_string} " + f"{t.summary_label}{args_string}{t.reset}" + ) - # make it look optional if it's not required or in a group - if not action.required and action not in group_actions: - part = '[%s]' % part + # make it look optional if it's not required or in a group + if not (action.required or required or in_group): + part = '[%s]' % part # add the action string to the list parts.append(part) - # insert things at the necessary indices - for i in sorted(inserts, reverse=True): - parts[i:i] = [inserts[i]] - - # join all the action items with spaces - text = ' '.join([item for item in parts if item is not None]) - - # clean up separators for mutually exclusive groups - open = r'[\[(]' - close = r'[\])]' - text = _re.sub(r'(%s) ' % open, r'\1', text) - text = _re.sub(r' (%s)' % close, r'\1', text) - text = _re.sub(r'%s *%s' % (open, close), r'', text) - text = text.strip() + if in_group: + parts[start] = ('(' if required else '[') + parts[start] + for i in range(start, len(parts) - 1): + parts[i] += ' |' + parts[-1] += ')' if required else ']' - # return the text - return text + if pos_start is None: + pos_start = len(parts) + return parts, pos_start def _format_text(self, text): if '%(prog)' in text: @@ -518,6 +531,7 @@ def _format_action(self, action): help_width = max(self._width - help_position, 11) action_width = help_position - self._current_indent - 2 action_header = self._format_action_invocation(action) + action_header_no_color = self._decolor(action_header) # no help; start on same line and add a final newline if not action.help: @@ -525,9 +539,15 @@ def _format_action(self, action): action_header = '%*s%s\n' % tup # short action name; start on the same line and pad two spaces - elif len(action_header) <= action_width: - tup = self._current_indent, '', action_width, action_header + elif len(action_header_no_color) <= action_width: + # calculate widths without color codes + action_header_color = action_header + tup = self._current_indent, '', action_width, action_header_no_color action_header = '%*s%-*s ' % tup + # swap in the colored header + action_header = action_header.replace( + action_header_no_color, action_header_color + ) indent_first = 0 # long action name; start on the next line @@ -560,35 +580,48 @@ def _format_action(self, action): return self._join_parts(parts) def _format_action_invocation(self, action): + t = self._theme + if not action.option_strings: default = self._get_default_metavar_for_positional(action) - metavar, = self._metavar_formatter(action, default)(1) - return metavar + return ( + t.action + + ' '.join(self._metavar_formatter(action, default)(1)) + + t.reset + ) else: - parts = [] + + def color_option_strings(strings): + parts = [] + for s in strings: + if self._is_long_option(s): + parts.append(f"{t.long_option}{s}{t.reset}") + else: + parts.append(f"{t.short_option}{s}{t.reset}") + return parts # if the Optional doesn't take a value, format is: # -s, --long if action.nargs == 0: - parts.extend(action.option_strings) + option_strings = color_option_strings(action.option_strings) + return ', '.join(option_strings) # if the Optional takes a value, format is: - # -s ARGS, --long ARGS + # -s, --long ARGS else: default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - for option_string in action.option_strings: - parts.append('%s %s' % (option_string, args_string)) - - return ', '.join(parts) + option_strings = color_option_strings(action.option_strings) + args_string = ( + f"{t.label}{self._format_args(action, default)}{t.reset}" + ) + return ', '.join(option_strings) + ' ' + args_string def _metavar_formatter(self, action, default_metavar): if action.metavar is not None: result = action.metavar elif action.choices is not None: - choice_strs = [str(choice) for choice in action.choices] - result = '{%s}' % ','.join(choice_strs) + result = '{%s}' % ','.join(map(str, action.choices)) else: result = default_metavar @@ -628,17 +661,19 @@ def _format_args(self, action, default_metavar): return result def _expand_help(self, action): + help_string = self._get_help_string(action) + if '%' not in help_string: + return help_string params = dict(vars(action), prog=self._prog) for name in list(params): - if params[name] is SUPPRESS: + value = params[name] + if value is SUPPRESS: del params[name] - for name in list(params): - if hasattr(params[name], '__name__'): - params[name] = params[name].__name__ + elif hasattr(value, '__name__'): + params[name] = value.__name__ if params.get('choices') is not None: - choices_str = ', '.join([str(c) for c in params['choices']]) - params['choices'] = choices_str - return self._get_help_string(action) % params + params['choices'] = ', '.join(map(str, params['choices'])) + return help_string % params def _iter_indented_subactions(self, action): try: @@ -704,23 +739,18 @@ class ArgumentDefaultsHelpFormatter(HelpFormatter): """ def _get_help_string(self, action): - """ - Add the default value to the option help message. - - ArgumentDefaultsHelpFormatter and BooleanOptionalAction when it isn't - already present. This code will do that, detecting cornercases to - prevent duplicates or cases where it wouldn't make sense to the end - user. - """ help = action.help if help is None: help = '' - if '%(default)' not in help: - if action.default is not SUPPRESS: - defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] - if action.option_strings or action.nargs in defaulting_nargs: - help += ' (default: %(default)s)' + if ( + '%(default)' not in help + and action.default is not SUPPRESS + and not action.required + ): + defaulting_nargs = (OPTIONAL, ZERO_OR_MORE) + if action.option_strings or action.nargs in defaulting_nargs: + help += _(' (default: %(default)s)') return help @@ -750,11 +780,19 @@ def _get_action_name(argument): elif argument.option_strings: return '/'.join(argument.option_strings) elif argument.metavar not in (None, SUPPRESS): - return argument.metavar + metavar = argument.metavar + if not isinstance(metavar, tuple): + return metavar + if argument.nargs == ZERO_OR_MORE and len(metavar) == 2: + return '%s[, %s]' % metavar + elif argument.nargs == ONE_OR_MORE: + return '%s[, %s]' % metavar + else: + return ', '.join(metavar) elif argument.dest not in (None, SUPPRESS): return argument.dest elif argument.choices: - return '{' + ','.join(argument.choices) + '}' + return '{%s}' % ','.join(map(str, argument.choices)) else: return None @@ -849,7 +887,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): self.option_strings = option_strings self.dest = dest self.nargs = nargs @@ -860,6 +899,7 @@ def __init__(self, self.required = required self.help = help self.metavar = metavar + self.deprecated = deprecated def _get_kwargs(self): names = [ @@ -873,6 +913,7 @@ def _get_kwargs(self): 'required', 'help', 'metavar', + 'deprecated', ] return [(name, getattr(self, name)) for name in names] @@ -880,59 +921,37 @@ def format_usage(self): return self.option_strings[0] def __call__(self, parser, namespace, values, option_string=None): - raise NotImplementedError(_('.__call__() not defined')) + raise NotImplementedError('.__call__() not defined') -# FIXME: remove together with `BooleanOptionalAction` deprecated arguments. -_deprecated_default = object() - class BooleanOptionalAction(Action): def __init__(self, option_strings, dest, default=None, - type=_deprecated_default, - choices=_deprecated_default, required=False, help=None, - metavar=_deprecated_default): + deprecated=False): _option_strings = [] for option_string in option_strings: _option_strings.append(option_string) if option_string.startswith('--'): + if option_string.startswith('--no-'): + raise ValueError(f'invalid option name {option_string!r} ' + f'for BooleanOptionalAction') option_string = '--no-' + option_string[2:] _option_strings.append(option_string) - # We need `_deprecated` special value to ban explicit arguments that - # match default value. Like: - # parser.add_argument('-f', action=BooleanOptionalAction, type=int) - for field_name in ('type', 'choices', 'metavar'): - if locals()[field_name] is not _deprecated_default: - warnings._deprecated( - field_name, - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", - remove=(3, 14)) - - if type is _deprecated_default: - type = None - if choices is _deprecated_default: - choices = None - if metavar is _deprecated_default: - metavar = None - super().__init__( option_strings=_option_strings, dest=dest, nargs=0, default=default, - type=type, - choices=choices, required=required, help=help, - metavar=metavar) + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): @@ -955,7 +974,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): if nargs == 0: raise ValueError('nargs for store actions must be != 0; if you ' 'have nothing to store, actions such as store ' @@ -972,7 +992,8 @@ def __init__(self, choices=choices, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, values) @@ -987,7 +1008,8 @@ def __init__(self, default=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): super(_StoreConstAction, self).__init__( option_strings=option_strings, dest=dest, @@ -995,7 +1017,8 @@ def __init__(self, const=const, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, self.const) @@ -1008,14 +1031,16 @@ def __init__(self, dest, default=False, required=False, - help=None): + help=None, + deprecated=False): super(_StoreTrueAction, self).__init__( option_strings=option_strings, dest=dest, const=True, - default=default, + deprecated=deprecated, required=required, - help=help) + help=help, + default=default) class _StoreFalseAction(_StoreConstAction): @@ -1025,14 +1050,16 @@ def __init__(self, dest, default=True, required=False, - help=None): + help=None, + deprecated=False): super(_StoreFalseAction, self).__init__( option_strings=option_strings, dest=dest, const=False, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) class _AppendAction(Action): @@ -1047,7 +1074,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): if nargs == 0: raise ValueError('nargs for append actions must be != 0; if arg ' 'strings are not supplying the value to append, ' @@ -1064,7 +1092,8 @@ def __init__(self, choices=choices, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): items = getattr(namespace, self.dest, None) @@ -1082,7 +1111,8 @@ def __init__(self, default=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): super(_AppendConstAction, self).__init__( option_strings=option_strings, dest=dest, @@ -1091,7 +1121,8 @@ def __init__(self, default=default, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): items = getattr(namespace, self.dest, None) @@ -1107,14 +1138,16 @@ def __init__(self, dest, default=None, required=False, - help=None): + help=None, + deprecated=False): super(_CountAction, self).__init__( option_strings=option_strings, dest=dest, nargs=0, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): count = getattr(namespace, self.dest, None) @@ -1129,13 +1162,15 @@ def __init__(self, option_strings, dest=SUPPRESS, default=SUPPRESS, - help=None): + help=None, + deprecated=False): super(_HelpAction, self).__init__( option_strings=option_strings, dest=dest, default=default, nargs=0, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): parser.print_help() @@ -1149,7 +1184,10 @@ def __init__(self, version=None, dest=SUPPRESS, default=SUPPRESS, - help="show program's version number and exit"): + help=None, + deprecated=False): + if help is None: + help = _("show program's version number and exit") super(_VersionAction, self).__init__( option_strings=option_strings, dest=dest, @@ -1193,6 +1231,8 @@ def __init__(self, self._parser_class = parser_class self._name_parser_map = {} self._choices_actions = [] + self._deprecated = set() + self._color = True super(_SubParsersAction, self).__init__( option_strings=option_strings, @@ -1203,34 +1243,45 @@ def __init__(self, help=help, metavar=metavar) - def add_parser(self, name, **kwargs): + def add_parser(self, name, *, deprecated=False, **kwargs): # set prog from the existing prefix if kwargs.get('prog') is None: kwargs['prog'] = '%s %s' % (self._prog_prefix, name) + # set color + if kwargs.get('color') is None: + kwargs['color'] = self._color + aliases = kwargs.pop('aliases', ()) if name in self._name_parser_map: - raise ArgumentError(self, _('conflicting subparser: %s') % name) + raise ValueError(f'conflicting subparser: {name}') for alias in aliases: if alias in self._name_parser_map: - raise ArgumentError( - self, _('conflicting subparser alias: %s') % alias) + raise ValueError(f'conflicting subparser alias: {alias}') # create a pseudo-action to hold the choice help if 'help' in kwargs: help = kwargs.pop('help') choice_action = self._ChoicesPseudoAction(name, aliases, help) self._choices_actions.append(choice_action) + else: + choice_action = None # create the parser and add it to the map parser = self._parser_class(**kwargs) + if choice_action is not None: + parser._check_help(choice_action) self._name_parser_map[name] = parser # make parser available under aliases also for alias in aliases: self._name_parser_map[alias] = parser + if deprecated: + self._deprecated.add(name) + self._deprecated.update(aliases) + return parser def _get_subactions(self): @@ -1246,13 +1297,17 @@ def __call__(self, parser, namespace, values, option_string=None): # select the parser try: - parser = self._name_parser_map[parser_name] + subparser = self._name_parser_map[parser_name] except KeyError: args = {'parser_name': parser_name, 'choices': ', '.join(self._name_parser_map)} msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args raise ArgumentError(self, msg) + if parser_name in self._deprecated: + parser._warning(_("command '%(parser_name)s' is deprecated") % + {'parser_name': parser_name}) + # parse all the remaining options into the namespace # store any unrecognized options on the object, so that the top # level parser can decide what to do with them @@ -1260,12 +1315,13 @@ def __call__(self, parser, namespace, values, option_string=None): # In case this subparser defines new defaults, we parse them # in a new namespace object and then update the original # namespace for the relevant parts. - subnamespace, arg_strings = parser.parse_known_args(arg_strings, None) + subnamespace, arg_strings = subparser.parse_known_args(arg_strings, None) for key, value in vars(subnamespace).items(): setattr(namespace, key, value) if arg_strings: - vars(namespace).setdefault(_UNRECOGNIZED_ARGS_ATTR, []) + if not hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): + setattr(namespace, _UNRECOGNIZED_ARGS_ATTR, []) getattr(namespace, _UNRECOGNIZED_ARGS_ATTR).extend(arg_strings) class _ExtendAction(_AppendAction): @@ -1280,7 +1336,7 @@ def __call__(self, parser, namespace, values, option_string=None): # ============== class FileType(object): - """Factory for creating file object types + """Deprecated factory for creating file object types Instances of FileType are typically passed as type= arguments to the ArgumentParser add_argument() method. @@ -1297,6 +1353,12 @@ class FileType(object): """ def __init__(self, mode='r', bufsize=-1, encoding=None, errors=None): + import warnings + warnings.warn( + "FileType is deprecated. Simply open files after parsing arguments.", + category=PendingDeprecationWarning, + stacklevel=2 + ) self._mode = mode self._bufsize = bufsize self._encoding = encoding @@ -1400,7 +1462,7 @@ def __init__(self, self._defaults = {} # determines whether an "option" looks like a negative number - self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') + self._negative_number_matcher = _re.compile(r'-\.?\d') # whether or not there are any optionals that look like negative # numbers -- uses a list so it can be shared and edited @@ -1409,6 +1471,7 @@ def __init__(self, # ==================== # Registration methods # ==================== + def register(self, registry_name, value, object): registry = self._registries.setdefault(registry_name, {}) registry[value] = object @@ -1419,6 +1482,7 @@ def _registry_get(self, registry_name, value, default=None): # ================================== # Namespace default accessor methods # ================================== + def set_defaults(self, **kwargs): self._defaults.update(kwargs) @@ -1438,6 +1502,7 @@ def get_default(self, dest): # ======================= # Adding argument actions # ======================= + def add_argument(self, *args, **kwargs): """ add_argument(dest, ..., name=value, ...) @@ -1450,7 +1515,8 @@ def add_argument(self, *args, **kwargs): chars = self.prefix_chars if not args or len(args) == 1 and args[0][0] not in chars: if args and 'dest' in kwargs: - raise ValueError('dest supplied twice for positional argument') + raise TypeError('dest supplied twice for positional argument,' + ' did you mean metavar?') kwargs = self._get_positional_kwargs(*args, **kwargs) # otherwise, we're adding an optional argument @@ -1466,27 +1532,34 @@ def add_argument(self, *args, **kwargs): kwargs['default'] = self.argument_default # create the action object, and add it to the parser + action_name = kwargs.get('action') action_class = self._pop_action_class(kwargs) if not callable(action_class): - raise ValueError('unknown action "%s"' % (action_class,)) + raise ValueError(f'unknown action {action_class!r}') action = action_class(**kwargs) + # raise an error if action for positional argument does not + # consume arguments + if not action.option_strings and action.nargs == 0: + raise ValueError(f'action {action_name!r} is not valid for positional arguments') + # raise an error if the action type is not callable type_func = self._registry_get('type', action.type, action.type) if not callable(type_func): - raise ValueError('%r is not callable' % (type_func,)) + raise TypeError(f'{type_func!r} is not callable') if type_func is FileType: - raise ValueError('%r is a FileType class object, instance of it' - ' must be passed' % (type_func,)) + raise TypeError(f'{type_func!r} is a FileType class object, ' + f'instance of it must be passed') # raise an error if the metavar does not match the type - if hasattr(self, "_get_formatter"): + if hasattr(self, "_get_validation_formatter"): + formatter = self._get_validation_formatter() try: - self._get_formatter()._format_args(action, None) + formatter._format_args(action, None) except TypeError: raise ValueError("length of metavar tuple does not match nargs") - + self._check_help(action) return self._add_action(action) def add_argument_group(self, *args, **kwargs): @@ -1528,8 +1601,10 @@ def _add_container_actions(self, container): title_group_map = {} for group in self._action_groups: if group.title in title_group_map: - msg = _('cannot merge actions - two groups are named %r') - raise ValueError(msg % (group.title)) + # This branch could happen if a derived class added + # groups with duplicated titles in __init__ + msg = f'cannot merge actions - two groups are named {group.title!r}' + raise ValueError(msg) title_group_map[group.title] = group # map each action to its group @@ -1552,7 +1627,11 @@ def _add_container_actions(self, container): # NOTE: if add_mutually_exclusive_group ever gains title= and # description= then this code will need to be expanded as above for group in container._mutually_exclusive_groups: - mutex_group = self.add_mutually_exclusive_group( + if group._container is container: + cont = self + else: + cont = title_group_map[group._container.title] + mutex_group = cont.add_mutually_exclusive_group( required=group.required) # map the actions to their new mutex group @@ -1566,14 +1645,15 @@ def _add_container_actions(self, container): def _get_positional_kwargs(self, dest, **kwargs): # make sure required is not specified if 'required' in kwargs: - msg = _("'required' is an invalid argument for positionals") + msg = "'required' is an invalid argument for positionals" raise TypeError(msg) # mark positional arguments as required if at least one is # always required - if kwargs.get('nargs') not in [OPTIONAL, ZERO_OR_MORE]: - kwargs['required'] = True - if kwargs.get('nargs') == ZERO_OR_MORE and 'default' not in kwargs: + nargs = kwargs.get('nargs') + if nargs == 0: + raise ValueError('nargs for positionals must be != 0') + if nargs not in [OPTIONAL, ZERO_OR_MORE, REMAINDER, SUPPRESS]: kwargs['required'] = True # return the keyword arguments with no option strings @@ -1586,11 +1666,9 @@ def _get_optional_kwargs(self, *args, **kwargs): for option_string in args: # error on strings that don't start with an appropriate prefix if not option_string[0] in self.prefix_chars: - args = {'option': option_string, - 'prefix_chars': self.prefix_chars} - msg = _('invalid option string %(option)r: ' - 'must start with a character %(prefix_chars)r') - raise ValueError(msg % args) + raise ValueError( + f'invalid option string {option_string!r}: ' + f'must start with a character {self.prefix_chars!r}') # strings starting with two prefix characters are long options option_strings.append(option_string) @@ -1606,8 +1684,8 @@ def _get_optional_kwargs(self, *args, **kwargs): dest_option_string = option_strings[0] dest = dest_option_string.lstrip(self.prefix_chars) if not dest: - msg = _('dest= is required for options like %r') - raise ValueError(msg % option_string) + msg = f'dest= is required for options like {option_string!r}' + raise TypeError(msg) dest = dest.replace('-', '_') # return the updated keyword arguments @@ -1623,8 +1701,8 @@ def _get_handler(self): try: return getattr(self, handler_func_name) except AttributeError: - msg = _('invalid conflict_resolution value: %r') - raise ValueError(msg % self.conflict_handler) + msg = f'invalid conflict_resolution value: {self.conflict_handler!r}' + raise ValueError(msg) def _check_conflict(self, action): @@ -1663,10 +1741,26 @@ def _handle_conflict_resolve(self, action, conflicting_actions): if not action.option_strings: action.container._remove_action(action) + def _check_help(self, action): + if action.help and hasattr(self, "_get_validation_formatter"): + formatter = self._get_validation_formatter() + try: + formatter._expand_help(action) + except (ValueError, TypeError, KeyError) as exc: + raise ValueError('badly formed help string') from exc + class _ArgumentGroup(_ActionsContainer): def __init__(self, container, title=None, description=None, **kwargs): + if 'prefix_chars' in kwargs: + import warnings + depr_msg = ( + "The use of the undocumented 'prefix_chars' parameter in " + "ArgumentParser.add_argument_group() is deprecated." + ) + warnings.warn(depr_msg, DeprecationWarning, stacklevel=3) + # add any missing keyword arguments by checking the container update = kwargs.setdefault update('conflict_handler', container.conflict_handler) @@ -1698,13 +1792,7 @@ def _remove_action(self, action): self._group_actions.remove(action) def add_argument_group(self, *args, **kwargs): - warnings.warn( - "Nesting argument groups is deprecated.", - category=DeprecationWarning, - stacklevel=2 - ) - return super().add_argument_group(*args, **kwargs) - + raise ValueError('argument groups cannot be nested') class _MutuallyExclusiveGroup(_ArgumentGroup): @@ -1715,7 +1803,7 @@ def __init__(self, container, required=False): def _add_action(self, action): if action.required: - msg = _('mutually exclusive arguments must be optional') + msg = 'mutually exclusive arguments must be optional' raise ValueError(msg) action = self._container._add_action(action) self._group_actions.append(action) @@ -1725,13 +1813,29 @@ def _remove_action(self, action): self._container._remove_action(action) self._group_actions.remove(action) - def add_mutually_exclusive_group(self, *args, **kwargs): - warnings.warn( - "Nesting mutually exclusive groups is deprecated.", - category=DeprecationWarning, - stacklevel=2 - ) - return super().add_mutually_exclusive_group(*args, **kwargs) + def add_mutually_exclusive_group(self, **kwargs): + raise ValueError('mutually exclusive groups cannot be nested') + +def _prog_name(prog=None): + if prog is not None: + return prog + arg0 = _sys.argv[0] + try: + modspec = _sys.modules['__main__'].__spec__ + except (KeyError, AttributeError): + # possibly PYTHONSTARTUP or -X presite or other weird edge case + # no good answer here, so fall back to the default + modspec = None + if modspec is None: + # simple script + return _os.path.basename(arg0) + py = _os.path.basename(_sys.executable) + if modspec.name != '__main__': + # imported module or package + modname = modspec.name.removesuffix('.__main__') + return f'{py} -m {modname}' + # directory or ZIP file + return f'{py} {arg0}' class ArgumentParser(_AttributeHolder, _ActionsContainer): @@ -1754,6 +1858,9 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer): - allow_abbrev -- Allow long options to be abbreviated unambiguously - exit_on_error -- Determines whether or not ArgumentParser exits with error info when an error occurs + - suggest_on_error - Enables suggestions for mistyped argument choices + and subparser names (default: ``False``) + - color - Allow color output in help messages (default: ``False``) """ def __init__(self, @@ -1769,19 +1876,18 @@ def __init__(self, conflict_handler='error', add_help=True, allow_abbrev=True, - exit_on_error=True): - + exit_on_error=True, + *, + suggest_on_error=False, + color=True, + ): superinit = super(ArgumentParser, self).__init__ superinit(description=description, prefix_chars=prefix_chars, argument_default=argument_default, conflict_handler=conflict_handler) - # default setting for prog - if prog is None: - prog = _os.path.basename(_sys.argv[0]) - - self.prog = prog + self.prog = _prog_name(prog) self.usage = usage self.epilog = epilog self.formatter_class = formatter_class @@ -1789,6 +1895,11 @@ def __init__(self, self.add_help = add_help self.allow_abbrev = allow_abbrev self.exit_on_error = exit_on_error + self.suggest_on_error = suggest_on_error + self.color = color + + # Cached formatter for validation (avoids repeated _set_color calls) + self._cached_formatter = None add_group = self.add_argument_group self._positionals = add_group(_('positional arguments')) @@ -1796,9 +1907,7 @@ def __init__(self, self._subparsers = None # register types - def identity(string): - return string - self.register('type', None, identity) + self.register('type', None, _identity) # add help argument if necessary # (using explicit default to override global argument_default) @@ -1811,17 +1920,16 @@ def identity(string): # add parent arguments and defaults for parent in parents: + if not isinstance(parent, ArgumentParser): + raise TypeError('parents must be a list of ArgumentParser') self._add_container_actions(parent) - try: - defaults = parent._defaults - except AttributeError: - pass - else: - self._defaults.update(defaults) + defaults = parent._defaults + self._defaults.update(defaults) # ======================= # Pretty __repr__ methods # ======================= + def _get_kwargs(self): names = [ 'prog', @@ -1836,16 +1944,17 @@ def _get_kwargs(self): # ================================== # Optional/Positional adding methods # ================================== + def add_subparsers(self, **kwargs): if self._subparsers is not None: - self.error(_('cannot have multiple subparser arguments')) + raise ValueError('cannot have multiple subparser arguments') # add the parser class to the arguments if it's not present kwargs.setdefault('parser_class', type(self)) if 'title' in kwargs or 'description' in kwargs: - title = _(kwargs.pop('title', 'subcommands')) - description = _(kwargs.pop('description', None)) + title = kwargs.pop('title', _('subcommands')) + description = kwargs.pop('description', None) self._subparsers = self.add_argument_group(title, description) else: self._subparsers = self._positionals @@ -1853,15 +1962,19 @@ def add_subparsers(self, **kwargs): # prog defaults to the usage message of this parser, skipping # optional arguments and with no "usage:" prefix if kwargs.get('prog') is None: - formatter = self._get_formatter() + # Create formatter without color to avoid storing ANSI codes in prog + formatter = self.formatter_class(prog=self.prog) + formatter._set_color(False) positionals = self._get_positional_actions() groups = self._mutually_exclusive_groups - formatter.add_usage(self.usage, positionals, groups, '') + formatter.add_usage(None, positionals, groups, '') kwargs['prog'] = formatter.format_help().strip() # create the parsers action and add it to the positionals list parsers_class = self._pop_action_class(kwargs, 'parsers') action = parsers_class(option_strings=[], **kwargs) + action._color = self.color + self._check_help(action) self._subparsers._add_action(action) # return the created parsers action @@ -1887,14 +2000,21 @@ def _get_positional_actions(self): # ===================================== # Command line argument parsing methods # ===================================== + def parse_args(self, args=None, namespace=None): args, argv = self.parse_known_args(args, namespace) if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) + msg = _('unrecognized arguments: %s') % ' '.join(argv) + if self.exit_on_error: + self.error(msg) + else: + raise ArgumentError(None, msg) return args def parse_known_args(self, args=None, namespace=None): + return self._parse_known_args2(args, namespace, intermixed=False) + + def _parse_known_args2(self, args, namespace, intermixed): if args is None: # args default to the system args args = _sys.argv[1:] @@ -1921,18 +2041,18 @@ def parse_known_args(self, args=None, namespace=None): # parse the arguments and exit if there are any errors if self.exit_on_error: try: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = self._parse_known_args(args, namespace, intermixed) except ArgumentError as err: self.error(str(err)) else: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = self._parse_known_args(args, namespace, intermixed) if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) return namespace, args - def _parse_known_args(self, arg_strings, namespace): + def _parse_known_args(self, arg_strings, namespace, intermixed): # replace arg strings that are file references if self.fromfile_prefix_chars is not None: arg_strings = self._read_args_from_files(arg_strings) @@ -1964,11 +2084,11 @@ def _parse_known_args(self, arg_strings, namespace): # otherwise, add the arg to the arg strings # and note the index if it was an option else: - option_tuple = self._parse_optional(arg_string) - if option_tuple is None: + option_tuples = self._parse_optional(arg_string) + if option_tuples is None: pattern = 'A' else: - option_string_indices[i] = option_tuple + option_string_indices[i] = option_tuples pattern = 'O' arg_string_pattern_parts.append(pattern) @@ -1978,15 +2098,15 @@ def _parse_known_args(self, arg_strings, namespace): # converts arg strings to the appropriate and then takes the action seen_actions = set() seen_non_default_actions = set() + warned = set() def take_action(action, argument_strings, option_string=None): seen_actions.add(action) argument_values = self._get_values(action, argument_strings) # error if this argument is not allowed with other previously - # seen arguments, assuming that actions that use the default - # value don't really count as "present" - if argument_values is not action.default: + # seen arguments + if action.option_strings or argument_strings: seen_non_default_actions.add(action) for conflict_action in action_conflicts.get(action, []): if conflict_action in seen_non_default_actions: @@ -2003,8 +2123,16 @@ def take_action(action, argument_strings, option_string=None): def consume_optional(start_index): # get the optional identified at this index - option_tuple = option_string_indices[start_index] - action, option_string, explicit_arg = option_tuple + option_tuples = option_string_indices[start_index] + # if multiple actions match, the option string was ambiguous + if len(option_tuples) > 1: + options = ', '.join([option_string + for action, option_string, sep, explicit_arg in option_tuples]) + args = {'option': arg_strings[start_index], 'matches': options} + msg = _('ambiguous option: %(option)s could match %(matches)s') + raise ArgumentError(None, msg % args) + + action, option_string, sep, explicit_arg = option_tuples[0] # identify additional optionals in the same arg string # (e.g. -xyz is the same as -x -y -z if no args are required) @@ -2015,6 +2143,7 @@ def consume_optional(start_index): # if we found no optional action, skip it if action is None: extras.append(arg_strings[start_index]) + extras_pattern.append('O') return start_index + 1 # if there is an explicit argument, try to match the @@ -2031,18 +2160,28 @@ def consume_optional(start_index): and option_string[1] not in chars and explicit_arg != '' ): + if sep or explicit_arg[0] in chars: + msg = _('ignored explicit argument %r') + raise ArgumentError(action, msg % explicit_arg) action_tuples.append((action, [], option_string)) char = option_string[0] option_string = char + explicit_arg[0] - new_explicit_arg = explicit_arg[1:] or None optionals_map = self._option_string_actions if option_string in optionals_map: action = optionals_map[option_string] - explicit_arg = new_explicit_arg + explicit_arg = explicit_arg[1:] + if not explicit_arg: + sep = explicit_arg = None + elif explicit_arg[0] == '=': + sep = '=' + explicit_arg = explicit_arg[1:] + else: + sep = '' else: - msg = _('ignored explicit argument %r') - raise ArgumentError(action, msg % explicit_arg) - + extras.append(char + explicit_arg) + extras_pattern.append('O') + stop = start_index + 1 + break # if the action expect exactly one argument, we've # successfully matched the option; exit the loop elif arg_count == 1: @@ -2073,6 +2212,10 @@ def consume_optional(start_index): # the Optional's string args stopped assert action_tuples for action, args, option_string in action_tuples: + if action.deprecated and option_string not in warned: + self._warning(_("option '%(option)s' is deprecated") % + {'option': option_string}) + warned.add(option_string) take_action(action, args, option_string) return stop @@ -2091,7 +2234,20 @@ def consume_positionals(start_index): # and add the Positional and its args to the list for action, arg_count in zip(positionals, arg_counts): args = arg_strings[start_index: start_index + arg_count] + # Strip out the first '--' if it is not in REMAINDER arg. + if action.nargs == PARSER: + if arg_strings_pattern[start_index] == '-': + assert args[0] == '--' + args.remove('--') + elif action.nargs != REMAINDER: + if (arg_strings_pattern.find('-', start_index, + start_index + arg_count) >= 0): + args.remove('--') start_index += arg_count + if args and action.deprecated and action.dest not in warned: + self._warning(_("argument '%(argument_name)s' is deprecated") % + {'argument_name': action.dest}) + warned.add(action.dest) take_action(action, args) # slice off the Positionals that we just parsed and return the @@ -2102,6 +2258,7 @@ def consume_positionals(start_index): # consume Positionals and Optionals alternately, until we have # passed the last option string extras = [] + extras_pattern = [] start_index = 0 if option_string_indices: max_option_string_index = max(option_string_indices) @@ -2110,11 +2267,12 @@ def consume_positionals(start_index): while start_index <= max_option_string_index: # consume any Positionals preceding the next option - next_option_string_index = min([ - index - for index in option_string_indices - if index >= start_index]) - if start_index != next_option_string_index: + next_option_string_index = start_index + while next_option_string_index <= max_option_string_index: + if next_option_string_index in option_string_indices: + break + next_option_string_index += 1 + if not intermixed and start_index != next_option_string_index: positionals_end_index = consume_positionals(start_index) # only try to parse the next optional if we didn't consume @@ -2130,16 +2288,35 @@ def consume_positionals(start_index): if start_index not in option_string_indices: strings = arg_strings[start_index:next_option_string_index] extras.extend(strings) + extras_pattern.extend(arg_strings_pattern[start_index:next_option_string_index]) start_index = next_option_string_index # consume the next optional and any arguments for it start_index = consume_optional(start_index) - # consume any positionals following the last Optional - stop_index = consume_positionals(start_index) + if not intermixed: + # consume any positionals following the last Optional + stop_index = consume_positionals(start_index) - # if we didn't consume all the argument strings, there were extras - extras.extend(arg_strings[stop_index:]) + # if we didn't consume all the argument strings, there were extras + extras.extend(arg_strings[stop_index:]) + else: + extras.extend(arg_strings[start_index:]) + extras_pattern.extend(arg_strings_pattern[start_index:]) + extras_pattern = ''.join(extras_pattern) + assert len(extras_pattern) == len(extras) + # consume all positionals + arg_strings = [s for s, c in zip(extras, extras_pattern) if c != 'O'] + arg_strings_pattern = extras_pattern.replace('O', '') + stop_index = consume_positionals(0) + # leave unknown optionals and non-consumed positionals in extras + for i, c in enumerate(extras_pattern): + if not stop_index: + break + if c != 'O': + stop_index -= 1 + extras[i] = None + extras = [s for s in extras if s is not None] # make sure all required actions were present and also convert # action defaults which were not given as arguments @@ -2161,7 +2338,7 @@ def consume_positionals(start_index): self._get_value(action, action.default)) if required_actions: - self.error(_('the following arguments are required: %s') % + raise ArgumentError(None, _('the following arguments are required: %s') % ', '.join(required_actions)) # make sure all required groups had one option present @@ -2177,7 +2354,7 @@ def consume_positionals(start_index): for action in group._group_actions if action.help is not SUPPRESS] msg = _('one of the arguments %s is required') - self.error(msg % ' '.join(names)) + raise ArgumentError(None, msg % ' '.join(names)) # return the updated namespace and the extra arguments return namespace, extras @@ -2204,7 +2381,7 @@ def _read_args_from_files(self, arg_strings): arg_strings = self._read_args_from_files(arg_strings) new_arg_strings.extend(arg_strings) except OSError as err: - self.error(str(err)) + raise ArgumentError(None, str(err)) # return the modified argument list return new_arg_strings @@ -2237,18 +2414,19 @@ def _match_argument(self, action, arg_strings_pattern): def _match_arguments_partial(self, actions, arg_strings_pattern): # progressively shorten the actions list by slicing off the # final actions until we find a match - result = [] for i in range(len(actions), 0, -1): actions_slice = actions[:i] pattern = ''.join([self._get_nargs_pattern(action) for action in actions_slice]) match = _re.match(pattern, arg_strings_pattern) if match is not None: - result.extend([len(string) for string in match.groups()]) - break - - # return the list of arg string counts - return result + result = [len(string) for string in match.groups()] + if (match.end() < len(arg_strings_pattern) + and arg_strings_pattern[match.end()] == 'O'): + while result and not result[-1]: + del result[-1] + return result + return [] def _parse_optional(self, arg_string): # if it's an empty string, it was meant to be a positional @@ -2262,36 +2440,24 @@ def _parse_optional(self, arg_string): # if the option string is present in the parser, return the action if arg_string in self._option_string_actions: action = self._option_string_actions[arg_string] - return action, arg_string, None + return [(action, arg_string, None, None)] # if it's just a single character, it was meant to be positional if len(arg_string) == 1: return None # if the option string before the "=" is present, return the action - if '=' in arg_string: - option_string, explicit_arg = arg_string.split('=', 1) - if option_string in self._option_string_actions: - action = self._option_string_actions[option_string] - return action, option_string, explicit_arg + option_string, sep, explicit_arg = arg_string.partition('=') + if sep and option_string in self._option_string_actions: + action = self._option_string_actions[option_string] + return [(action, option_string, sep, explicit_arg)] # search through all possible prefixes of the option string # and all actions in the parser for possible interpretations option_tuples = self._get_option_tuples(arg_string) - # if multiple actions match, the option string was ambiguous - if len(option_tuples) > 1: - options = ', '.join([option_string - for action, option_string, explicit_arg in option_tuples]) - args = {'option': arg_string, 'matches': options} - msg = _('ambiguous option: %(option)s could match %(matches)s') - self.error(msg % args) - - # if exactly one action matched, this segmentation is good, - # so return the parsed action - elif len(option_tuples) == 1: - option_tuple, = option_tuples - return option_tuple + if option_tuples: + return option_tuples # if it was not found as an option, but it looks like a negative # number, it was meant to be positional @@ -2306,7 +2472,7 @@ def _parse_optional(self, arg_string): # it was meant to be an optional but there is no such option # in this parser (though it might be a valid option in a subparser) - return None, arg_string, None + return [(None, arg_string, None, None)] def _get_option_tuples(self, option_string): result = [] @@ -2316,39 +2482,38 @@ def _get_option_tuples(self, option_string): chars = self.prefix_chars if option_string[0] in chars and option_string[1] in chars: if self.allow_abbrev: - if '=' in option_string: - option_prefix, explicit_arg = option_string.split('=', 1) - else: - option_prefix = option_string - explicit_arg = None + option_prefix, sep, explicit_arg = option_string.partition('=') + if not sep: + sep = explicit_arg = None for option_string in self._option_string_actions: if option_string.startswith(option_prefix): action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg + tup = action, option_string, sep, explicit_arg result.append(tup) # single character options can be concatenated with their arguments # but multiple character options always have to have their argument # separate elif option_string[0] in chars and option_string[1] not in chars: - option_prefix = option_string - explicit_arg = None + option_prefix, sep, explicit_arg = option_string.partition('=') + if not sep: + sep = explicit_arg = None short_option_prefix = option_string[:2] short_explicit_arg = option_string[2:] for option_string in self._option_string_actions: if option_string == short_option_prefix: action = self._option_string_actions[option_string] - tup = action, option_string, short_explicit_arg + tup = action, option_string, '', short_explicit_arg result.append(tup) - elif option_string.startswith(option_prefix): + elif self.allow_abbrev and option_string.startswith(option_prefix): action = self._option_string_actions[option_string] - tup = action, option_string, explicit_arg + tup = action, option_string, sep, explicit_arg result.append(tup) # shouldn't ever get here else: - self.error(_('unexpected option string: %s') % option_string) + raise ArgumentError(None, _('unexpected option string: %s') % option_string) # return the collected option tuples return result @@ -2357,43 +2522,40 @@ def _get_nargs_pattern(self, action): # in all examples below, we have to allow for '--' args # which are represented as '-' in the pattern nargs = action.nargs + # if this is an optional action, -- is not allowed + option = action.option_strings # the default (None) is assumed to be a single argument if nargs is None: - nargs_pattern = '(-*A-*)' + nargs_pattern = '([A])' if option else '(-*A-*)' # allow zero or one arguments elif nargs == OPTIONAL: - nargs_pattern = '(-*A?-*)' + nargs_pattern = '(A?)' if option else '(-*A?-*)' # allow zero or more arguments elif nargs == ZERO_OR_MORE: - nargs_pattern = '(-*[A-]*)' + nargs_pattern = '(A*)' if option else '(-*[A-]*)' # allow one or more arguments elif nargs == ONE_OR_MORE: - nargs_pattern = '(-*A[A-]*)' + nargs_pattern = '(A+)' if option else '(-*A[A-]*)' # allow any number of options or arguments elif nargs == REMAINDER: - nargs_pattern = '([-AO]*)' + nargs_pattern = '([AO]*)' if option else '(.*)' # allow one argument followed by any number of options or arguments elif nargs == PARSER: - nargs_pattern = '(-*A[-AO]*)' + nargs_pattern = '(A[AO]*)' if option else '(-*A[-AO]*)' # suppress action, like nargs=0 elif nargs == SUPPRESS: - nargs_pattern = '(-*-*)' + nargs_pattern = '()' if option else '(-*)' # all others should be integers else: - nargs_pattern = '(-*%s-*)' % '-*'.join('A' * nargs) - - # if this is an optional action, -- is not allowed - if action.option_strings: - nargs_pattern = nargs_pattern.replace('-*', '') - nargs_pattern = nargs_pattern.replace('-', '') + nargs_pattern = '([AO]{%d})' % nargs if option else '((?:-*A){%d}-*)' % nargs # return the pattern return nargs_pattern @@ -2405,8 +2567,11 @@ def _get_nargs_pattern(self, action): def parse_intermixed_args(self, args=None, namespace=None): args, argv = self.parse_known_intermixed_args(args, namespace) if argv: - msg = _('unrecognized arguments: %s') - self.error(msg % ' '.join(argv)) + msg = _('unrecognized arguments: %s') % ' '.join(argv) + if self.exit_on_error: + self.error(msg) + else: + raise ArgumentError(None, msg) return args def parse_known_intermixed_args(self, args=None, namespace=None): @@ -2417,10 +2582,6 @@ def parse_known_intermixed_args(self, args=None, namespace=None): # are then parsed. If the parser definition is incompatible with the # intermixed assumptions (e.g. use of REMAINDER, subparsers) a # TypeError is raised. - # - # positionals are 'deactivated' by setting nargs and default to - # SUPPRESS. This blocks the addition of that positional to the - # namespace positionals = self._get_positional_actions() a = [action for action in positionals @@ -2429,80 +2590,21 @@ def parse_known_intermixed_args(self, args=None, namespace=None): raise TypeError('parse_intermixed_args: positional arg' ' with nargs=%s'%a[0].nargs) - if [action.dest for group in self._mutually_exclusive_groups - for action in group._group_actions if action in positionals]: - raise TypeError('parse_intermixed_args: positional in' - ' mutuallyExclusiveGroup') - - try: - save_usage = self.usage - try: - if self.usage is None: - # capture the full usage for use in error messages - self.usage = self.format_usage()[7:] - for action in positionals: - # deactivate positionals - action.save_nargs = action.nargs - # action.nargs = 0 - action.nargs = SUPPRESS - action.save_default = action.default - action.default = SUPPRESS - namespace, remaining_args = self.parse_known_args(args, - namespace) - for action in positionals: - # remove the empty positional values from namespace - if (hasattr(namespace, action.dest) - and getattr(namespace, action.dest)==[]): - from warnings import warn - warn('Do not expect %s in %s' % (action.dest, namespace)) - delattr(namespace, action.dest) - finally: - # restore nargs and usage before exiting - for action in positionals: - action.nargs = action.save_nargs - action.default = action.save_default - optionals = self._get_optional_actions() - try: - # parse positionals. optionals aren't normally required, but - # they could be, so make sure they aren't. - for action in optionals: - action.save_required = action.required - action.required = False - for group in self._mutually_exclusive_groups: - group.save_required = group.required - group.required = False - namespace, extras = self.parse_known_args(remaining_args, - namespace) - finally: - # restore parser values before exiting - for action in optionals: - action.required = action.save_required - for group in self._mutually_exclusive_groups: - group.required = group.save_required - finally: - self.usage = save_usage - return namespace, extras + return self._parse_known_args2(args, namespace, intermixed=True) # ======================== # Value conversion methods # ======================== - def _get_values(self, action, arg_strings): - # for everything but PARSER, REMAINDER args, strip out first '--' - if action.nargs not in [PARSER, REMAINDER]: - try: - arg_strings.remove('--') - except ValueError: - pass + def _get_values(self, action, arg_strings): # optional argument produces a default when not present if not arg_strings and action.nargs == OPTIONAL: if action.option_strings: value = action.const else: value = action.default - if isinstance(value, str): + if isinstance(value, str) and value is not SUPPRESS: value = self._get_value(action, value) - self._check_value(action, value) # when nargs='*' on a positional, if there were no command-line # args, use the default if it is anything other than None @@ -2510,11 +2612,8 @@ def _get_values(self, action, arg_strings): not action.option_strings): if action.default is not None: value = action.default - self._check_value(action, value) else: - # since arg_strings is always [] at this point - # there is no need to use self._check_value(action, value) - value = arg_strings + value = [] # single argument or optional argument produces a single value elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: @@ -2547,8 +2646,7 @@ def _get_values(self, action, arg_strings): def _get_value(self, action, arg_string): type_func = self._registry_get('type', action.type, action.type) if not callable(type_func): - msg = _('%r is not callable') - raise ArgumentError(action, msg % type_func) + raise TypeError(f'{type_func!r} is not callable') # convert the value to the appropriate type try: @@ -2571,15 +2669,33 @@ def _get_value(self, action, arg_string): def _check_value(self, action, value): # converted value must be one of the choices (if specified) - if action.choices is not None and value not in action.choices: - args = {'value': value, - 'choices': ', '.join(map(repr, action.choices))} + choices = action.choices + if choices is None: + return + + if isinstance(choices, str): + choices = iter(choices) + + if value not in choices: + args = {'value': str(value), + 'choices': ', '.join(repr(str(choice)) for choice in action.choices)} msg = _('invalid choice: %(value)r (choose from %(choices)s)') + + if self.suggest_on_error and isinstance(value, str): + if all(isinstance(choice, str) for choice in action.choices): + import difflib + suggestions = difflib.get_close_matches(value, action.choices, 1) + if suggestions: + args['closest'] = suggestions[0] + msg = _('invalid choice: %(value)r, maybe you meant %(closest)r? ' + '(choose from %(choices)s)') + raise ArgumentError(action, msg % args) # ======================= # Help-formatting methods # ======================= + def format_usage(self): formatter = self._get_formatter() formatter.add_usage(self.usage, self._actions, @@ -2610,11 +2726,21 @@ def format_help(self): return formatter.format_help() def _get_formatter(self): - return self.formatter_class(prog=self.prog) + formatter = self.formatter_class(prog=self.prog) + formatter._set_color(self.color) + return formatter + + def _get_validation_formatter(self): + # Return cached formatter for read-only validation operations + # (_expand_help and _format_args). Avoids repeated slow _set_color calls. + if self._cached_formatter is None: + self._cached_formatter = self._get_formatter() + return self._cached_formatter # ===================== # Help-printing methods # ===================== + def print_usage(self, file=None): if file is None: file = _sys.stdout @@ -2636,6 +2762,7 @@ def _print_message(self, message, file=None): # =============== # Exiting methods # =============== + def exit(self, status=0, message=None): if message: self._print_message(message, _sys.stderr) @@ -2653,3 +2780,7 @@ def error(self, message): self.print_usage(_sys.stderr) args = {'prog': self.prog, 'message': message} self.exit(2, _('%(prog)s: error: %(message)s\n') % args) + + def _warning(self, message): + args = {'prog': self.prog, 'message': message} + self._print_message(_('%(prog)s: warning: %(message)s\n') % args, _sys.stderr) diff --git a/Lib/ast.py b/Lib/ast.py index 07044706dc3..2f11683ecf7 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1,44 +1,38 @@ """ - ast - ~~~ - - The `ast` module helps Python applications to process trees of the Python - abstract syntax grammar. The abstract syntax itself might change with - each Python release; this module helps to find out programmatically what - the current grammar looks like and allows modifications of it. - - An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as - a flag to the `compile()` builtin function or by using the `parse()` - function from this module. The result will be a tree of objects whose - classes all inherit from `ast.AST`. - - A modified abstract syntax tree can be compiled into a Python code object - using the built-in `compile()` function. - - Additionally various helper functions are provided that make working with - the trees simpler. The main intention of the helper functions and this - module in general is to provide an easy to use interface for libraries - that work tightly with the python syntax (template engines for example). - - - :copyright: Copyright 2008 by Armin Ronacher. - :license: Python License. +The `ast` module helps Python applications to process trees of the Python +abstract syntax grammar. The abstract syntax itself might change with +each Python release; this module helps to find out programmatically what +the current grammar looks like and allows modifications of it. + +An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as +a flag to the `compile()` builtin function or by using the `parse()` +function from this module. The result will be a tree of objects whose +classes all inherit from `ast.AST`. + +A modified abstract syntax tree can be compiled into a Python code object +using the built-in `compile()` function. + +Additionally various helper functions are provided that make working with +the trees simpler. The main intention of the helper functions and this +module in general is to provide an easy to use interface for libraries +that work tightly with the python syntax (template engines for example). + +:copyright: Copyright 2008 by Armin Ronacher. +:license: Python License. """ -import sys -import re from _ast import * -from contextlib import contextmanager, nullcontext -from enum import IntEnum, auto, _simple_enum def parse(source, filename='', mode='exec', *, - type_comments=False, feature_version=None): + type_comments=False, feature_version=None, optimize=-1): """ Parse the source into an AST node. Equivalent to compile(source, filename, mode, PyCF_ONLY_AST). Pass type_comments=True to get back type comments where the syntax allows. """ flags = PyCF_ONLY_AST + if optimize > 0: + flags |= PyCF_OPTIMIZED_AST if type_comments: flags |= PyCF_TYPE_COMMENTS if feature_version is None: @@ -50,7 +44,7 @@ def parse(source, filename='', mode='exec', *, feature_version = minor # Else it should be an int giving the minor version for 3.x. return compile(source, filename, mode, flags, - _feature_version=feature_version) + _feature_version=feature_version, optimize=optimize) def literal_eval(node_or_string): @@ -112,7 +106,11 @@ def _convert(node): return _convert(node_or_string) -def dump(node, annotate_fields=True, include_attributes=False, *, indent=None): +def dump( + node, annotate_fields=True, include_attributes=False, + *, + indent=None, show_empty=False, +): """ Return a formatted dump of the tree in node. This is mainly useful for debugging purposes. If annotate_fields is true (by default), @@ -123,6 +121,8 @@ def dump(node, annotate_fields=True, include_attributes=False, *, indent=None): include_attributes can be set to true. If indent is a non-negative integer or string, then the tree will be pretty-printed with that indent level. None (the default) selects the single line representation. + If show_empty is False, then empty lists and fields that are None + will be omitted from the output for better readability. """ def _format(node, level=0): if indent is not None: @@ -135,6 +135,7 @@ def _format(node, level=0): if isinstance(node, AST): cls = type(node) args = [] + args_buffer = [] allsimple = True keywords = annotate_fields for name in node._fields: @@ -146,6 +147,16 @@ def _format(node, level=0): if value is None and getattr(cls, name, ...) is None: keywords = True continue + if not show_empty: + if value == []: + field_type = cls._field_types.get(name, object) + if getattr(field_type, '__origin__', ...) is list: + if not keywords: + args_buffer.append(repr(value)) + continue + if not keywords: + args.extend(args_buffer) + args_buffer = [] value, simple = _format(value, level) allsimple = allsimple and simple if keywords: @@ -304,12 +315,18 @@ def get_docstring(node, clean=True): return text -_line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))") +_line_pattern = None def _splitlines_no_ff(source, maxlines=None): """Split a string into lines ignoring form feed and other chars. This mimics how the Python parser splits source code. """ + global _line_pattern + if _line_pattern is None: + # lazily computed to speedup import time of `ast` + import re + _line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))") + lines = [] for lineno, match in enumerate(_line_pattern.finditer(source), 1): if maxlines is not None and lineno > maxlines: @@ -380,6 +397,88 @@ def walk(node): yield node +def compare( + a, + b, + /, + *, + compare_attributes=False, +): + """Recursively compares two ASTs. + + compare_attributes affects whether AST attributes are considered + in the comparison. If compare_attributes is False (default), then + attributes are ignored. Otherwise they must all be equal. This + option is useful to check whether the ASTs are structurally equal but + might differ in whitespace or similar details. + """ + + sentinel = object() # handle the possibility of a missing attribute/field + + def _compare(a, b): + # Compare two fields on an AST object, which may themselves be + # AST objects, lists of AST objects, or primitive ASDL types + # like identifiers and constants. + if isinstance(a, AST): + return compare( + a, + b, + compare_attributes=compare_attributes, + ) + elif isinstance(a, list): + # If a field is repeated, then both objects will represent + # the value as a list. + if len(a) != len(b): + return False + for a_item, b_item in zip(a, b): + if not _compare(a_item, b_item): + return False + else: + return True + else: + return type(a) is type(b) and a == b + + def _compare_fields(a, b): + if a._fields != b._fields: + return False + for field in a._fields: + a_field = getattr(a, field, sentinel) + b_field = getattr(b, field, sentinel) + if a_field is sentinel and b_field is sentinel: + # both nodes are missing a field at runtime + continue + if a_field is sentinel or b_field is sentinel: + # one of the node is missing a field + return False + if not _compare(a_field, b_field): + return False + else: + return True + + def _compare_attributes(a, b): + if a._attributes != b._attributes: + return False + # Attributes are always ints. + for attr in a._attributes: + a_attr = getattr(a, attr, sentinel) + b_attr = getattr(b, attr, sentinel) + if a_attr is sentinel and b_attr is sentinel: + # both nodes are missing an attribute at runtime + continue + if a_attr != b_attr: + return False + else: + return True + + if type(a) is not type(b): + return False + if not _compare_fields(a, b): + return False + if compare_attributes and not _compare_attributes(a, b): + return False + return True + + class NodeVisitor(object): """ A node visitor base class that walks the abstract syntax tree and calls a @@ -416,27 +515,6 @@ def generic_visit(self, node): elif isinstance(value, AST): self.visit(value) - def visit_Constant(self, node): - value = node.value - type_name = _const_node_type_names.get(type(value)) - if type_name is None: - for cls, name in _const_node_type_names.items(): - if isinstance(value, cls): - type_name = name - break - if type_name is not None: - method = 'visit_' + type_name - try: - visitor = getattr(self, method) - except AttributeError: - pass - else: - import warnings - warnings.warn(f"{method} is deprecated; add visit_Constant", - DeprecationWarning, 2) - return visitor(node) - return self.generic_visit(node) - class NodeTransformer(NodeVisitor): """ @@ -496,151 +574,6 @@ def generic_visit(self, node): setattr(node, field, new_node) return node - -_DEPRECATED_VALUE_ALIAS_MESSAGE = ( - "{name} is deprecated and will be removed in Python {remove}; use value instead" -) -_DEPRECATED_CLASS_MESSAGE = ( - "{name} is deprecated and will be removed in Python {remove}; " - "use ast.Constant instead" -) - - -# If the ast module is loaded more than once, only add deprecated methods once -if not hasattr(Constant, 'n'): - # The following code is for backward compatibility. - # It will be removed in future. - - def _n_getter(self): - """Deprecated. Use value instead.""" - import warnings - warnings._deprecated( - "Attribute n", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - return self.value - - def _n_setter(self, value): - import warnings - warnings._deprecated( - "Attribute n", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - self.value = value - - def _s_getter(self): - """Deprecated. Use value instead.""" - import warnings - warnings._deprecated( - "Attribute s", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - return self.value - - def _s_setter(self, value): - import warnings - warnings._deprecated( - "Attribute s", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - self.value = value - - Constant.n = property(_n_getter, _n_setter) - Constant.s = property(_s_getter, _s_setter) - -class _ABC(type): - - def __init__(cls, *args): - cls.__doc__ = """Deprecated AST node class. Use ast.Constant instead""" - - def __instancecheck__(cls, inst): - if cls in _const_types: - import warnings - warnings._deprecated( - f"ast.{cls.__qualname__}", - message=_DEPRECATED_CLASS_MESSAGE, - remove=(3, 14) - ) - if not isinstance(inst, Constant): - return False - if cls in _const_types: - try: - value = inst.value - except AttributeError: - return False - else: - return ( - isinstance(value, _const_types[cls]) and - not isinstance(value, _const_types_not.get(cls, ())) - ) - return type.__instancecheck__(cls, inst) - -def _new(cls, *args, **kwargs): - for key in kwargs: - if key not in cls._fields: - # arbitrary keyword arguments are accepted - continue - pos = cls._fields.index(key) - if pos < len(args): - raise TypeError(f"{cls.__name__} got multiple values for argument {key!r}") - if cls in _const_types: - import warnings - warnings._deprecated( - f"ast.{cls.__qualname__}", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) - ) - return Constant(*args, **kwargs) - return Constant.__new__(cls, *args, **kwargs) - -class Num(Constant, metaclass=_ABC): - _fields = ('n',) - __new__ = _new - -class Str(Constant, metaclass=_ABC): - _fields = ('s',) - __new__ = _new - -class Bytes(Constant, metaclass=_ABC): - _fields = ('s',) - __new__ = _new - -class NameConstant(Constant, metaclass=_ABC): - __new__ = _new - -class Ellipsis(Constant, metaclass=_ABC): - _fields = () - - def __new__(cls, *args, **kwargs): - if cls is _ast_Ellipsis: - import warnings - warnings._deprecated( - "ast.Ellipsis", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) - ) - return Constant(..., *args, **kwargs) - return Constant.__new__(cls, *args, **kwargs) - -# Keep another reference to Ellipsis in the global namespace -# so it can be referenced in Ellipsis.__new__ -# (The original "Ellipsis" name is removed from the global namespace later on) -_ast_Ellipsis = Ellipsis - -_const_types = { - Num: (int, float, complex), - Str: (str,), - Bytes: (bytes,), - NameConstant: (type(None), bool), - Ellipsis: (type(...),), -} -_const_types_not = { - Num: (bool,), -} - -_const_node_type_names = { - bool: 'NameConstant', # should be before int - type(None): 'NameConstant', - int: 'Num', - float: 'Num', - complex: 'Num', - str: 'Str', - bytes: 'Bytes', - type(...): 'Ellipsis', -} - class slice(AST): """Deprecated AST node class.""" @@ -681,1132 +614,22 @@ class Param(expr_context): """Deprecated AST node class. Unused in Python 3.""" -# Large float and imaginary literals get turned into infinities in the AST. -# We unparse those infinities to INFSTR. -_INFSTR = "1e" + repr(sys.float_info.max_10_exp + 1) - -@_simple_enum(IntEnum) -class _Precedence: - """Precedence table that originated from python grammar.""" - - NAMED_EXPR = auto() # := - TUPLE = auto() # , - YIELD = auto() # 'yield', 'yield from' - TEST = auto() # 'if'-'else', 'lambda' - OR = auto() # 'or' - AND = auto() # 'and' - NOT = auto() # 'not' - CMP = auto() # '<', '>', '==', '>=', '<=', '!=', - # 'in', 'not in', 'is', 'is not' - EXPR = auto() - BOR = EXPR # '|' - BXOR = auto() # '^' - BAND = auto() # '&' - SHIFT = auto() # '<<', '>>' - ARITH = auto() # '+', '-' - TERM = auto() # '*', '@', '/', '%', '//' - FACTOR = auto() # unary '+', '-', '~' - POWER = auto() # '**' - AWAIT = auto() # 'await' - ATOM = auto() - - def next(self): - try: - return self.__class__(self + 1) - except ValueError: - return self - - -_SINGLE_QUOTES = ("'", '"') -_MULTI_QUOTES = ('"""', "'''") -_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES) - -class _Unparser(NodeVisitor): - """Methods in this class recursively traverse an AST and - output source code for the abstract syntax; original formatting - is disregarded.""" - - def __init__(self, *, _avoid_backslashes=False): - self._source = [] - self._precedences = {} - self._type_ignores = {} - self._indent = 0 - self._avoid_backslashes = _avoid_backslashes - self._in_try_star = False - - def interleave(self, inter, f, seq): - """Call f on each item in seq, calling inter() in between.""" - seq = iter(seq) - try: - f(next(seq)) - except StopIteration: - pass - else: - for x in seq: - inter() - f(x) - - def items_view(self, traverser, items): - """Traverse and separate the given *items* with a comma and append it to - the buffer. If *items* is a single item sequence, a trailing comma - will be added.""" - if len(items) == 1: - traverser(items[0]) - self.write(",") - else: - self.interleave(lambda: self.write(", "), traverser, items) - - def maybe_newline(self): - """Adds a newline if it isn't the start of generated source""" - if self._source: - self.write("\n") - - def fill(self, text=""): - """Indent a piece of text and append it, according to the current - indentation level""" - self.maybe_newline() - self.write(" " * self._indent + text) - - def write(self, *text): - """Add new source parts""" - self._source.extend(text) - - @contextmanager - def buffered(self, buffer = None): - if buffer is None: - buffer = [] - - original_source = self._source - self._source = buffer - yield buffer - self._source = original_source - - @contextmanager - def block(self, *, extra = None): - """A context manager for preparing the source for blocks. It adds - the character':', increases the indentation on enter and decreases - the indentation on exit. If *extra* is given, it will be directly - appended after the colon character. - """ - self.write(":") - if extra: - self.write(extra) - self._indent += 1 - yield - self._indent -= 1 - - @contextmanager - def delimit(self, start, end): - """A context manager for preparing the source for expressions. It adds - *start* to the buffer and enters, after exit it adds *end*.""" - - self.write(start) - yield - self.write(end) - - def delimit_if(self, start, end, condition): - if condition: - return self.delimit(start, end) - else: - return nullcontext() - - def require_parens(self, precedence, node): - """Shortcut to adding precedence related parens""" - return self.delimit_if("(", ")", self.get_precedence(node) > precedence) - - def get_precedence(self, node): - return self._precedences.get(node, _Precedence.TEST) - - def set_precedence(self, precedence, *nodes): - for node in nodes: - self._precedences[node] = precedence - - def get_raw_docstring(self, node): - """If a docstring node is found in the body of the *node* parameter, - return that docstring node, None otherwise. - - Logic mirrored from ``_PyAST_GetDocString``.""" - if not isinstance( - node, (AsyncFunctionDef, FunctionDef, ClassDef, Module) - ) or len(node.body) < 1: - return None - node = node.body[0] - if not isinstance(node, Expr): - return None - node = node.value - if isinstance(node, Constant) and isinstance(node.value, str): - return node - - def get_type_comment(self, node): - comment = self._type_ignores.get(node.lineno) or node.type_comment - if comment is not None: - return f" # type: {comment}" - - def traverse(self, node): - if isinstance(node, list): - for item in node: - self.traverse(item) - else: - super().visit(node) - - # Note: as visit() resets the output text, do NOT rely on - # NodeVisitor.generic_visit to handle any nodes (as it calls back in to - # the subclass visit() method, which resets self._source to an empty list) - def visit(self, node): - """Outputs a source code string that, if converted back to an ast - (using ast.parse) will generate an AST equivalent to *node*""" - self._source = [] - self.traverse(node) - return "".join(self._source) - - def _write_docstring_and_traverse_body(self, node): - if (docstring := self.get_raw_docstring(node)): - self._write_docstring(docstring) - self.traverse(node.body[1:]) - else: - self.traverse(node.body) - - def visit_Module(self, node): - self._type_ignores = { - ignore.lineno: f"ignore{ignore.tag}" - for ignore in node.type_ignores - } - self._write_docstring_and_traverse_body(node) - self._type_ignores.clear() - - def visit_FunctionType(self, node): - with self.delimit("(", ")"): - self.interleave( - lambda: self.write(", "), self.traverse, node.argtypes - ) - - self.write(" -> ") - self.traverse(node.returns) - - def visit_Expr(self, node): - self.fill() - self.set_precedence(_Precedence.YIELD, node.value) - self.traverse(node.value) - - def visit_NamedExpr(self, node): - with self.require_parens(_Precedence.NAMED_EXPR, node): - self.set_precedence(_Precedence.ATOM, node.target, node.value) - self.traverse(node.target) - self.write(" := ") - self.traverse(node.value) - - def visit_Import(self, node): - self.fill("import ") - self.interleave(lambda: self.write(", "), self.traverse, node.names) - - def visit_ImportFrom(self, node): - self.fill("from ") - self.write("." * (node.level or 0)) - if node.module: - self.write(node.module) - self.write(" import ") - self.interleave(lambda: self.write(", "), self.traverse, node.names) - - def visit_Assign(self, node): - self.fill() - for target in node.targets: - self.set_precedence(_Precedence.TUPLE, target) - self.traverse(target) - self.write(" = ") - self.traverse(node.value) - if type_comment := self.get_type_comment(node): - self.write(type_comment) - - def visit_AugAssign(self, node): - self.fill() - self.traverse(node.target) - self.write(" " + self.binop[node.op.__class__.__name__] + "= ") - self.traverse(node.value) - - def visit_AnnAssign(self, node): - self.fill() - with self.delimit_if("(", ")", not node.simple and isinstance(node.target, Name)): - self.traverse(node.target) - self.write(": ") - self.traverse(node.annotation) - if node.value: - self.write(" = ") - self.traverse(node.value) - - def visit_Return(self, node): - self.fill("return") - if node.value: - self.write(" ") - self.traverse(node.value) - - def visit_Pass(self, node): - self.fill("pass") - - def visit_Break(self, node): - self.fill("break") - - def visit_Continue(self, node): - self.fill("continue") - - def visit_Delete(self, node): - self.fill("del ") - self.interleave(lambda: self.write(", "), self.traverse, node.targets) - - def visit_Assert(self, node): - self.fill("assert ") - self.traverse(node.test) - if node.msg: - self.write(", ") - self.traverse(node.msg) - - def visit_Global(self, node): - self.fill("global ") - self.interleave(lambda: self.write(", "), self.write, node.names) - - def visit_Nonlocal(self, node): - self.fill("nonlocal ") - self.interleave(lambda: self.write(", "), self.write, node.names) - - def visit_Await(self, node): - with self.require_parens(_Precedence.AWAIT, node): - self.write("await") - if node.value: - self.write(" ") - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - - def visit_Yield(self, node): - with self.require_parens(_Precedence.YIELD, node): - self.write("yield") - if node.value: - self.write(" ") - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - - def visit_YieldFrom(self, node): - with self.require_parens(_Precedence.YIELD, node): - self.write("yield from ") - if not node.value: - raise ValueError("Node can't be used without a value attribute.") - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - - def visit_Raise(self, node): - self.fill("raise") - if not node.exc: - if node.cause: - raise ValueError(f"Node can't use cause without an exception.") - return - self.write(" ") - self.traverse(node.exc) - if node.cause: - self.write(" from ") - self.traverse(node.cause) - - def do_visit_try(self, node): - self.fill("try") - with self.block(): - self.traverse(node.body) - for ex in node.handlers: - self.traverse(ex) - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - if node.finalbody: - self.fill("finally") - with self.block(): - self.traverse(node.finalbody) - - def visit_Try(self, node): - prev_in_try_star = self._in_try_star - try: - self._in_try_star = False - self.do_visit_try(node) - finally: - self._in_try_star = prev_in_try_star - - def visit_TryStar(self, node): - prev_in_try_star = self._in_try_star - try: - self._in_try_star = True - self.do_visit_try(node) - finally: - self._in_try_star = prev_in_try_star - - def visit_ExceptHandler(self, node): - self.fill("except*" if self._in_try_star else "except") - if node.type: - self.write(" ") - self.traverse(node.type) - if node.name: - self.write(" as ") - self.write(node.name) - with self.block(): - self.traverse(node.body) - - def visit_ClassDef(self, node): - self.maybe_newline() - for deco in node.decorator_list: - self.fill("@") - self.traverse(deco) - self.fill("class " + node.name) - if hasattr(node, "type_params"): - self._type_params_helper(node.type_params) - with self.delimit_if("(", ")", condition = node.bases or node.keywords): - comma = False - for e in node.bases: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - for e in node.keywords: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - - with self.block(): - self._write_docstring_and_traverse_body(node) - - def visit_FunctionDef(self, node): - self._function_helper(node, "def") - - def visit_AsyncFunctionDef(self, node): - self._function_helper(node, "async def") - - def _function_helper(self, node, fill_suffix): - self.maybe_newline() - for deco in node.decorator_list: - self.fill("@") - self.traverse(deco) - def_str = fill_suffix + " " + node.name - self.fill(def_str) - if hasattr(node, "type_params"): - self._type_params_helper(node.type_params) - with self.delimit("(", ")"): - self.traverse(node.args) - if node.returns: - self.write(" -> ") - self.traverse(node.returns) - with self.block(extra=self.get_type_comment(node)): - self._write_docstring_and_traverse_body(node) - - def _type_params_helper(self, type_params): - if type_params is not None and len(type_params) > 0: - with self.delimit("[", "]"): - self.interleave(lambda: self.write(", "), self.traverse, type_params) - - def visit_TypeVar(self, node): - self.write(node.name) - if node.bound: - self.write(": ") - self.traverse(node.bound) - - def visit_TypeVarTuple(self, node): - self.write("*" + node.name) - - def visit_ParamSpec(self, node): - self.write("**" + node.name) - - def visit_TypeAlias(self, node): - self.fill("type ") - self.traverse(node.name) - self._type_params_helper(node.type_params) - self.write(" = ") - self.traverse(node.value) - - def visit_For(self, node): - self._for_helper("for ", node) - - def visit_AsyncFor(self, node): - self._for_helper("async for ", node) - - def _for_helper(self, fill, node): - self.fill(fill) - self.set_precedence(_Precedence.TUPLE, node.target) - self.traverse(node.target) - self.write(" in ") - self.traverse(node.iter) - with self.block(extra=self.get_type_comment(node)): - self.traverse(node.body) - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - - def visit_If(self, node): - self.fill("if ") - self.traverse(node.test) - with self.block(): - self.traverse(node.body) - # collapse nested ifs into equivalent elifs. - while node.orelse and len(node.orelse) == 1 and isinstance(node.orelse[0], If): - node = node.orelse[0] - self.fill("elif ") - self.traverse(node.test) - with self.block(): - self.traverse(node.body) - # final else - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - - def visit_While(self, node): - self.fill("while ") - self.traverse(node.test) - with self.block(): - self.traverse(node.body) - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - - def visit_With(self, node): - self.fill("with ") - self.interleave(lambda: self.write(", "), self.traverse, node.items) - with self.block(extra=self.get_type_comment(node)): - self.traverse(node.body) - - def visit_AsyncWith(self, node): - self.fill("async with ") - self.interleave(lambda: self.write(", "), self.traverse, node.items) - with self.block(extra=self.get_type_comment(node)): - self.traverse(node.body) - - def _str_literal_helper( - self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False - ): - """Helper for writing string literals, minimizing escapes. - Returns the tuple (string literal to write, possible quote types). - """ - def escape_char(c): - # \n and \t are non-printable, but we only escape them if - # escape_special_whitespace is True - if not escape_special_whitespace and c in "\n\t": - return c - # Always escape backslashes and other non-printable characters - if c == "\\" or not c.isprintable(): - return c.encode("unicode_escape").decode("ascii") - return c - - escaped_string = "".join(map(escape_char, string)) - possible_quotes = quote_types - if "\n" in escaped_string: - possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES] - possible_quotes = [q for q in possible_quotes if q not in escaped_string] - if not possible_quotes: - # If there aren't any possible_quotes, fallback to using repr - # on the original string. Try to use a quote from quote_types, - # e.g., so that we use triple quotes for docstrings. - string = repr(string) - quote = next((q for q in quote_types if string[0] in q), string[0]) - return string[1:-1], [quote] - if escaped_string: - # Sort so that we prefer '''"''' over """\"""" - possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1]) - # If we're using triple quotes and we'd need to escape a final - # quote, escape it - if possible_quotes[0][0] == escaped_string[-1]: - assert len(possible_quotes[0]) == 3 - escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1] - return escaped_string, possible_quotes - - def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): - """Write string literal value with a best effort attempt to avoid backslashes.""" - string, quote_types = self._str_literal_helper(string, quote_types=quote_types) - quote_type = quote_types[0] - self.write(f"{quote_type}{string}{quote_type}") - - def visit_JoinedStr(self, node): - self.write("f") - - fstring_parts = [] - for value in node.values: - with self.buffered() as buffer: - self._write_fstring_inner(value) - fstring_parts.append( - ("".join(buffer), isinstance(value, Constant)) - ) - - new_fstring_parts = [] - quote_types = list(_ALL_QUOTES) - fallback_to_repr = False - for value, is_constant in fstring_parts: - if is_constant: - value, new_quote_types = self._str_literal_helper( - value, - quote_types=quote_types, - escape_special_whitespace=True, - ) - if set(new_quote_types).isdisjoint(quote_types): - fallback_to_repr = True - break - quote_types = new_quote_types - elif "\n" in value: - quote_types = [q for q in quote_types if q in _MULTI_QUOTES] - assert quote_types - new_fstring_parts.append(value) - - if fallback_to_repr: - # If we weren't able to find a quote type that works for all parts - # of the JoinedStr, fallback to using repr and triple single quotes. - quote_types = ["'''"] - new_fstring_parts.clear() - for value, is_constant in fstring_parts: - if is_constant: - value = repr('"' + value) # force repr to use single quotes - expected_prefix = "'\"" - assert value.startswith(expected_prefix), repr(value) - value = value[len(expected_prefix):-1] - new_fstring_parts.append(value) - - value = "".join(new_fstring_parts) - quote_type = quote_types[0] - self.write(f"{quote_type}{value}{quote_type}") - - def _write_fstring_inner(self, node): - if isinstance(node, JoinedStr): - # for both the f-string itself, and format_spec - for value in node.values: - self._write_fstring_inner(value) - elif isinstance(node, Constant) and isinstance(node.value, str): - value = node.value.replace("{", "{{").replace("}", "}}") - self.write(value) - elif isinstance(node, FormattedValue): - self.visit_FormattedValue(node) - else: - raise ValueError(f"Unexpected node inside JoinedStr, {node!r}") - - def visit_FormattedValue(self, node): - def unparse_inner(inner): - unparser = type(self)() - unparser.set_precedence(_Precedence.TEST.next(), inner) - return unparser.visit(inner) - - with self.delimit("{", "}"): - expr = unparse_inner(node.value) - if expr.startswith("{"): - # Separate pair of opening brackets as "{ {" - self.write(" ") - self.write(expr) - if node.conversion != -1: - self.write(f"!{chr(node.conversion)}") - if node.format_spec: - self.write(":") - self._write_fstring_inner(node.format_spec) - - def visit_Name(self, node): - self.write(node.id) - - def _write_docstring(self, node): - self.fill() - if node.kind == "u": - self.write("u") - self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES) - - def _write_constant(self, value): - if isinstance(value, (float, complex)): - # Substitute overflowing decimal literal for AST infinities, - # and inf - inf for NaNs. - self.write( - repr(value) - .replace("inf", _INFSTR) - .replace("nan", f"({_INFSTR}-{_INFSTR})") - ) - elif self._avoid_backslashes and isinstance(value, str): - self._write_str_avoiding_backslashes(value) - else: - self.write(repr(value)) - - def visit_Constant(self, node): - value = node.value - if isinstance(value, tuple): - with self.delimit("(", ")"): - self.items_view(self._write_constant, value) - elif value is ...: - self.write("...") - else: - if node.kind == "u": - self.write("u") - self._write_constant(node.value) - - def visit_List(self, node): - with self.delimit("[", "]"): - self.interleave(lambda: self.write(", "), self.traverse, node.elts) - - def visit_ListComp(self, node): - with self.delimit("[", "]"): - self.traverse(node.elt) - for gen in node.generators: - self.traverse(gen) - - def visit_GeneratorExp(self, node): - with self.delimit("(", ")"): - self.traverse(node.elt) - for gen in node.generators: - self.traverse(gen) - - def visit_SetComp(self, node): - with self.delimit("{", "}"): - self.traverse(node.elt) - for gen in node.generators: - self.traverse(gen) - - def visit_DictComp(self, node): - with self.delimit("{", "}"): - self.traverse(node.key) - self.write(": ") - self.traverse(node.value) - for gen in node.generators: - self.traverse(gen) - - def visit_comprehension(self, node): - if node.is_async: - self.write(" async for ") - else: - self.write(" for ") - self.set_precedence(_Precedence.TUPLE, node.target) - self.traverse(node.target) - self.write(" in ") - self.set_precedence(_Precedence.TEST.next(), node.iter, *node.ifs) - self.traverse(node.iter) - for if_clause in node.ifs: - self.write(" if ") - self.traverse(if_clause) - - def visit_IfExp(self, node): - with self.require_parens(_Precedence.TEST, node): - self.set_precedence(_Precedence.TEST.next(), node.body, node.test) - self.traverse(node.body) - self.write(" if ") - self.traverse(node.test) - self.write(" else ") - self.set_precedence(_Precedence.TEST, node.orelse) - self.traverse(node.orelse) - - def visit_Set(self, node): - if node.elts: - with self.delimit("{", "}"): - self.interleave(lambda: self.write(", "), self.traverse, node.elts) - else: - # `{}` would be interpreted as a dictionary literal, and - # `set` might be shadowed. Thus: - self.write('{*()}') - - def visit_Dict(self, node): - def write_key_value_pair(k, v): - self.traverse(k) - self.write(": ") - self.traverse(v) - - def write_item(item): - k, v = item - if k is None: - # for dictionary unpacking operator in dicts {**{'y': 2}} - # see PEP 448 for details - self.write("**") - self.set_precedence(_Precedence.EXPR, v) - self.traverse(v) - else: - write_key_value_pair(k, v) - - with self.delimit("{", "}"): - self.interleave( - lambda: self.write(", "), write_item, zip(node.keys, node.values) - ) - - def visit_Tuple(self, node): - with self.delimit_if( - "(", - ")", - len(node.elts) == 0 or self.get_precedence(node) > _Precedence.TUPLE - ): - self.items_view(self.traverse, node.elts) - - unop = {"Invert": "~", "Not": "not", "UAdd": "+", "USub": "-"} - unop_precedence = { - "not": _Precedence.NOT, - "~": _Precedence.FACTOR, - "+": _Precedence.FACTOR, - "-": _Precedence.FACTOR, - } - - def visit_UnaryOp(self, node): - operator = self.unop[node.op.__class__.__name__] - operator_precedence = self.unop_precedence[operator] - with self.require_parens(operator_precedence, node): - self.write(operator) - # factor prefixes (+, -, ~) shouldn't be separated - # from the value they belong, (e.g: +1 instead of + 1) - if operator_precedence is not _Precedence.FACTOR: - self.write(" ") - self.set_precedence(operator_precedence, node.operand) - self.traverse(node.operand) - - binop = { - "Add": "+", - "Sub": "-", - "Mult": "*", - "MatMult": "@", - "Div": "/", - "Mod": "%", - "LShift": "<<", - "RShift": ">>", - "BitOr": "|", - "BitXor": "^", - "BitAnd": "&", - "FloorDiv": "//", - "Pow": "**", - } - - binop_precedence = { - "+": _Precedence.ARITH, - "-": _Precedence.ARITH, - "*": _Precedence.TERM, - "@": _Precedence.TERM, - "/": _Precedence.TERM, - "%": _Precedence.TERM, - "<<": _Precedence.SHIFT, - ">>": _Precedence.SHIFT, - "|": _Precedence.BOR, - "^": _Precedence.BXOR, - "&": _Precedence.BAND, - "//": _Precedence.TERM, - "**": _Precedence.POWER, - } - - binop_rassoc = frozenset(("**",)) - def visit_BinOp(self, node): - operator = self.binop[node.op.__class__.__name__] - operator_precedence = self.binop_precedence[operator] - with self.require_parens(operator_precedence, node): - if operator in self.binop_rassoc: - left_precedence = operator_precedence.next() - right_precedence = operator_precedence - else: - left_precedence = operator_precedence - right_precedence = operator_precedence.next() - - self.set_precedence(left_precedence, node.left) - self.traverse(node.left) - self.write(f" {operator} ") - self.set_precedence(right_precedence, node.right) - self.traverse(node.right) - - cmpops = { - "Eq": "==", - "NotEq": "!=", - "Lt": "<", - "LtE": "<=", - "Gt": ">", - "GtE": ">=", - "Is": "is", - "IsNot": "is not", - "In": "in", - "NotIn": "not in", - } - - def visit_Compare(self, node): - with self.require_parens(_Precedence.CMP, node): - self.set_precedence(_Precedence.CMP.next(), node.left, *node.comparators) - self.traverse(node.left) - for o, e in zip(node.ops, node.comparators): - self.write(" " + self.cmpops[o.__class__.__name__] + " ") - self.traverse(e) - - boolops = {"And": "and", "Or": "or"} - boolop_precedence = {"and": _Precedence.AND, "or": _Precedence.OR} - - def visit_BoolOp(self, node): - operator = self.boolops[node.op.__class__.__name__] - operator_precedence = self.boolop_precedence[operator] - - def increasing_level_traverse(node): - nonlocal operator_precedence - operator_precedence = operator_precedence.next() - self.set_precedence(operator_precedence, node) - self.traverse(node) - - with self.require_parens(operator_precedence, node): - s = f" {operator} " - self.interleave(lambda: self.write(s), increasing_level_traverse, node.values) - - def visit_Attribute(self, node): - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - # Special case: 3.__abs__() is a syntax error, so if node.value - # is an integer literal then we need to either parenthesize - # it or add an extra space to get 3 .__abs__(). - if isinstance(node.value, Constant) and isinstance(node.value.value, int): - self.write(" ") - self.write(".") - self.write(node.attr) - - def visit_Call(self, node): - self.set_precedence(_Precedence.ATOM, node.func) - self.traverse(node.func) - with self.delimit("(", ")"): - comma = False - for e in node.args: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - for e in node.keywords: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - - def visit_Subscript(self, node): - def is_non_empty_tuple(slice_value): - return ( - isinstance(slice_value, Tuple) - and slice_value.elts - ) - - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - with self.delimit("[", "]"): - if is_non_empty_tuple(node.slice): - # parentheses can be omitted if the tuple isn't empty - self.items_view(self.traverse, node.slice.elts) - else: - self.traverse(node.slice) - - def visit_Starred(self, node): - self.write("*") - self.set_precedence(_Precedence.EXPR, node.value) - self.traverse(node.value) - - def visit_Ellipsis(self, node): - self.write("...") - - def visit_Slice(self, node): - if node.lower: - self.traverse(node.lower) - self.write(":") - if node.upper: - self.traverse(node.upper) - if node.step: - self.write(":") - self.traverse(node.step) - - def visit_Match(self, node): - self.fill("match ") - self.traverse(node.subject) - with self.block(): - for case in node.cases: - self.traverse(case) - - def visit_arg(self, node): - self.write(node.arg) - if node.annotation: - self.write(": ") - self.traverse(node.annotation) - - def visit_arguments(self, node): - first = True - # normal arguments - all_args = node.posonlyargs + node.args - defaults = [None] * (len(all_args) - len(node.defaults)) + node.defaults - for index, elements in enumerate(zip(all_args, defaults), 1): - a, d = elements - if first: - first = False - else: - self.write(", ") - self.traverse(a) - if d: - self.write("=") - self.traverse(d) - if index == len(node.posonlyargs): - self.write(", /") - - # varargs, or bare '*' if no varargs but keyword-only arguments present - if node.vararg or node.kwonlyargs: - if first: - first = False - else: - self.write(", ") - self.write("*") - if node.vararg: - self.write(node.vararg.arg) - if node.vararg.annotation: - self.write(": ") - self.traverse(node.vararg.annotation) - - # keyword-only arguments - if node.kwonlyargs: - for a, d in zip(node.kwonlyargs, node.kw_defaults): - self.write(", ") - self.traverse(a) - if d: - self.write("=") - self.traverse(d) - - # kwargs - if node.kwarg: - if first: - first = False - else: - self.write(", ") - self.write("**" + node.kwarg.arg) - if node.kwarg.annotation: - self.write(": ") - self.traverse(node.kwarg.annotation) - - def visit_keyword(self, node): - if node.arg is None: - self.write("**") - else: - self.write(node.arg) - self.write("=") - self.traverse(node.value) - - def visit_Lambda(self, node): - with self.require_parens(_Precedence.TEST, node): - self.write("lambda") - with self.buffered() as buffer: - self.traverse(node.args) - if buffer: - self.write(" ", *buffer) - self.write(": ") - self.set_precedence(_Precedence.TEST, node.body) - self.traverse(node.body) - - def visit_alias(self, node): - self.write(node.name) - if node.asname: - self.write(" as " + node.asname) - - def visit_withitem(self, node): - self.traverse(node.context_expr) - if node.optional_vars: - self.write(" as ") - self.traverse(node.optional_vars) - - def visit_match_case(self, node): - self.fill("case ") - self.traverse(node.pattern) - if node.guard: - self.write(" if ") - self.traverse(node.guard) - with self.block(): - self.traverse(node.body) - - def visit_MatchValue(self, node): - self.traverse(node.value) - - def visit_MatchSingleton(self, node): - self._write_constant(node.value) - - def visit_MatchSequence(self, node): - with self.delimit("[", "]"): - self.interleave( - lambda: self.write(", "), self.traverse, node.patterns - ) - - def visit_MatchStar(self, node): - name = node.name - if name is None: - name = "_" - self.write(f"*{name}") - - def visit_MatchMapping(self, node): - def write_key_pattern_pair(pair): - k, p = pair - self.traverse(k) - self.write(": ") - self.traverse(p) - - with self.delimit("{", "}"): - keys = node.keys - self.interleave( - lambda: self.write(", "), - write_key_pattern_pair, - zip(keys, node.patterns, strict=True), - ) - rest = node.rest - if rest is not None: - if keys: - self.write(", ") - self.write(f"**{rest}") - - def visit_MatchClass(self, node): - self.set_precedence(_Precedence.ATOM, node.cls) - self.traverse(node.cls) - with self.delimit("(", ")"): - patterns = node.patterns - self.interleave( - lambda: self.write(", "), self.traverse, patterns - ) - attrs = node.kwd_attrs - if attrs: - def write_attr_pattern(pair): - attr, pattern = pair - self.write(f"{attr}=") - self.traverse(pattern) - - if patterns: - self.write(", ") - self.interleave( - lambda: self.write(", "), - write_attr_pattern, - zip(attrs, node.kwd_patterns, strict=True), - ) - - def visit_MatchAs(self, node): - name = node.name - pattern = node.pattern - if name is None: - self.write("_") - elif pattern is None: - self.write(node.name) - else: - with self.require_parens(_Precedence.TEST, node): - self.set_precedence(_Precedence.BOR, node.pattern) - self.traverse(node.pattern) - self.write(f" as {node.name}") - - def visit_MatchOr(self, node): - with self.require_parens(_Precedence.BOR, node): - self.set_precedence(_Precedence.BOR.next(), *node.patterns) - self.interleave(lambda: self.write(" | "), self.traverse, node.patterns) - def unparse(ast_obj): - unparser = _Unparser() + global _Unparser + try: + unparser = _Unparser() + except NameError: + from _ast_unparse import Unparser as _Unparser + unparser = _Unparser() return unparser.visit(ast_obj) -_deprecated_globals = { - name: globals().pop(name) - for name in ('Num', 'Str', 'Bytes', 'NameConstant', 'Ellipsis') -} - -def __getattr__(name): - if name in _deprecated_globals: - globals()[name] = value = _deprecated_globals[name] - import warnings - warnings._deprecated( - f"ast.{name}", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) - ) - return value - raise AttributeError(f"module 'ast' has no attribute '{name}'") - - -def main(): +def main(args=None): import argparse + import sys - parser = argparse.ArgumentParser(prog='python -m ast') - parser.add_argument('infile', type=argparse.FileType(mode='rb'), nargs='?', - default='-', + parser = argparse.ArgumentParser(color=True) + parser.add_argument('infile', nargs='?', default='-', help='the file to parse; defaults to stdin') parser.add_argument('-m', '--mode', default='exec', choices=('exec', 'single', 'eval', 'func_type'), @@ -1818,12 +641,40 @@ def main(): 'column offsets') parser.add_argument('-i', '--indent', type=int, default=3, help='indentation of nodes (number of spaces)') - args = parser.parse_args() + parser.add_argument('--feature-version', + type=str, default=None, metavar='VERSION', + help='Python version in the format 3.x ' + '(for example, 3.10)') + parser.add_argument('-O', '--optimize', + type=int, default=-1, metavar='LEVEL', + help='optimization level for parser (default -1)') + parser.add_argument('--show-empty', default=False, action='store_true', + help='show empty lists and fields in dump output') + args = parser.parse_args(args) + + if args.infile == '-': + name = '' + source = sys.stdin.buffer.read() + else: + name = args.infile + with open(args.infile, 'rb') as infile: + source = infile.read() + + # Process feature_version + feature_version = None + if args.feature_version: + try: + major, minor = map(int, args.feature_version.split('.', 1)) + except ValueError: + parser.error('Invalid format for --feature-version; ' + 'expected format 3.x (for example, 3.10)') + + feature_version = (major, minor) - with args.infile as infile: - source = infile.read() - tree = parse(source, args.infile.name, args.mode, type_comments=args.no_type_comments) - print(dump(tree, include_attributes=args.include_attributes, indent=args.indent)) + tree = parse(source, name, args.mode, type_comments=args.no_type_comments, + feature_version=feature_version, optimize=args.optimize) + print(dump(tree, include_attributes=args.include_attributes, + indent=args.indent, show_empty=args.show_empty)) if __name__ == '__main__': main() diff --git a/Lib/asynchat.py b/Lib/asynchat.py deleted file mode 100644 index fc1146adbb1..00000000000 --- a/Lib/asynchat.py +++ /dev/null @@ -1,307 +0,0 @@ -# -*- Mode: Python; tab-width: 4 -*- -# Id: asynchat.py,v 2.26 2000/09/07 22:29:26 rushing Exp -# Author: Sam Rushing - -# ====================================================================== -# Copyright 1996 by Sam Rushing -# -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software and -# its documentation for any purpose and without fee is hereby -# granted, provided that the above copyright notice appear in all -# copies and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of Sam -# Rushing not be used in advertising or publicity pertaining to -# distribution of the software without specific, written prior -# permission. -# -# SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN -# NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR -# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -# ====================================================================== - -r"""A class supporting chat-style (command/response) protocols. - -This class adds support for 'chat' style protocols - where one side -sends a 'command', and the other sends a response (examples would be -the common internet protocols - smtp, nntp, ftp, etc..). - -The handle_read() method looks at the input stream for the current -'terminator' (usually '\r\n' for single-line responses, '\r\n.\r\n' -for multi-line output), calling self.found_terminator() on its -receipt. - -for example: -Say you build an async nntp client using this class. At the start -of the connection, you'll have self.terminator set to '\r\n', in -order to process the single-line greeting. Just before issuing a -'LIST' command you'll set it to '\r\n.\r\n'. The output of the LIST -command will be accumulated (using your own 'collect_incoming_data' -method) up to the terminator, and then control will be returned to -you - by calling your self.found_terminator() method. -""" -import asyncore -from collections import deque - - -class async_chat(asyncore.dispatcher): - """This is an abstract class. You must derive from this class, and add - the two methods collect_incoming_data() and found_terminator()""" - - # these are overridable defaults - - ac_in_buffer_size = 65536 - ac_out_buffer_size = 65536 - - # we don't want to enable the use of encoding by default, because that is a - # sign of an application bug that we don't want to pass silently - - use_encoding = 0 - encoding = 'latin-1' - - def __init__(self, sock=None, map=None): - # for string terminator matching - self.ac_in_buffer = b'' - - # we use a list here rather than io.BytesIO for a few reasons... - # del lst[:] is faster than bio.truncate(0) - # lst = [] is faster than bio.truncate(0) - self.incoming = [] - - # we toss the use of the "simple producer" and replace it with - # a pure deque, which the original fifo was a wrapping of - self.producer_fifo = deque() - asyncore.dispatcher.__init__(self, sock, map) - - def collect_incoming_data(self, data): - raise NotImplementedError("must be implemented in subclass") - - def _collect_incoming_data(self, data): - self.incoming.append(data) - - def _get_data(self): - d = b''.join(self.incoming) - del self.incoming[:] - return d - - def found_terminator(self): - raise NotImplementedError("must be implemented in subclass") - - def set_terminator(self, term): - """Set the input delimiter. - - Can be a fixed string of any length, an integer, or None. - """ - if isinstance(term, str) and self.use_encoding: - term = bytes(term, self.encoding) - elif isinstance(term, int) and term < 0: - raise ValueError('the number of received bytes must be positive') - self.terminator = term - - def get_terminator(self): - return self.terminator - - # grab some more data from the socket, - # throw it to the collector method, - # check for the terminator, - # if found, transition to the next state. - - def handle_read(self): - - try: - data = self.recv(self.ac_in_buffer_size) - except BlockingIOError: - return - except OSError as why: - self.handle_error() - return - - if isinstance(data, str) and self.use_encoding: - data = bytes(str, self.encoding) - self.ac_in_buffer = self.ac_in_buffer + data - - # Continue to search for self.terminator in self.ac_in_buffer, - # while calling self.collect_incoming_data. The while loop - # is necessary because we might read several data+terminator - # combos with a single recv(4096). - - while self.ac_in_buffer: - lb = len(self.ac_in_buffer) - terminator = self.get_terminator() - if not terminator: - # no terminator, collect it all - self.collect_incoming_data(self.ac_in_buffer) - self.ac_in_buffer = b'' - elif isinstance(terminator, int): - # numeric terminator - n = terminator - if lb < n: - self.collect_incoming_data(self.ac_in_buffer) - self.ac_in_buffer = b'' - self.terminator = self.terminator - lb - else: - self.collect_incoming_data(self.ac_in_buffer[:n]) - self.ac_in_buffer = self.ac_in_buffer[n:] - self.terminator = 0 - self.found_terminator() - else: - # 3 cases: - # 1) end of buffer matches terminator exactly: - # collect data, transition - # 2) end of buffer matches some prefix: - # collect data to the prefix - # 3) end of buffer does not match any prefix: - # collect data - terminator_len = len(terminator) - index = self.ac_in_buffer.find(terminator) - if index != -1: - # we found the terminator - if index > 0: - # don't bother reporting the empty string - # (source of subtle bugs) - self.collect_incoming_data(self.ac_in_buffer[:index]) - self.ac_in_buffer = self.ac_in_buffer[index+terminator_len:] - # This does the Right Thing if the terminator - # is changed here. - self.found_terminator() - else: - # check for a prefix of the terminator - index = find_prefix_at_end(self.ac_in_buffer, terminator) - if index: - if index != lb: - # we found a prefix, collect up to the prefix - self.collect_incoming_data(self.ac_in_buffer[:-index]) - self.ac_in_buffer = self.ac_in_buffer[-index:] - break - else: - # no prefix, collect it all - self.collect_incoming_data(self.ac_in_buffer) - self.ac_in_buffer = b'' - - def handle_write(self): - self.initiate_send() - - def handle_close(self): - self.close() - - def push(self, data): - if not isinstance(data, (bytes, bytearray, memoryview)): - raise TypeError('data argument must be byte-ish (%r)', - type(data)) - sabs = self.ac_out_buffer_size - if len(data) > sabs: - for i in range(0, len(data), sabs): - self.producer_fifo.append(data[i:i+sabs]) - else: - self.producer_fifo.append(data) - self.initiate_send() - - def push_with_producer(self, producer): - self.producer_fifo.append(producer) - self.initiate_send() - - def readable(self): - "predicate for inclusion in the readable for select()" - # cannot use the old predicate, it violates the claim of the - # set_terminator method. - - # return (len(self.ac_in_buffer) <= self.ac_in_buffer_size) - return 1 - - def writable(self): - "predicate for inclusion in the writable for select()" - return self.producer_fifo or (not self.connected) - - def close_when_done(self): - "automatically close this channel once the outgoing queue is empty" - self.producer_fifo.append(None) - - def initiate_send(self): - while self.producer_fifo and self.connected: - first = self.producer_fifo[0] - # handle empty string/buffer or None entry - if not first: - del self.producer_fifo[0] - if first is None: - self.handle_close() - return - - # handle classic producer behavior - obs = self.ac_out_buffer_size - try: - data = first[:obs] - except TypeError: - data = first.more() - if data: - self.producer_fifo.appendleft(data) - else: - del self.producer_fifo[0] - continue - - if isinstance(data, str) and self.use_encoding: - data = bytes(data, self.encoding) - - # send the data - try: - num_sent = self.send(data) - except OSError: - self.handle_error() - return - - if num_sent: - if num_sent < len(data) or obs < len(first): - self.producer_fifo[0] = first[num_sent:] - else: - del self.producer_fifo[0] - # we tried to send some actual data - return - - def discard_buffers(self): - # Emergencies only! - self.ac_in_buffer = b'' - del self.incoming[:] - self.producer_fifo.clear() - - -class simple_producer: - - def __init__(self, data, buffer_size=512): - self.data = data - self.buffer_size = buffer_size - - def more(self): - if len(self.data) > self.buffer_size: - result = self.data[:self.buffer_size] - self.data = self.data[self.buffer_size:] - return result - else: - result = self.data - self.data = b'' - return result - - -# Given 'haystack', see if any prefix of 'needle' is at its end. This -# assumes an exact match has already been checked. Return the number of -# characters matched. -# for example: -# f_p_a_e("qwerty\r", "\r\n") => 1 -# f_p_a_e("qwertydkjf", "\r\n") => 0 -# f_p_a_e("qwerty\r\n", "\r\n") => - -# this could maybe be made faster with a computed regex? -# [answer: no; circa Python-2.0, Jan 2001] -# new python: 28961/s -# old python: 18307/s -# re: 12820/s -# regex: 14035/s - -def find_prefix_at_end(haystack, needle): - l = len(needle) - 1 - while l and not haystack.endswith(needle[:l]): - l -= 1 - return l diff --git a/Lib/asyncio/__init__.py b/Lib/asyncio/__init__.py index 03165a425eb..32a5dbae03a 100644 --- a/Lib/asyncio/__init__.py +++ b/Lib/asyncio/__init__.py @@ -10,6 +10,7 @@ from .events import * from .exceptions import * from .futures import * +from .graph import * from .locks import * from .protocols import * from .runners import * @@ -27,6 +28,7 @@ events.__all__ + exceptions.__all__ + futures.__all__ + + graph.__all__ + locks.__all__ + protocols.__all__ + runners.__all__ + @@ -45,3 +47,28 @@ else: from .unix_events import * # pragma: no cover __all__ += unix_events.__all__ + +def __getattr__(name: str): + import warnings + + match name: + case "AbstractEventLoopPolicy": + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return events._AbstractEventLoopPolicy + case "DefaultEventLoopPolicy": + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + if sys.platform == 'win32': + return windows_events._DefaultEventLoopPolicy + return unix_events._DefaultEventLoopPolicy + case "WindowsSelectorEventLoopPolicy": + if sys.platform == 'win32': + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return windows_events._WindowsSelectorEventLoopPolicy + # Else fall through to the AttributeError below. + case "WindowsProactorEventLoopPolicy": + if sys.platform == 'win32': + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return windows_events._WindowsProactorEventLoopPolicy + # Else fall through to the AttributeError below. + + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/Lib/asyncio/__main__.py b/Lib/asyncio/__main__.py index 18bb87a5bc4..44e14771b11 100644 --- a/Lib/asyncio/__main__.py +++ b/Lib/asyncio/__main__.py @@ -1,41 +1,53 @@ +import argparse import ast import asyncio -import code +import asyncio.tools import concurrent.futures +import contextvars import inspect +import os +import site import sys import threading import types import warnings +from _colorize import get_theme +from _pyrepl.console import InteractiveColoredConsole + from . import futures -class AsyncIOInteractiveConsole(code.InteractiveConsole): +class AsyncIOInteractiveConsole(InteractiveColoredConsole): def __init__(self, locals, loop): - super().__init__(locals) + super().__init__(locals, filename="") self.compile.compiler.flags |= ast.PyCF_ALLOW_TOP_LEVEL_AWAIT self.loop = loop + self.context = contextvars.copy_context() def runcode(self, code): + global return_code future = concurrent.futures.Future() def callback(): + global return_code global repl_future - global repl_future_interrupted + global keyboard_interrupted repl_future = None - repl_future_interrupted = False + keyboard_interrupted = False func = types.FunctionType(code, self.locals) try: coro = func() - except SystemExit: - raise + except SystemExit as se: + return_code = se.code + self.loop.stop() + return except KeyboardInterrupt as ex: - repl_future_interrupted = True + keyboard_interrupted = True future.set_exception(ex) return except BaseException as ex: @@ -47,39 +59,77 @@ def callback(): return try: - repl_future = self.loop.create_task(coro) + repl_future = self.loop.create_task(coro, context=self.context) futures._chain_future(repl_future, future) except BaseException as exc: future.set_exception(exc) - loop.call_soon_threadsafe(callback) + self.loop.call_soon_threadsafe(callback, context=self.context) try: return future.result() - except SystemExit: - raise + except SystemExit as se: + return_code = se.code + self.loop.stop() + return except BaseException: - if repl_future_interrupted: - self.write("\nKeyboardInterrupt\n") + if keyboard_interrupted: + if not CAN_USE_PYREPL: + self.write("\nKeyboardInterrupt\n") else: self.showtraceback() - + return self.STATEMENT_FAILED class REPLThread(threading.Thread): def run(self): + global return_code + try: - banner = ( - f'asyncio REPL {sys.version} on {sys.platform}\n' - f'Use "await" directly instead of "asyncio.run()".\n' - f'Type "help", "copyright", "credits" or "license" ' - f'for more information.\n' - f'{getattr(sys, "ps1", ">>> ")}import asyncio' - ) - - console.interact( - banner=banner, - exitmsg='exiting asyncio REPL...') + if not sys.flags.quiet: + banner = ( + f'asyncio REPL {sys.version} on {sys.platform}\n' + f'Use "await" directly instead of "asyncio.run()".\n' + f'Type "help", "copyright", "credits" or "license" ' + f'for more information.\n' + ) + + console.write(banner) + + if not sys.flags.isolated and (startup_path := os.getenv("PYTHONSTARTUP")): + sys.audit("cpython.run_startup", startup_path) + try: + import tokenize + with tokenize.open(startup_path) as f: + startup_code = compile(f.read(), startup_path, "exec") + exec(startup_code, console.locals) + except SystemExit: + raise + except BaseException: + console.showtraceback() + + ps1 = getattr(sys, "ps1", ">>> ") + if CAN_USE_PYREPL: + theme = get_theme().syntax + ps1 = f"{theme.prompt}{ps1}{theme.reset}" + console.write(f"{ps1}import asyncio\n") + + if CAN_USE_PYREPL: + from _pyrepl.simple_interact import ( + run_multiline_interactive_console, + ) + try: + run_multiline_interactive_console(console) + except SystemExit: + # expected via the `exit` and `quit` commands + pass + except BaseException: + # unexpected issue + console.showtraceback() + console.write("Internal error, ") + return_code = 1 + else: + console.interact(banner="", exitmsg="") finally: warnings.filterwarnings( 'ignore', @@ -88,8 +138,56 @@ def run(self): loop.call_soon_threadsafe(loop.stop) + def interrupt(self) -> None: + if not CAN_USE_PYREPL: + return + + from _pyrepl.simple_interact import _get_reader + r = _get_reader() + if r.threading_hook is not None: + r.threading_hook.add("") # type: ignore + if __name__ == '__main__': + parser = argparse.ArgumentParser( + prog="python3 -m asyncio", + description="Interactive asyncio shell and CLI tools", + color=True, + ) + subparsers = parser.add_subparsers(help="sub-commands", dest="command") + ps = subparsers.add_parser( + "ps", help="Display a table of all pending tasks in a process" + ) + ps.add_argument("pid", type=int, help="Process ID to inspect") + pstree = subparsers.add_parser( + "pstree", help="Display a tree of all pending tasks in a process" + ) + pstree.add_argument("pid", type=int, help="Process ID to inspect") + args = parser.parse_args() + match args.command: + case "ps": + asyncio.tools.display_awaited_by_tasks_table(args.pid) + sys.exit(0) + case "pstree": + asyncio.tools.display_awaited_by_tasks_tree(args.pid) + sys.exit(0) + case None: + pass # continue to the interactive shell + case _: + # shouldn't happen as an invalid command-line wouldn't parse + # but let's keep it for the next person adding a command + print(f"error: unhandled command {args.command}", file=sys.stderr) + parser.print_usage(file=sys.stderr) + sys.exit(1) + + sys.audit("cpython.run_stdin") + + if os.getenv('PYTHON_BASIC_REPL'): + CAN_USE_PYREPL = False + else: + from _pyrepl.main import CAN_USE_PYREPL + + return_code = 0 loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) @@ -102,14 +200,31 @@ def run(self): console = AsyncIOInteractiveConsole(repl_locals, loop) repl_future = None - repl_future_interrupted = False + keyboard_interrupted = False try: import readline # NoQA except ImportError: - pass + readline = None - repl_thread = REPLThread() + interactive_hook = getattr(sys, "__interactivehook__", None) + + if interactive_hook is not None: + sys.audit("cpython.run_interactivehook", interactive_hook) + interactive_hook() + + if interactive_hook is site.register_readline: + # Fix the completer function to use the interactive console locals + try: + import rlcompleter + except: + pass + else: + if readline is not None: + completer = rlcompleter.Completer(console.locals) + readline.set_completer(completer.complete) + + repl_thread = REPLThread(name="Interactive thread") repl_thread.daemon = True repl_thread.start() @@ -117,9 +232,14 @@ def run(self): try: loop.run_forever() except KeyboardInterrupt: + keyboard_interrupted = True if repl_future and not repl_future.done(): repl_future.cancel() - repl_future_interrupted = True + repl_thread.interrupt() continue else: break + + console.write('exiting asyncio REPL...\n') + loop.close() + sys.exit(return_code) diff --git a/Lib/asyncio/base_events.py b/Lib/asyncio/base_events.py index 29eff0499cb..6d1f1f84323 100644 --- a/Lib/asyncio/base_events.py +++ b/Lib/asyncio/base_events.py @@ -17,7 +17,6 @@ import collections.abc import concurrent.futures import errno -import functools import heapq import itertools import os @@ -279,7 +278,9 @@ def __init__(self, loop, sockets, protocol_factory, ssl_context, backlog, ssl_handshake_timeout, ssl_shutdown_timeout=None): self._loop = loop self._sockets = sockets - self._active_count = 0 + # Weak references so we don't break Transport's ability to + # detect abandoned transports + self._clients = weakref.WeakSet() self._waiters = [] self._protocol_factory = protocol_factory self._backlog = backlog @@ -292,14 +293,13 @@ def __init__(self, loop, sockets, protocol_factory, ssl_context, backlog, def __repr__(self): return f'<{self.__class__.__name__} sockets={self.sockets!r}>' - def _attach(self): + def _attach(self, transport): assert self._sockets is not None - self._active_count += 1 + self._clients.add(transport) - def _detach(self): - assert self._active_count > 0 - self._active_count -= 1 - if self._active_count == 0 and self._sockets is None: + def _detach(self, transport): + self._clients.discard(transport) + if len(self._clients) == 0 and self._sockets is None: self._wakeup() def _wakeup(self): @@ -348,9 +348,17 @@ def close(self): self._serving_forever_fut.cancel() self._serving_forever_fut = None - if self._active_count == 0: + if len(self._clients) == 0: self._wakeup() + def close_clients(self): + for transport in self._clients.copy(): + transport.close() + + def abort_clients(self): + for transport in self._clients.copy(): + transport.abort() + async def start_serving(self): self._start_serving() # Skip one loop iteration so that all 'loop.add_reader' @@ -422,6 +430,8 @@ def __init__(self): self._clock_resolution = time.get_clock_info('monotonic').resolution self._exception_handler = None self.set_debug(coroutines._is_debug_mode()) + # The preserved state of async generator hooks. + self._old_agen_hooks = None # In debug mode, if the execution of a callback or a step of a task # exceed this duration in seconds, the slow callback/task is logged. self.slow_callback_duration = 0.1 @@ -448,26 +458,24 @@ def create_future(self): """Create a Future object attached to the loop.""" return futures.Future(loop=self) - def create_task(self, coro, *, name=None, context=None): - """Schedule a coroutine object. + def create_task(self, coro, **kwargs): + """Schedule or begin executing a coroutine object. Return a task object. """ self._check_closed() - if self._task_factory is None: - task = tasks.Task(coro, loop=self, name=name, context=context) - if task._source_traceback: - del task._source_traceback[-1] - else: - if context is None: - # Use legacy API if context is not needed - task = self._task_factory(self, coro) - else: - task = self._task_factory(self, coro, context=context) - - tasks._set_task_name(task, name) + if self._task_factory is not None: + return self._task_factory(self, coro, **kwargs) - return task + task = tasks.Task(coro, loop=self, **kwargs) + if task._source_traceback: + del task._source_traceback[-1] + try: + return task + finally: + # gh-128552: prevent a refcycle of + # task.exception().__traceback__->BaseEventLoop.create_task->task + del task def set_task_factory(self, factory): """Set a task factory that will be used by loop.create_task(). @@ -475,9 +483,10 @@ def set_task_factory(self, factory): If factory is None the default task factory will be set. If factory is a callable, it should have a signature matching - '(loop, coro)', where 'loop' will be a reference to the active - event loop, 'coro' will be a coroutine object. The callable - must return a Future. + '(loop, coro, **kwargs)', where 'loop' will be a reference to the + active event loop, 'coro' will be a coroutine object, and **kwargs + will be arbitrary keyword arguments that should be passed on to + Task. The callable must return a Task. """ if factory is not None and not callable(factory): raise TypeError('task factory must be a callable or None') @@ -624,29 +633,52 @@ def _check_running(self): raise RuntimeError( 'Cannot run the event loop while another loop is running') - def run_forever(self): - """Run until stop() is called.""" + def _run_forever_setup(self): + """Prepare the run loop to process events. + + This method exists so that custom event loop subclasses (e.g., event loops + that integrate a GUI event loop with Python's event loop) have access to all the + loop setup logic. + """ self._check_closed() self._check_running() self._set_coroutine_origin_tracking(self._debug) - old_agen_hooks = sys.get_asyncgen_hooks() - try: - self._thread_id = threading.get_ident() - sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook, - finalizer=self._asyncgen_finalizer_hook) + self._old_agen_hooks = sys.get_asyncgen_hooks() + self._thread_id = threading.get_ident() + sys.set_asyncgen_hooks( + firstiter=self._asyncgen_firstiter_hook, + finalizer=self._asyncgen_finalizer_hook + ) + + events._set_running_loop(self) + + def _run_forever_cleanup(self): + """Clean up after an event loop finishes the looping over events. + + This method exists so that custom event loop subclasses (e.g., event loops + that integrate a GUI event loop with Python's event loop) have access to all the + loop cleanup logic. + """ + self._stopping = False + self._thread_id = None + events._set_running_loop(None) + self._set_coroutine_origin_tracking(False) + # Restore any pre-existing async generator hooks. + if self._old_agen_hooks is not None: + sys.set_asyncgen_hooks(*self._old_agen_hooks) + self._old_agen_hooks = None - events._set_running_loop(self) + def run_forever(self): + """Run until stop() is called.""" + self._run_forever_setup() + try: while True: self._run_once() if self._stopping: break finally: - self._stopping = False - self._thread_id = None - events._set_running_loop(None) - self._set_coroutine_origin_tracking(False) - sys.set_asyncgen_hooks(*old_agen_hooks) + self._run_forever_cleanup() def run_until_complete(self, future): """Run until the Future is done. @@ -689,8 +721,8 @@ def run_until_complete(self, future): def stop(self): """Stop running the event loop. - Every callback already scheduled will still run. This simply informs - run_forever to stop looping after a complete iteration. + Every callback already scheduled will still run. This simply + informs run_forever to stop looping after a complete iteration. """ self._stopping = True @@ -803,7 +835,7 @@ def call_soon(self, callback, *args, context=None): def _check_callback(self, callback, method): if (coroutines.iscoroutine(callback) or - coroutines.iscoroutinefunction(callback)): + coroutines._iscoroutinefunction(callback)): raise TypeError( f"coroutines cannot be used with {method}()") if not callable(callback): @@ -840,7 +872,10 @@ def call_soon_threadsafe(self, callback, *args, context=None): self._check_closed() if self._debug: self._check_callback(callback, 'call_soon_threadsafe') - handle = self._call_soon(callback, args, context) + handle = events._ThreadSafeHandle(callback, args, self, context) + self._ready.append(handle) + if handle._source_traceback: + del handle._source_traceback[-1] if handle._source_traceback: del handle._source_traceback[-1] self._write_to_self() @@ -928,7 +963,7 @@ async def _sock_sendfile_native(self, sock, file, offset, count): f"and file {file!r} combination") async def _sock_sendfile_fallback(self, sock, file, offset, count): - if offset: + if hasattr(file, 'seek'): file.seek(offset) blocksize = ( min(count, constants.SENDFILE_FALLBACK_READBUFFER_SIZE) @@ -981,39 +1016,43 @@ async def _connect_sock(self, exceptions, addr_info, local_addr_infos=None): family, type_, proto, _, address = addr_info sock = None try: - sock = socket.socket(family=family, type=type_, proto=proto) - sock.setblocking(False) - if local_addr_infos is not None: - for lfamily, _, _, _, laddr in local_addr_infos: - # skip local addresses of different family - if lfamily != family: - continue - try: - sock.bind(laddr) - break - except OSError as exc: - msg = ( - f'error while attempting to bind on ' - f'address {laddr!r}: ' - f'{exc.strerror.lower()}' - ) - exc = OSError(exc.errno, msg) - my_exceptions.append(exc) - else: # all bind attempts failed - if my_exceptions: - raise my_exceptions.pop() - else: - raise OSError(f"no matching local address with {family=} found") - await self.sock_connect(sock, address) - return sock - except OSError as exc: - my_exceptions.append(exc) - if sock is not None: - sock.close() - raise + try: + sock = socket.socket(family=family, type=type_, proto=proto) + sock.setblocking(False) + if local_addr_infos is not None: + for lfamily, _, _, _, laddr in local_addr_infos: + # skip local addresses of different family + if lfamily != family: + continue + try: + sock.bind(laddr) + break + except OSError as exc: + msg = ( + f'error while attempting to bind on ' + f'address {laddr!r}: {str(exc).lower()}' + ) + exc = OSError(exc.errno, msg) + my_exceptions.append(exc) + else: # all bind attempts failed + if my_exceptions: + raise my_exceptions.pop() + else: + raise OSError(f"no matching local address with {family=} found") + await self.sock_connect(sock, address) + return sock + except OSError as exc: + my_exceptions.append(exc) + raise except: if sock is not None: - sock.close() + try: + sock.close() + except OSError: + # An error when closing a newly created socket is + # not important, but it can overwrite more important + # non-OSError error. So ignore it. + pass raise finally: exceptions = my_exceptions = None @@ -1031,12 +1070,12 @@ async def create_connection( Create a streaming transport connection to a given internet host and port: socket family AF_INET or socket.AF_INET6 depending on host (or - family if specified), socket type SOCK_STREAM. protocol_factory must be - a callable returning a protocol instance. + family if specified), socket type SOCK_STREAM. protocol_factory must + be a callable returning a protocol instance. - This method is a coroutine which will try to establish the connection - in the background. When successful, the coroutine returns a - (transport, protocol) pair. + This method is a coroutine which will try to establish the + connection in the background. When successful, the coroutine + returns a (transport, protocol) pair. """ if server_hostname is not None and not ssl: raise ValueError('server_hostname is only meaningful with ssl') @@ -1107,11 +1146,18 @@ async def create_connection( except OSError: continue else: # using happy eyeballs - sock, _, _ = await staggered.staggered_race( - (functools.partial(self._connect_sock, - exceptions, addrinfo, laddr_infos) - for addrinfo in infos), - happy_eyeballs_delay, loop=self) + sock = (await staggered.staggered_race( + ( + # can't use functools.partial as it keeps a reference + # to exceptions + lambda addrinfo=addrinfo: self._connect_sock( + exceptions, addrinfo, laddr_infos + ) + for addrinfo in infos + ), + happy_eyeballs_delay, + loop=self, + ))[0] # can't use sock, _, _ as it keeks a reference to exceptions if sock is None: exceptions = [exc for sub in exceptions for exc in sub] @@ -1120,7 +1166,7 @@ async def create_connection( raise ExceptionGroup("create_connection failed", exceptions) if len(exceptions) == 1: raise exceptions[0] - else: + elif exceptions: # If they all have the same str(), raise one. model = str(exceptions[0]) if all(str(exc) == model for exc in exceptions): @@ -1129,6 +1175,9 @@ async def create_connection( # the various error messages. raise OSError('Multiple exceptions: {}'.format( ', '.join(str(exc) for exc in exceptions))) + else: + # No exceptions were collected, raise a timeout error + raise TimeoutError('create_connection failed') finally: exceptions = None @@ -1229,7 +1278,6 @@ async def sendfile(self, transport, file, offset=0, count=None, raise RuntimeError( f"fallback is disabled and native sendfile is not " f"supported for transport {transport!r}") - return await self._sendfile_fallback(transport, file, offset, count) @@ -1238,7 +1286,7 @@ async def _sendfile_native(self, transp, file, offset, count): "sendfile syscall is not supported") async def _sendfile_fallback(self, transp, file, offset, count): - if offset: + if hasattr(file, 'seek'): file.seek(offset) blocksize = min(count, 16384) if count else 16384 buf = bytearray(blocksize) @@ -1254,8 +1302,8 @@ async def _sendfile_fallback(self, transp, file, offset, count): read = await self.run_in_executor(None, file.readinto, view) if not read: return total_sent # EOF - await proto.drain() transp.write(view[:read]) + await proto.drain() total_sent += read finally: if total_sent > 0 and hasattr(file, 'seek'): @@ -1296,6 +1344,17 @@ async def start_tls(self, transport, protocol, sslcontext, *, # have a chance to get called before "ssl_protocol.connection_made()". transport.pause_reading() + # gh-142352: move buffered StreamReader data to SSLProtocol + if server_side: + from .streams import StreamReaderProtocol + if isinstance(protocol, StreamReaderProtocol): + stream_reader = getattr(protocol, '_stream_reader', None) + if stream_reader is not None: + buffer = stream_reader._buffer + if buffer: + ssl_protocol._incoming.write(buffer) + buffer.clear() + transport.set_protocol(ssl_protocol) conmade_cb = self.call_soon(ssl_protocol.connection_made, transport) resume_cb = self.call_soon(transport.resume_reading) @@ -1474,6 +1533,7 @@ async def create_server( ssl=None, reuse_address=None, reuse_port=None, + keep_alive=None, ssl_handshake_timeout=None, ssl_shutdown_timeout=None, start_serving=True): @@ -1482,11 +1542,11 @@ async def create_server( The host parameter can be a string, in that case the TCP server is bound to host and port. - The host parameter can also be a sequence of strings and in that case - the TCP server is bound to all hosts of the sequence. If a host - appears multiple times (possibly indirectly e.g. when hostnames - resolve to the same IP address), the server is only bound once to that - host. + The host parameter can also be a sequence of strings and in that + case the TCP server is bound to all hosts of the sequence. If + a host appears multiple times (possibly indirectly e.g. when + hostnames resolve to the same IP address), the server is only bound + once to that host. Return a Server object which can be used to stop the service. @@ -1545,8 +1605,13 @@ async def create_server( if reuse_address: sock.setsockopt( socket.SOL_SOCKET, socket.SO_REUSEADDR, True) - if reuse_port: + # Since Linux 6.12.9, SO_REUSEPORT is not allowed + # on other address families than AF_INET/AF_INET6. + if reuse_port and af in (socket.AF_INET, socket.AF_INET6): _set_reuseport(sock) + if keep_alive: + sock.setsockopt( + socket.SOL_SOCKET, socket.SO_KEEPALIVE, True) # Disable IPv4/IPv6 dual stack support (enabled by # default on Linux) which makes a single socket # listen on both address families. @@ -1561,7 +1626,7 @@ async def create_server( except OSError as err: msg = ('error while attempting ' 'to bind on address %r: %s' - % (sa, err.strerror.lower())) + % (sa, str(err).lower())) if err.errno == errno.EADDRNOTAVAIL: # Assume the family is not enabled (bpo-30945) sockets.pop() @@ -1619,8 +1684,7 @@ async def connect_accepted_socket( raise ValueError( 'ssl_shutdown_timeout is only meaningful with ssl') - if sock is not None: - _check_ssl_socket(sock) + _check_ssl_socket(sock) transport, protocol = await self._create_connection_transport( sock, protocol_factory, ssl, '', server_side=True, @@ -1833,6 +1897,8 @@ def call_exception_handler(self, context): - 'protocol' (optional): Protocol instance; - 'transport' (optional): Transport instance; - 'socket' (optional): Socket instance; + - 'source_traceback' (optional): Traceback of the source; + - 'handle_traceback' (optional): Traceback of the handle; - 'asyncgen' (optional): Asynchronous generator that caused the exception. @@ -1943,8 +2009,11 @@ def _run_once(self): timeout = 0 elif self._scheduled: # Compute the desired timeout. - when = self._scheduled[0]._when - timeout = min(max(0, when - self.time()), MAXIMUM_SELECT_TIMEOUT) + timeout = self._scheduled[0]._when - self.time() + if timeout > MAXIMUM_SELECT_TIMEOUT: + timeout = MAXIMUM_SELECT_TIMEOUT + elif timeout < 0: + timeout = 0 event_list = self._selector.select(timeout) self._process_events(event_list) diff --git a/Lib/asyncio/base_subprocess.py b/Lib/asyncio/base_subprocess.py index 4c9b0dd5653..224b1883808 100644 --- a/Lib/asyncio/base_subprocess.py +++ b/Lib/asyncio/base_subprocess.py @@ -1,6 +1,9 @@ import collections import subprocess import warnings +import os +import signal +import sys from . import protocols from . import transports @@ -23,6 +26,7 @@ def __init__(self, loop, protocol, args, shell, self._pending_calls = collections.deque() self._pipes = {} self._finished = False + self._pipes_connected = False if stdin == subprocess.PIPE: self._pipes[0] = None @@ -101,7 +105,12 @@ def close(self): for proto in self._pipes.values(): if proto is None: continue - proto.pipe.close() + # See gh-114177 + # skip closing the pipe if loop is already closed + # this can happen e.g. when loop is closed immediately after + # process is killed + if self._loop and not self._loop.is_closed(): + proto.pipe.close() if (self._proc is not None and # has the child process finished? @@ -115,7 +124,8 @@ def close(self): try: self._proc.kill() - except ProcessLookupError: + except (ProcessLookupError, PermissionError): + # the process may have already exited or may be running setuid pass # Don't clear the _proc reference yet: _post_init() may still run @@ -141,17 +151,31 @@ def _check_proc(self): if self._proc is None: raise ProcessLookupError() - def send_signal(self, signal): - self._check_proc() - self._proc.send_signal(signal) + if sys.platform == 'win32': + def send_signal(self, signal): + self._check_proc() + self._proc.send_signal(signal) + + def terminate(self): + self._check_proc() + self._proc.terminate() + + def kill(self): + self._check_proc() + self._proc.kill() + else: + def send_signal(self, signal): + self._check_proc() + try: + os.kill(self._proc.pid, signal) + except ProcessLookupError: + pass - def terminate(self): - self._check_proc() - self._proc.terminate() + def terminate(self): + self.send_signal(signal.SIGTERM) - def kill(self): - self._check_proc() - self._proc.kill() + def kill(self): + self.send_signal(signal.SIGKILL) async def _connect_pipes(self, waiter): try: @@ -190,6 +214,7 @@ async def _connect_pipes(self, waiter): else: if waiter is not None and not waiter.cancelled(): waiter.set_result(None) + self._pipes_connected = True def _call(self, cb, *data): if self._pending_calls is not None: @@ -233,6 +258,15 @@ def _try_finish(self): assert not self._finished if self._returncode is None: return + if not self._pipes_connected: + # self._pipes_connected can be False if not all pipes were connected + # because either the process failed to start or the self._connect_pipes task + # got cancelled. In this broken state we consider all pipes disconnected and + # to avoid hanging forever in self._wait as otherwise _exit_waiters + # would never be woken up, we wake them up here. + for waiter in self._exit_waiters: + if not waiter.done(): + waiter.set_result(self._returncode) if all(p is not None and p.disconnected for p in self._pipes.values()): self._finished = True @@ -244,7 +278,7 @@ def _call_connection_lost(self, exc): finally: # wake up futures waiting for wait() for waiter in self._exit_waiters: - if not waiter.cancelled(): + if not waiter.done(): waiter.set_result(self._returncode) self._exit_waiters = None self._loop = None diff --git a/Lib/asyncio/coroutines.py b/Lib/asyncio/coroutines.py index ab4f30eb51b..6727065bbe3 100644 --- a/Lib/asyncio/coroutines.py +++ b/Lib/asyncio/coroutines.py @@ -19,6 +19,15 @@ def _is_debug_mode(): def iscoroutinefunction(func): """Return True if func is a decorated coroutine function.""" + import warnings + warnings._deprecated("asyncio.iscoroutinefunction", + f"{warnings._DEPRECATED_MSG}; " + "use inspect.iscoroutinefunction() instead", + remove=(3,16)) + return _iscoroutinefunction(func) + + +def _iscoroutinefunction(func): return (inspect.iscoroutinefunction(func) or getattr(func, '_is_coroutine', None) is _is_coroutine) diff --git a/Lib/asyncio/events.py b/Lib/asyncio/events.py index 016852880ca..42d8408a38d 100644 --- a/Lib/asyncio/events.py +++ b/Lib/asyncio/events.py @@ -5,14 +5,18 @@ # SPDX-FileCopyrightText: Copyright (c) 2015-2021 MagicStack Inc. http://magic.io __all__ = ( - 'AbstractEventLoopPolicy', - 'AbstractEventLoop', 'AbstractServer', - 'Handle', 'TimerHandle', - 'get_event_loop_policy', 'set_event_loop_policy', - 'get_event_loop', 'set_event_loop', 'new_event_loop', - 'get_child_watcher', 'set_child_watcher', - '_set_running_loop', 'get_running_loop', - '_get_running_loop', + "AbstractEventLoop", + "AbstractServer", + "Handle", + "TimerHandle", + "get_event_loop_policy", + "set_event_loop_policy", + "get_event_loop", + "set_event_loop", + "new_event_loop", + "_set_running_loop", + "get_running_loop", + "_get_running_loop", ) import contextvars @@ -22,6 +26,7 @@ import subprocess import sys import threading +import warnings from . import format_helpers @@ -54,7 +59,8 @@ def _repr_info(self): info.append('cancelled') if self._callback is not None: info.append(format_helpers._format_callback_source( - self._callback, self._args)) + self._callback, self._args, + debug=self._loop.get_debug())) if self._source_traceback: frame = self._source_traceback[-1] info.append(f'created at {frame[0]}:{frame[1]}') @@ -90,7 +96,8 @@ def _run(self): raise except BaseException as exc: cb = format_helpers._format_callback_source( - self._callback, self._args) + self._callback, self._args, + debug=self._loop.get_debug()) msg = f'Exception in callback {cb}' context = { 'message': msg, @@ -102,6 +109,34 @@ def _run(self): self._loop.call_exception_handler(context) self = None # Needed to break cycles when an exception occurs. +# _ThreadSafeHandle is used for callbacks scheduled with call_soon_threadsafe +# and is thread safe unlike Handle which is not thread safe. +class _ThreadSafeHandle(Handle): + + __slots__ = ('_lock',) + + def __init__(self, callback, args, loop, context=None): + super().__init__(callback, args, loop, context) + self._lock = threading.RLock() + + def cancel(self): + with self._lock: + return super().cancel() + + def cancelled(self): + with self._lock: + return super().cancelled() + + def _run(self): + # The event loop checks for cancellation without holding the lock + # It is possible that the handle is cancelled after the check + # but before the callback is called so check it again after acquiring + # the lock and return without calling the callback if it is cancelled. + with self._lock: + if self._cancelled: + return + return super()._run() + class TimerHandle(Handle): """Object returned by timed callback registration methods.""" @@ -173,6 +208,14 @@ def close(self): """Stop serving. This leaves existing connections open.""" raise NotImplementedError + def close_clients(self): + """Close all active connections.""" + raise NotImplementedError + + def abort_clients(self): + """Close all active connections immediately.""" + raise NotImplementedError + def get_loop(self): """Get the event loop the Server object is attached to.""" raise NotImplementedError @@ -282,7 +325,7 @@ def create_future(self): # Method scheduling a coroutine object: create a task. - def create_task(self, coro, *, name=None, context=None): + def create_task(self, coro, **kwargs): raise NotImplementedError # Methods for interacting with threads. @@ -320,6 +363,7 @@ async def create_server( *, family=socket.AF_UNSPEC, flags=socket.AI_PASSIVE, sock=None, backlog=100, ssl=None, reuse_address=None, reuse_port=None, + keep_alive=None, ssl_handshake_timeout=None, ssl_shutdown_timeout=None, start_serving=True): @@ -330,8 +374,8 @@ async def create_server( If host is an empty string or None all interfaces are assumed and a list of multiple sockets will be returned (most likely - one for IPv4 and another one for IPv6). The host parameter can also be - a sequence (e.g. list) of hosts to bind to. + one for IPv4 and another one for IPv6). The host parameter can also + be a sequence (e.g. list) of hosts to bind to. family can be set to either AF_INET or AF_INET6 to force the socket to use IPv4 or IPv6. If not set it will be determined @@ -358,6 +402,9 @@ async def create_server( they all set this flag when being created. This option is not supported on Windows. + keep_alive set to True keeps connections active by enabling the + periodic transmission of messages. + ssl_handshake_timeout is the time in seconds that an SSL server will wait for completion of the SSL handshake before aborting the connection. Default is 60s. @@ -368,8 +415,9 @@ async def create_server( start_serving set to True (default) causes the created server to start accepting connections immediately. When set to False, - the user should await Server.start_serving() or Server.serve_forever() - to make the server to start accepting connections. + the user should await Server.start_serving() or + Server.serve_forever() to make the server to start accepting + connections. """ raise NotImplementedError @@ -432,8 +480,9 @@ async def create_unix_server( start_serving set to True (default) causes the created server to start accepting connections immediately. When set to False, - the user should await Server.start_serving() or Server.serve_forever() - to make the server to start accepting connections. + the user should await Server.start_serving() or + Server.serve_forever() to make the server to start accepting + connections. """ raise NotImplementedError @@ -464,8 +513,8 @@ async def create_datagram_endpoint(self, protocol_factory, protocol_factory must be a callable returning a protocol instance. - socket family AF_INET, socket.AF_INET6 or socket.AF_UNIX depending on - host (or family if specified), socket type SOCK_DGRAM. + socket family AF_INET, socket.AF_INET6 or socket.AF_UNIX depending + on host (or family if specified), socket type SOCK_DGRAM. reuse_address tells the kernel to reuse a local socket in TIME_WAIT state, without waiting for its natural timeout to @@ -505,7 +554,8 @@ async def connect_read_pipe(self, protocol_factory, pipe): async def connect_write_pipe(self, protocol_factory, pipe): """Register write pipe in event loop. - protocol_factory should instantiate object with BaseProtocol interface. + protocol_factory should instantiate object with BaseProtocol + interface. Pipe is file-like object already switched to nonblocking. Return pair (transport, protocol), where transport support WriteTransport interface.""" @@ -615,7 +665,7 @@ def set_debug(self, enabled): raise NotImplementedError -class AbstractEventLoopPolicy: +class _AbstractEventLoopPolicy: """Abstract policy for accessing the event loop.""" def get_event_loop(self): @@ -638,18 +688,7 @@ def new_event_loop(self): the current context, set_event_loop must be called explicitly.""" raise NotImplementedError - # Child processes handling (Unix only). - - def get_child_watcher(self): - "Get the watcher for child processes." - raise NotImplementedError - - def set_child_watcher(self, watcher): - """Set the watcher for child processes.""" - raise NotImplementedError - - -class BaseDefaultEventLoopPolicy(AbstractEventLoopPolicy): +class _BaseDefaultEventLoopPolicy(_AbstractEventLoopPolicy): """Default policy implementation for accessing the event loop. In this policy, each thread has its own event loop. However, we @@ -666,7 +705,6 @@ class BaseDefaultEventLoopPolicy(AbstractEventLoopPolicy): class _Local(threading.local): _loop = None - _set_called = False def __init__(self): self._local = self._Local() @@ -676,28 +714,6 @@ def get_event_loop(self): Returns an instance of EventLoop or raises an exception. """ - if (self._local._loop is None and - not self._local._set_called and - threading.current_thread() is threading.main_thread()): - stacklevel = 2 - try: - f = sys._getframe(1) - except AttributeError: - pass - else: - # Move up the call stack so that the warning is attached - # to the line outside asyncio itself. - while f: - module = f.f_globals.get('__name__') - if not (module == 'asyncio' or module.startswith('asyncio.')): - break - f = f.f_back - stacklevel += 1 - import warnings - warnings.warn('There is no current event loop', - DeprecationWarning, stacklevel=stacklevel) - self.set_event_loop(self.new_event_loop()) - if self._local._loop is None: raise RuntimeError('There is no current event loop in thread %r.' % threading.current_thread().name) @@ -706,7 +722,6 @@ def get_event_loop(self): def set_event_loop(self, loop): """Set the event loop.""" - self._local._set_called = True if loop is not None and not isinstance(loop, AbstractEventLoop): raise TypeError(f"loop must be an instance of AbstractEventLoop or None, not '{type(loop).__name__}'") self._local._loop = loop @@ -776,26 +791,35 @@ def _init_event_loop_policy(): global _event_loop_policy with _lock: if _event_loop_policy is None: # pragma: no branch - from . import DefaultEventLoopPolicy - _event_loop_policy = DefaultEventLoopPolicy() + if sys.platform == 'win32': + from .windows_events import _DefaultEventLoopPolicy + else: + from .unix_events import _DefaultEventLoopPolicy + _event_loop_policy = _DefaultEventLoopPolicy() -def get_event_loop_policy(): +def _get_event_loop_policy(): """Get the current event loop policy.""" if _event_loop_policy is None: _init_event_loop_policy() return _event_loop_policy +def get_event_loop_policy(): + warnings._deprecated('asyncio.get_event_loop_policy', remove=(3, 16)) + return _get_event_loop_policy() -def set_event_loop_policy(policy): +def _set_event_loop_policy(policy): """Set the current event loop policy. If policy is None, the default policy is restored.""" global _event_loop_policy - if policy is not None and not isinstance(policy, AbstractEventLoopPolicy): + if policy is not None and not isinstance(policy, _AbstractEventLoopPolicy): raise TypeError(f"policy must be an instance of AbstractEventLoopPolicy or None, not '{type(policy).__name__}'") _event_loop_policy = policy +def set_event_loop_policy(policy): + warnings._deprecated('asyncio.set_event_loop_policy', remove=(3,16)) + _set_event_loop_policy(policy) def get_event_loop(): """Return an asyncio event loop. @@ -810,28 +834,17 @@ def get_event_loop(): current_loop = _get_running_loop() if current_loop is not None: return current_loop - return get_event_loop_policy().get_event_loop() + return _get_event_loop_policy().get_event_loop() def set_event_loop(loop): """Equivalent to calling get_event_loop_policy().set_event_loop(loop).""" - get_event_loop_policy().set_event_loop(loop) + _get_event_loop_policy().set_event_loop(loop) def new_event_loop(): """Equivalent to calling get_event_loop_policy().new_event_loop().""" - return get_event_loop_policy().new_event_loop() - - -def get_child_watcher(): - """Equivalent to calling get_event_loop_policy().get_child_watcher().""" - return get_event_loop_policy().get_child_watcher() - - -def set_child_watcher(watcher): - """Equivalent to calling - get_event_loop_policy().set_child_watcher(watcher).""" - return get_event_loop_policy().set_child_watcher(watcher) + return _get_event_loop_policy().new_event_loop() # Alias pure-Python implementations for testing purposes. @@ -861,7 +874,7 @@ def set_child_watcher(watcher): def on_fork(): # Reset the loop and wakeupfd in the forked child process. if _event_loop_policy is not None: - _event_loop_policy._local = BaseDefaultEventLoopPolicy._Local() + _event_loop_policy._local = _BaseDefaultEventLoopPolicy._Local() _set_running_loop(None) signal.set_wakeup_fd(-1) diff --git a/Lib/asyncio/format_helpers.py b/Lib/asyncio/format_helpers.py index 27d11fd4fa9..93737b7708a 100644 --- a/Lib/asyncio/format_helpers.py +++ b/Lib/asyncio/format_helpers.py @@ -19,19 +19,26 @@ def _get_function_source(func): return None -def _format_callback_source(func, args): - func_repr = _format_callback(func, args, None) +def _format_callback_source(func, args, *, debug=False): + func_repr = _format_callback(func, args, None, debug=debug) source = _get_function_source(func) if source: func_repr += f' at {source[0]}:{source[1]}' return func_repr -def _format_args_and_kwargs(args, kwargs): +def _format_args_and_kwargs(args, kwargs, *, debug=False): """Format function arguments and keyword arguments. Special case for a single parameter: ('hello',) is formatted as ('hello'). + + Note that this function only returns argument details when + debug=True is specified, as arguments may contain sensitive + information. """ + if not debug: + return '()' + # use reprlib to limit the length of the output items = [] if args: @@ -41,10 +48,11 @@ def _format_args_and_kwargs(args, kwargs): return '({})'.format(', '.join(items)) -def _format_callback(func, args, kwargs, suffix=''): +def _format_callback(func, args, kwargs, *, debug=False, suffix=''): if isinstance(func, functools.partial): - suffix = _format_args_and_kwargs(args, kwargs) + suffix - return _format_callback(func.func, func.args, func.keywords, suffix) + suffix = _format_args_and_kwargs(args, kwargs, debug=debug) + suffix + return _format_callback(func.func, func.args, func.keywords, + debug=debug, suffix=suffix) if hasattr(func, '__qualname__') and func.__qualname__: func_repr = func.__qualname__ @@ -53,7 +61,7 @@ def _format_callback(func, args, kwargs, suffix=''): else: func_repr = repr(func) - func_repr += _format_args_and_kwargs(args, kwargs) + func_repr += _format_args_and_kwargs(args, kwargs, debug=debug) if suffix: func_repr += suffix return func_repr diff --git a/Lib/asyncio/futures.py b/Lib/asyncio/futures.py index 97fc4e3fcb6..e8a99556b18 100644 --- a/Lib/asyncio/futures.py +++ b/Lib/asyncio/futures.py @@ -2,6 +2,7 @@ __all__ = ( 'Future', 'wrap_future', 'isfuture', + 'future_add_to_awaited_by', 'future_discard_from_awaited_by', ) import concurrent.futures @@ -43,7 +44,6 @@ class Future: - This class is not compatible with the wait() and as_completed() methods in the concurrent.futures package. - (In Python 3.4 or later we may be able to unify the implementations.) """ # Class variables serving as defaults for instance variables. @@ -61,12 +61,15 @@ class Future: # the Future protocol (i.e. is intended to be duck-type compatible). # The value must also be not-None, to enable a subclass to declare # that it is not compatible by setting this to None. - # - It is set by __iter__() below so that Task._step() can tell + # - It is set by __iter__() below so that Task.__step() can tell # the difference between - # `await Future()` or`yield from Future()` (correct) vs. + # `await Future()` or `yield from Future()` (correct) vs. # `yield Future()` (incorrect). _asyncio_future_blocking = False + # Used by the capture_call_stack() API. + __asyncio_awaited_by = None + __log_traceback = False def __init__(self, *, loop=None): @@ -116,6 +119,12 @@ def _log_traceback(self, val): raise ValueError('_log_traceback can only be set to False') self.__log_traceback = False + @property + def _asyncio_awaited_by(self): + if self.__asyncio_awaited_by is None: + return None + return frozenset(self.__asyncio_awaited_by) + def get_loop(self): """Return the event loop the Future is bound to.""" loop = self._loop @@ -138,9 +147,6 @@ def _make_cancelled_error(self): exc = exceptions.CancelledError() else: exc = exceptions.CancelledError(self._cancel_message) - exc.__context__ = self._cancelled_exc - # Remove the reference since we don't need this anymore. - self._cancelled_exc = None return exc def cancel(self, msg=None): @@ -194,8 +200,7 @@ def result(self): the future is done and has an exception set, this exception is raised. """ if self._state == _CANCELLED: - exc = self._make_cancelled_error() - raise exc + raise self._make_cancelled_error() if self._state != _FINISHED: raise exceptions.InvalidStateError('Result is not ready.') self.__log_traceback = False @@ -212,8 +217,7 @@ def exception(self): InvalidStateError. """ if self._state == _CANCELLED: - exc = self._make_cancelled_error() - raise exc + raise self._make_cancelled_error() if self._state != _FINISHED: raise exceptions.InvalidStateError('Exception is not set.') self.__log_traceback = False @@ -272,9 +276,13 @@ def set_exception(self, exception): raise exceptions.InvalidStateError(f'{self._state}: {self!r}') if isinstance(exception, type): exception = exception() - if type(exception) is StopIteration: - raise TypeError("StopIteration interacts badly with generators " - "and cannot be raised into a Future") + if isinstance(exception, StopIteration): + new_exc = RuntimeError("StopIteration interacts badly with " + "generators and cannot be raised into a " + "Future") + new_exc.__cause__ = exception + new_exc.__context__ = exception + exception = new_exc self._exception = exception self._exception_tb = exception.__traceback__ self._state = _FINISHED @@ -318,11 +326,9 @@ def _set_result_unless_cancelled(fut, result): def _convert_future_exc(exc): exc_class = type(exc) if exc_class is concurrent.futures.CancelledError: - return exceptions.CancelledError(*exc.args) - elif exc_class is concurrent.futures.TimeoutError: - return exceptions.TimeoutError(*exc.args) + return exceptions.CancelledError(*exc.args).with_traceback(exc.__traceback__) elif exc_class is concurrent.futures.InvalidStateError: - return exceptions.InvalidStateError(*exc.args) + return exceptions.InvalidStateError(*exc.args).with_traceback(exc.__traceback__) else: return exc @@ -386,7 +392,7 @@ def _set_state(future, other): def _call_check_cancel(destination): if destination.cancelled(): - if source_loop is None or source_loop is dest_loop: + if source_loop is None or source_loop is events._get_running_loop(): source.cancel() else: source_loop.call_soon_threadsafe(source.cancel) @@ -395,7 +401,7 @@ def _call_set_state(source): if (destination.cancelled() and dest_loop is not None and dest_loop.is_closed()): return - if dest_loop is None or dest_loop is source_loop: + if dest_loop is None or dest_loop is events._get_running_loop(): _set_state(destination, source) else: if dest_loop.is_closed(): @@ -419,6 +425,49 @@ def wrap_future(future, *, loop=None): return new_future +def future_add_to_awaited_by(fut, waiter, /): + """Record that `fut` is awaited on by `waiter`.""" + # For the sake of keeping the implementation minimal and assuming + # that most of asyncio users use the built-in Futures and Tasks + # (or their subclasses), we only support native Future objects + # and their subclasses. + # + # Longer version: tracking requires storing the caller-callee + # dependency somewhere. One obvious choice is to store that + # information right in the future itself in a dedicated attribute. + # This means that we'd have to require all duck-type compatible + # futures to implement a specific attribute used by asyncio for + # the book keeping. Another solution would be to store that in + # a global dictionary. The downside here is that that would create + # strong references and any scenario where the "add" call isn't + # followed by a "discard" call would lead to a memory leak. + # Using WeakDict would resolve that issue, but would complicate + # the C code (_asynciomodule.c). The bottom line here is that + # it's not clear that all this work would be worth the effort. + # + # Note that there's an accelerated version of this function + # shadowing this implementation later in this file. + if isinstance(fut, _PyFuture) and isinstance(waiter, _PyFuture): + if fut._Future__asyncio_awaited_by is None: + fut._Future__asyncio_awaited_by = set() + fut._Future__asyncio_awaited_by.add(waiter) + + +def future_discard_from_awaited_by(fut, waiter, /): + """Record that `fut` is no longer awaited on by `waiter`.""" + # See the comment in "future_add_to_awaited_by()" body for + # details on implementation. + # + # Note that there's an accelerated version of this function + # shadowing this implementation later in this file. + if isinstance(fut, _PyFuture) and isinstance(waiter, _PyFuture): + if fut._Future__asyncio_awaited_by is not None: + fut._Future__asyncio_awaited_by.discard(waiter) + + +_py_future_add_to_awaited_by = future_add_to_awaited_by +_py_future_discard_from_awaited_by = future_discard_from_awaited_by + try: import _asyncio except ImportError: @@ -426,3 +475,7 @@ def wrap_future(future, *, loop=None): else: # _CFuture is needed for tests. Future = _CFuture = _asyncio.Future + future_add_to_awaited_by = _asyncio.future_add_to_awaited_by + future_discard_from_awaited_by = _asyncio.future_discard_from_awaited_by + _c_future_add_to_awaited_by = future_add_to_awaited_by + _c_future_discard_from_awaited_by = future_discard_from_awaited_by diff --git a/Lib/asyncio/graph.py b/Lib/asyncio/graph.py new file mode 100644 index 00000000000..35f7fa62140 --- /dev/null +++ b/Lib/asyncio/graph.py @@ -0,0 +1,276 @@ +"""Introspection utils for tasks call graphs.""" + +import dataclasses +import io +import sys +import types + +from . import events +from . import futures +from . import tasks + +__all__ = ( + 'capture_call_graph', + 'format_call_graph', + 'print_call_graph', + 'FrameCallGraphEntry', + 'FutureCallGraph', +) + +# Sadly, we can't re-use the traceback module's datastructures as those +# are tailored for error reporting, whereas we need to represent an +# async call graph. +# +# Going with pretty verbose names as we'd like to export them to the +# top level asyncio namespace, and want to avoid future name clashes. + + +@dataclasses.dataclass(frozen=True, slots=True) +class FrameCallGraphEntry: + frame: types.FrameType + + +@dataclasses.dataclass(frozen=True, slots=True) +class FutureCallGraph: + future: futures.Future + call_stack: tuple["FrameCallGraphEntry", ...] + awaited_by: tuple["FutureCallGraph", ...] + + +def _build_graph_for_future( + future: futures.Future, + *, + limit: int | None = None, +) -> FutureCallGraph: + if not isinstance(future, futures.Future): + raise TypeError( + f"{future!r} object does not appear to be compatible " + f"with asyncio.Future" + ) + + coro = None + if get_coro := getattr(future, 'get_coro', None): + coro = get_coro() if limit != 0 else None + + st: list[FrameCallGraphEntry] = [] + awaited_by: list[FutureCallGraph] = [] + + while coro is not None: + if hasattr(coro, 'cr_await'): + # A native coroutine or duck-type compatible iterator + st.append(FrameCallGraphEntry(coro.cr_frame)) + coro = coro.cr_await + elif hasattr(coro, 'ag_await'): + # A native async generator or duck-type compatible iterator + st.append(FrameCallGraphEntry(coro.cr_frame)) + coro = coro.ag_await + else: + break + + if future._asyncio_awaited_by: + for parent in future._asyncio_awaited_by: + awaited_by.append(_build_graph_for_future(parent, limit=limit)) + + if limit is not None: + if limit > 0: + st = st[:limit] + elif limit < 0: + st = st[limit:] + st.reverse() + return FutureCallGraph(future, tuple(st), tuple(awaited_by)) + + +def capture_call_graph( + future: futures.Future | None = None, + /, + *, + depth: int = 1, + limit: int | None = None, +) -> FutureCallGraph | None: + """Capture the async call graph for the current task or the provided Future. + + The graph is represented with three data structures: + + * FutureCallGraph(future, call_stack, awaited_by) + + Where 'future' is an instance of asyncio.Future or asyncio.Task. + + 'call_stack' is a tuple of FrameGraphEntry objects. + + 'awaited_by' is a tuple of FutureCallGraph objects. + + * FrameCallGraphEntry(frame) + + Where 'frame' is a frame object of a regular Python function + in the call stack. + + Receives an optional 'future' argument. If not passed, + the current task will be used. If there's no current task, the function + returns None. + + If "capture_call_graph()" is introspecting *the current task*, the + optional keyword-only 'depth' argument can be used to skip the specified + number of frames from top of the stack. + + If the optional keyword-only 'limit' argument is provided, each call + stack in the resulting graph is truncated to include at most + ``abs(limit)`` entries. If 'limit' is positive, the entries left are + the closest to the invocation point. If 'limit' is negative, the + topmost entries are left. If 'limit' is omitted or None, all entries + are present. If 'limit' is 0, the call stack is not captured at all, + only "awaited by" information is present. + """ + + loop = events._get_running_loop() + + if future is not None: + # Check if we're in a context of a running event loop; + # if yes - check if the passed future is the currently + # running task or not. + if loop is None or future is not tasks.current_task(loop=loop): + return _build_graph_for_future(future, limit=limit) + # else: future is the current task, move on. + else: + if loop is None: + raise RuntimeError( + 'capture_call_graph() is called outside of a running ' + 'event loop and no *future* to introspect was provided') + future = tasks.current_task(loop=loop) + + if future is None: + # This isn't a generic call stack introspection utility. If we + # can't determine the current task and none was provided, we + # just return. + return None + + if not isinstance(future, futures.Future): + raise TypeError( + f"{future!r} object does not appear to be compatible " + f"with asyncio.Future" + ) + + call_stack: list[FrameCallGraphEntry] = [] + + f = sys._getframe(depth) if limit != 0 else None + try: + while f is not None: + is_async = f.f_generator is not None + call_stack.append(FrameCallGraphEntry(f)) + + if is_async: + if f.f_back is not None and f.f_back.f_generator is None: + # We've reached the bottom of the coroutine stack, which + # must be the Task that runs it. + break + + f = f.f_back + finally: + del f + + awaited_by = [] + if future._asyncio_awaited_by: + for parent in future._asyncio_awaited_by: + awaited_by.append(_build_graph_for_future(parent, limit=limit)) + + if limit is not None: + limit *= -1 + if limit > 0: + call_stack = call_stack[:limit] + elif limit < 0: + call_stack = call_stack[limit:] + + return FutureCallGraph(future, tuple(call_stack), tuple(awaited_by)) + + +def format_call_graph( + future: futures.Future | None = None, + /, + *, + depth: int = 1, + limit: int | None = None, +) -> str: + """Return the async call graph as a string for `future`. + + If `future` is not provided, format the call graph for the current task. + """ + + def render_level(st: FutureCallGraph, buf: list[str], level: int) -> None: + def add_line(line: str) -> None: + buf.append(level * ' ' + line) + + if isinstance(st.future, tasks.Task): + add_line( + f'* Task(name={st.future.get_name()!r}, id={id(st.future):#x})' + ) + else: + add_line( + f'* Future(id={id(st.future):#x})' + ) + + if st.call_stack: + add_line( + f' + Call stack:' + ) + for ste in st.call_stack: + f = ste.frame + + if f.f_generator is None: + f = ste.frame + add_line( + f' | File {f.f_code.co_filename!r},' + f' line {f.f_lineno}, in' + f' {f.f_code.co_qualname}()' + ) + else: + c = f.f_generator + + try: + f = c.cr_frame + code = c.cr_code + tag = 'async' + except AttributeError: + try: + f = c.ag_frame + code = c.ag_code + tag = 'async generator' + except AttributeError: + f = c.gi_frame + code = c.gi_code + tag = 'generator' + + add_line( + f' | File {f.f_code.co_filename!r},' + f' line {f.f_lineno}, in' + f' {tag} {code.co_qualname}()' + ) + + if st.awaited_by: + add_line( + f' + Awaited by:' + ) + for fut in st.awaited_by: + render_level(fut, buf, level + 1) + + graph = capture_call_graph(future, depth=depth + 1, limit=limit) + if graph is None: + return "" + + buf: list[str] = [] + try: + render_level(graph, buf, 0) + finally: + # 'graph' has references to frames so we should + # make sure it's GC'ed as soon as we don't need it. + del graph + return '\n'.join(buf) + +def print_call_graph( + future: futures.Future | None = None, + /, + *, + file: io.Writer[str] | None = None, + depth: int = 1, + limit: int | None = None, +) -> None: + """Print the async call graph for the current task or the provided Future.""" + print(format_call_graph(future, depth=depth, limit=limit), file=file) diff --git a/Lib/asyncio/locks.py b/Lib/asyncio/locks.py index ce5d8d5bfb2..1592d30ed45 100644 --- a/Lib/asyncio/locks.py +++ b/Lib/asyncio/locks.py @@ -24,25 +24,23 @@ class Lock(_ContextManagerMixin, mixins._LoopBoundMixin): """Primitive lock objects. A primitive lock is a synchronization primitive that is not owned - by a particular coroutine when locked. A primitive lock is in one + by a particular task when locked. A primitive lock is in one of two states, 'locked' or 'unlocked'. It is created in the unlocked state. It has two basic methods, acquire() and release(). When the state is unlocked, acquire() changes the state to locked and returns immediately. When the state is locked, acquire() blocks until a call to release() in - another coroutine changes it to unlocked, then the acquire() call + another task changes it to unlocked, then the acquire() call resets it to locked and returns. The release() method should only be called in the locked state; it changes the state to unlocked and returns immediately. If an attempt is made to release an unlocked lock, a RuntimeError will be raised. - When more than one coroutine is blocked in acquire() waiting for - the state to turn to unlocked, only one coroutine proceeds when a - release() call resets the state to unlocked; first coroutine which - is blocked in acquire() is being processed. - - acquire() is a coroutine and should be called with 'await'. + When more than one task is blocked in acquire() waiting for + the state to turn to unlocked, only one task proceeds when a + release() call resets the state to unlocked; successive release() + calls will unblock tasks in FIFO order. Locks also support the asynchronous context management protocol. 'async with lock' statement should be used. @@ -95,6 +93,8 @@ async def acquire(self): This method blocks until the lock is unlocked, then sets it to locked and returns True. """ + # Implement fair scheduling, where thread always waits + # its turn. Jumping the queue if all are cancelled is an optimization. if (not self._locked and (self._waiters is None or all(w.cancelled() for w in self._waiters))): self._locked = True @@ -105,19 +105,22 @@ async def acquire(self): fut = self._get_loop().create_future() self._waiters.append(fut) - # Finally block should be called before the CancelledError - # handling as we don't want CancelledError to call - # _wake_up_first() and attempt to wake up itself. try: try: await fut finally: self._waiters.remove(fut) except exceptions.CancelledError: + # Currently the only exception designed be able to occur here. + + # Ensure the lock invariant: If lock is not claimed (or about + # to be claimed by us) and there is a Task in waiters, + # ensure that the Task at the head will run. if not self._locked: self._wake_up_first() raise + # assert self._locked is False self._locked = True return True @@ -125,7 +128,7 @@ def release(self): """Release a lock. When the lock is locked, reset it to unlocked, and return. - If any other coroutines are blocked waiting for the lock to become + If any other tasks are blocked waiting for the lock to become unlocked, allow exactly one of them to proceed. When invoked on an unlocked lock, a RuntimeError is raised. @@ -139,7 +142,7 @@ def release(self): raise RuntimeError('Lock is not acquired.') def _wake_up_first(self): - """Wake up the first waiter if it isn't done.""" + """Ensure that the first waiter will wake up.""" if not self._waiters: return try: @@ -147,9 +150,7 @@ def _wake_up_first(self): except StopIteration: return - # .done() necessarily means that a waiter will wake up later on and - # either take the lock, or, if it was cancelled and lock wasn't - # taken already, will hit this again and wake up a new waiter. + # .done() means that the waiter is already set to wake up. if not fut.done(): fut.set_result(True) @@ -157,10 +158,10 @@ def _wake_up_first(self): class Event(mixins._LoopBoundMixin): """Asynchronous equivalent to threading.Event. - Class implementing event objects. An event manages a flag that can be set - to true with the set() method and reset to false with the clear() method. - The wait() method blocks until the flag is true. The flag is initially - false. + Class implementing event objects. An event manages a flag that can be + set to true with the set() method and reset to false with the clear() + method. The wait() method blocks until the flag is true. The flag is + initially false. """ def __init__(self): @@ -179,8 +180,8 @@ def is_set(self): return self._value def set(self): - """Set the internal flag to true. All coroutines waiting for it to - become true are awakened. Coroutine that call wait() once the flag is + """Set the internal flag to true. All tasks waiting for it to + become true are awakened. Tasks that call wait() once the flag is true will not block at all. """ if not self._value: @@ -191,7 +192,7 @@ def set(self): fut.set_result(True) def clear(self): - """Reset the internal flag to false. Subsequently, coroutines calling + """Reset the internal flag to false. Subsequently, tasks calling wait() will block until set() is called to set the internal flag to true again.""" self._value = False @@ -200,7 +201,7 @@ async def wait(self): """Block until the internal flag is true. If the internal flag is true on entry, return True - immediately. Otherwise, block until another coroutine calls + immediately. Otherwise, block until another task calls set() to set the flag to true, then return True. """ if self._value: @@ -219,8 +220,8 @@ class Condition(_ContextManagerMixin, mixins._LoopBoundMixin): """Asynchronous equivalent to threading.Condition. This class implements condition variable objects. A condition variable - allows one or more coroutines to wait until they are notified by another - coroutine. + allows one or more tasks to wait until they are notified by another + task. A new Lock object is created and used as the underlying lock. """ @@ -247,45 +248,64 @@ def __repr__(self): async def wait(self): """Wait until notified. - If the calling coroutine has not acquired the lock when this + If the calling task has not acquired the lock when this method is called, a RuntimeError is raised. This method releases the underlying lock, and then blocks until it is awakened by a notify() or notify_all() call for - the same condition variable in another coroutine. Once + the same condition variable in another task. Once awakened, it re-acquires the lock and returns True. + + This method may return spuriously, + which is why the caller should always + re-check the state and be prepared to wait() again. """ if not self.locked(): raise RuntimeError('cannot wait on un-acquired lock') + fut = self._get_loop().create_future() self.release() try: - fut = self._get_loop().create_future() - self._waiters.append(fut) try: - await fut - return True - finally: - self._waiters.remove(fut) - - finally: - # Must reacquire lock even if wait is cancelled - cancelled = False - while True: + self._waiters.append(fut) try: - await self.acquire() - break - except exceptions.CancelledError: - cancelled = True + await fut + return True + finally: + self._waiters.remove(fut) - if cancelled: - raise exceptions.CancelledError + finally: + # Must re-acquire lock even if wait is cancelled. + # We only catch CancelledError here, since we don't want any + # other (fatal) errors with the future to cause us to spin. + err = None + while True: + try: + await self.acquire() + break + except exceptions.CancelledError as e: + err = e + + if err is not None: + try: + raise err # Re-raise most recent exception instance. + finally: + err = None # Break reference cycles. + except BaseException: + # Any error raised out of here _may_ have occurred after this Task + # believed to have been successfully notified. + # Make sure to notify another Task instead. This may result + # in a "spurious wakeup", which is allowed as part of the + # Condition Variable protocol. + self._notify(1) + raise async def wait_for(self, predicate): """Wait until a predicate becomes true. - The predicate should be a callable which result will be - interpreted as a boolean value. The final predicate value is + The predicate should be a callable whose result will be + interpreted as a boolean value. The method will repeatedly + wait() until it evaluates to true. The final predicate value is the return value. """ result = predicate() @@ -295,20 +315,22 @@ async def wait_for(self, predicate): return result def notify(self, n=1): - """By default, wake up one coroutine waiting on this condition, if any. - If the calling coroutine has not acquired the lock when this method + """By default, wake up one task waiting on this condition, if any. + If the calling task has not acquired the lock when this method is called, a RuntimeError is raised. - This method wakes up at most n of the coroutines waiting for the - condition variable; it is a no-op if no coroutines are waiting. + This method wakes up n of the tasks waiting for the condition + variable; if fewer than n are waiting, they are all awoken. - Note: an awakened coroutine does not actually return from its + Note: an awakened task does not actually return from its wait() call until it can reacquire the lock. Since notify() does not release the lock, its caller should. """ if not self.locked(): raise RuntimeError('cannot notify on un-acquired lock') + self._notify(n) + def _notify(self, n): idx = 0 for fut in self._waiters: if idx >= n: @@ -319,9 +341,9 @@ def notify(self, n=1): fut.set_result(False) def notify_all(self): - """Wake up all threads waiting on this condition. This method acts - like notify(), but wakes up all waiting threads instead of one. If the - calling thread has not acquired the lock when this method is called, + """Wake up all tasks waiting on this condition. This method acts + like notify(), but wakes up all waiting tasks instead of one. If the + calling task has not acquired the lock when this method is called, a RuntimeError is raised. """ self.notify(len(self._waiters)) @@ -331,9 +353,9 @@ class Semaphore(_ContextManagerMixin, mixins._LoopBoundMixin): """A Semaphore implementation. A semaphore manages an internal counter which is decremented by each - acquire() call and incremented by each release() call. The counter - can never go below zero; when acquire() finds that it is zero, it blocks, - waiting until some other thread calls release(). + acquire() call and incremented by each release() call. The counter + can never go below zero; when acquire() finds that it is zero, it + blocks, waiting until some other thread calls release(). Semaphores also support the context management protocol. @@ -357,6 +379,7 @@ def __repr__(self): def locked(self): """Returns True if semaphore cannot be acquired immediately.""" + # Due to state, or FIFO rules (must allow others to run first). return self._value == 0 or ( any(not w.cancelled() for w in (self._waiters or ()))) @@ -365,11 +388,12 @@ async def acquire(self): If the internal counter is larger than zero on entry, decrement it by one and return True immediately. If it is - zero on entry, block, waiting until some other coroutine has + zero on entry, block, waiting until some other task has called release() to make it larger than 0, and then return True. """ if not self.locked(): + # Maintain FIFO, wait for others to start even if _value > 0. self._value -= 1 return True @@ -378,29 +402,34 @@ async def acquire(self): fut = self._get_loop().create_future() self._waiters.append(fut) - # Finally block should be called before the CancelledError - # handling as we don't want CancelledError to call - # _wake_up_first() and attempt to wake up itself. try: try: await fut finally: self._waiters.remove(fut) except exceptions.CancelledError: - if not fut.cancelled(): + # Currently the only exception designed be able to occur here. + if fut.done() and not fut.cancelled(): + # Our Future was successfully set to True via _wake_up_next(), + # but we are not about to successfully acquire(). Therefore we + # must undo the bookkeeping already done and attempt to wake + # up someone else. self._value += 1 - self._wake_up_next() raise - if self._value > 0: - self._wake_up_next() + finally: + # New waiters may have arrived but had to wait due to FIFO. + # Wake up as many as are allowed. + while self._value > 0: + if not self._wake_up_next(): + break # There was no-one to wake up. return True def release(self): """Release a semaphore, incrementing the internal counter by one. - When it was zero on entry and another coroutine is waiting for it to - become larger than zero again, wake up that coroutine. + When it was zero on entry and another task is waiting for it to + become larger than zero again, wake up that task. """ self._value += 1 self._wake_up_next() @@ -408,13 +437,15 @@ def release(self): def _wake_up_next(self): """Wake up the first waiter that isn't done.""" if not self._waiters: - return + return False for fut in self._waiters: if not fut.done(): self._value -= 1 fut.set_result(True) - return + # `fut` is now `done()` and not `cancelled()`. + return True + return False class BoundedSemaphore(Semaphore): @@ -454,7 +485,7 @@ class Barrier(mixins._LoopBoundMixin): def __init__(self, parties): """Create a barrier, initialised to 'parties' tasks.""" if parties < 1: - raise ValueError('parties must be > 0') + raise ValueError('parties must be >= 1') self._cond = Condition() # notify all tasks when state changes @@ -480,8 +511,8 @@ async def __aexit__(self, *args): async def wait(self): """Wait for the barrier. - When the specified number of tasks have started waiting, they are all - simultaneously awoken. + When the specified number of tasks have started waiting, they are + all simultaneously awoken. Returns an unique and individual index number from 0 to 'parties-1'. """ async with self._cond: diff --git a/Lib/asyncio/proactor_events.py b/Lib/asyncio/proactor_events.py index 1e2a730cf36..6b94975d004 100644 --- a/Lib/asyncio/proactor_events.py +++ b/Lib/asyncio/proactor_events.py @@ -63,7 +63,7 @@ def __init__(self, loop, sock, protocol, waiter=None, self._called_connection_lost = False self._eof_written = False if self._server is not None: - self._server._attach() + self._server._attach(self) self._loop.call_soon(self._protocol.connection_made, self) if waiter is not None: # only wake up the waiter when connection_made() has been called @@ -167,7 +167,7 @@ def _call_connection_lost(self, exc): self._sock = None server = self._server if server is not None: - server._detach() + server._detach(self) self._server = None self._called_connection_lost = True @@ -460,6 +460,8 @@ def _pipe_closed(self, fut): class _ProactorDatagramTransport(_ProactorBasePipeTransport, transports.DatagramTransport): max_size = 256 * 1024 + _header_size = 8 + def __init__(self, loop, sock, protocol, address=None, waiter=None, extra=None): self._address = address @@ -487,9 +489,6 @@ def sendto(self, data, addr=None): raise TypeError('data argument must be bytes-like object (%r)', type(data)) - if not data: - return - if self._address is not None and addr not in (None, self._address): raise ValueError( f'Invalid address: must be None or {self._address}') @@ -502,7 +501,7 @@ def sendto(self, data, addr=None): # Ensure that what we buffer is immutable. self._buffer.append((bytes(data), addr)) - self._buffer_size += len(data) + self._buffer_size += len(data) + self._header_size if self._write_fut is None: # No current write operations are active, kick one off @@ -529,7 +528,7 @@ def _loop_writing(self, fut=None): return data, addr = self._buffer.popleft() - self._buffer_size -= len(data) + self._buffer_size -= len(data) + self._header_size if self._address is not None: self._write_fut = self._loop._proactor.send(self._sock, data) @@ -724,6 +723,8 @@ async def sock_sendto(self, sock, data, address): return await self._proactor.sendto(sock, data, 0, address) async def sock_connect(self, sock, address): + if self._debug and sock.gettimeout() != 0: + raise ValueError("the socket must be non-blocking") return await self._proactor.connect(sock, address) async def sock_accept(self, sock): @@ -755,8 +756,7 @@ async def _sock_sendfile_native(self, sock, file, offset, count): offset += blocksize total_sent += blocksize finally: - if total_sent > 0: - file.seek(offset) + file.seek(offset) async def _sendfile_native(self, transp, file, offset, count): resume_reading = transp.is_reading() diff --git a/Lib/asyncio/queues.py b/Lib/asyncio/queues.py index a9656a6df56..30004f2bc9b 100644 --- a/Lib/asyncio/queues.py +++ b/Lib/asyncio/queues.py @@ -1,4 +1,11 @@ -__all__ = ('Queue', 'PriorityQueue', 'LifoQueue', 'QueueFull', 'QueueEmpty') +__all__ = ( + 'Queue', + 'PriorityQueue', + 'LifoQueue', + 'QueueFull', + 'QueueEmpty', + 'QueueShutDown', +) import collections import heapq @@ -18,14 +25,19 @@ class QueueFull(Exception): pass +class QueueShutDown(Exception): + """Raised when putting on to or getting from a shut-down Queue.""" + pass + + class Queue(mixins._LoopBoundMixin): """A queue, useful for coordinating producer and consumer coroutines. - If maxsize is less than or equal to zero, the queue size is infinite. If it - is an integer greater than 0, then "await put()" will block when the - queue reaches maxsize, until an item is removed by get(). + If maxsize is less than or equal to zero, the queue size is infinite. + If it is an integer greater than 0, then "await put()" will block when + the queue reaches maxsize, until an item is removed by get(). - Unlike the standard library Queue, you can reliably know this Queue's size + Unlike queue.Queue, you can reliably know this Queue's size with qsize(), since your single-threaded asyncio application won't be interrupted between calling qsize() and doing an operation on the Queue. """ @@ -41,6 +53,7 @@ def __init__(self, maxsize=0): self._finished = locks.Event() self._finished.set() self._init(maxsize) + self._is_shutdown = False # These three are overridable in subclasses. @@ -81,6 +94,8 @@ def _format(self): result += f' _putters[{len(self._putters)}]' if self._unfinished_tasks: result += f' tasks={self._unfinished_tasks}' + if self._is_shutdown: + result += ' shutdown' return result def qsize(self): @@ -112,8 +127,12 @@ async def put(self, item): Put an item into the queue. If the queue is full, wait until a free slot is available before adding item. + + Raises QueueShutDown if the queue has been shut down. """ while self.full(): + if self._is_shutdown: + raise QueueShutDown putter = self._get_loop().create_future() self._putters.append(putter) try: @@ -125,7 +144,7 @@ async def put(self, item): self._putters.remove(putter) except ValueError: # The putter could be removed from self._putters by a - # previous get_nowait call. + # previous get_nowait call or a shutdown call. pass if not self.full() and not putter.cancelled(): # We were woken up by get_nowait(), but can't take @@ -138,7 +157,11 @@ def put_nowait(self, item): """Put an item into the queue without blocking. If no free slot is immediately available, raise QueueFull. + + Raises QueueShutDown if the queue has been shut down. """ + if self._is_shutdown: + raise QueueShutDown if self.full(): raise QueueFull self._put(item) @@ -150,8 +173,13 @@ async def get(self): """Remove and return an item from the queue. If queue is empty, wait until an item is available. + + Raises QueueShutDown if the queue has been shut down and is empty, + or if the queue has been shut down immediately. """ while self.empty(): + if self._is_shutdown and self.empty(): + raise QueueShutDown getter = self._get_loop().create_future() self._getters.append(getter) try: @@ -163,7 +191,7 @@ async def get(self): self._getters.remove(getter) except ValueError: # The getter could be removed from self._getters by a - # previous put_nowait call. + # previous put_nowait call, or a shutdown call. pass if not self.empty() and not getter.cancelled(): # We were woken up by put_nowait(), but can't take @@ -175,9 +203,15 @@ async def get(self): def get_nowait(self): """Remove and return an item from the queue. - Return an item if one is immediately available, else raise QueueEmpty. + Return an item if one is immediately available, else raise + QueueEmpty. + + Raises QueueShutDown if the queue has been shut down and is empty, + or if the queue has been shut down immediately. """ if self.empty(): + if self._is_shutdown: + raise QueueShutDown raise QueueEmpty item = self._get() self._wakeup_next(self._putters) @@ -190,12 +224,12 @@ def task_done(self): a subsequent call to task_done() tells the queue that the processing on the task is complete. - If a join() is currently blocking, it will resume when all items have - been processed (meaning that a task_done() call was received for every - item that had been put() into the queue). + If a join() is currently blocking, it will resume when all items + have been processed (meaning that a task_done() call was received + for every item that had been put() into the queue). - Raises ValueError if called more times than there were items placed in - the queue. + Raises ValueError if called more times than there were items placed + in the queue. """ if self._unfinished_tasks <= 0: raise ValueError('task_done() called too many times') @@ -206,14 +240,45 @@ def task_done(self): async def join(self): """Block until all items in the queue have been gotten and processed. - The count of unfinished tasks goes up whenever an item is added to the - queue. The count goes down whenever a consumer calls task_done() to - indicate that the item was retrieved and all work on it is complete. - When the count of unfinished tasks drops to zero, join() unblocks. + The count of unfinished tasks goes up whenever an item is added to + the queue. The count goes down whenever a consumer calls + task_done() to indicate that the item was retrieved and all work on + it is complete. When the count of unfinished tasks drops to zero, + join() unblocks. """ if self._unfinished_tasks > 0: await self._finished.wait() + def shutdown(self, immediate=False): + """Shut-down the queue, making queue gets and puts raise QueueShutDown. + + By default, gets will only raise once the queue is empty. Set + 'immediate' to True to make gets raise immediately instead. + + All blocked callers of put() and get() will be unblocked. + + If 'immediate', the queue is drained and unfinished tasks + is reduced by the number of drained tasks. If unfinished tasks + is reduced to zero, callers of Queue.join are unblocked. + """ + self._is_shutdown = True + if immediate: + while not self.empty(): + self._get() + if self._unfinished_tasks > 0: + self._unfinished_tasks -= 1 + if self._unfinished_tasks == 0: + self._finished.set() + # All getters need to re-check queue-empty to raise ShutDown + while self._getters: + getter = self._getters.popleft() + if not getter.done(): + getter.set_result(None) + while self._putters: + putter = self._putters.popleft() + if not putter.done(): + putter.set_result(None) + class PriorityQueue(Queue): """A subclass of Queue; retrieves entries in priority order (lowest first). diff --git a/Lib/asyncio/runners.py b/Lib/asyncio/runners.py index 1b89236599a..774a317564d 100644 --- a/Lib/asyncio/runners.py +++ b/Lib/asyncio/runners.py @@ -3,6 +3,7 @@ import contextvars import enum import functools +import inspect import threading import signal from . import coroutines @@ -34,7 +35,8 @@ class Runner: with asyncio.Runner(debug=True) as runner: runner.run(main()) - The run() method can be called multiple times within the runner's context. + The run() method can be called multiple times within the runner's + context. This can be useful for interactive console (e.g. IPython), unittest runners, console tools, -- everywhere when async code @@ -84,10 +86,7 @@ def get_loop(self): return self._loop def run(self, coro, *, context=None): - """Run a coroutine inside the embedded event loop.""" - if not coroutines.iscoroutine(coro): - raise ValueError("a coroutine was expected, got {!r}".format(coro)) - + """Run code in the embedded event loop.""" if events._get_running_loop() is not None: # fail fast with short traceback raise RuntimeError( @@ -95,8 +94,19 @@ def run(self, coro, *, context=None): self._lazy_init() + if not coroutines.iscoroutine(coro): + if inspect.isawaitable(coro): + async def _wrap_awaitable(awaitable): + return await awaitable + + coro = _wrap_awaitable(coro) + else: + raise TypeError('An asyncio.Future, a coroutine or an ' + 'awaitable is required') + if context is None: context = self._context + task = self._loop.create_task(coro, context=context) if (threading.current_thread() is threading.main_thread() @@ -168,6 +178,7 @@ def run(main, *, debug=None, loop_factory=None): running in the same thread. If debug is True, the event loop will be run in debug mode. + If loop_factory is passed, it is used for new event loop creation. This function always creates a new event loop and closes it at the end. It should be used as a main entry point for asyncio programs, and should diff --git a/Lib/asyncio/selector_events.py b/Lib/asyncio/selector_events.py index 790711f8340..7c2062e3dd5 100644 --- a/Lib/asyncio/selector_events.py +++ b/Lib/asyncio/selector_events.py @@ -173,16 +173,20 @@ def _accept_connection( # listening socket has triggered an EVENT_READ. There may be multiple # connections waiting for an .accept() so it is called in a loop. # See https://bugs.python.org/issue27906 for more details. - for _ in range(backlog): + for _ in range(backlog + 1): try: conn, addr = sock.accept() if self._debug: logger.debug("%r got a new connection from %r: %r", server, addr, conn) conn.setblocking(False) - except (BlockingIOError, InterruptedError, ConnectionAbortedError): - # Early exit because the socket accept buffer is empty. - return None + except ConnectionAbortedError: + # Discard connections that were aborted before accept(). + continue + except (BlockingIOError, InterruptedError): + # Early exit because of a signal or + # the socket accept buffer is empty. + return except OSError as exc: # There's nowhere to send the error, so just log it. if exc.errno in (errno.EMFILE, errno.ENFILE, @@ -265,22 +269,17 @@ def _ensure_fd_no_transport(self, fd): except (AttributeError, TypeError, ValueError): # This code matches selectors._fileobj_to_fd function. raise ValueError(f"Invalid file object: {fd!r}") from None - try: - transport = self._transports[fileno] - except KeyError: - pass - else: - if not transport.is_closing(): - raise RuntimeError( - f'File descriptor {fd!r} is used by transport ' - f'{transport!r}') + transport = self._transports.get(fileno) + if transport and not transport.is_closing(): + raise RuntimeError( + f'File descriptor {fd!r} is used by transport ' + f'{transport!r}') def _add_reader(self, fd, callback, *args): self._check_closed() handle = events.Handle(callback, args, self, None) - try: - key = self._selector.get_key(fd) - except KeyError: + key = self._selector.get_map().get(fd) + if key is None: self._selector.register(fd, selectors.EVENT_READ, (handle, None)) else: @@ -294,30 +293,27 @@ def _add_reader(self, fd, callback, *args): def _remove_reader(self, fd): if self.is_closed(): return False - try: - key = self._selector.get_key(fd) - except KeyError: + key = self._selector.get_map().get(fd) + if key is None: return False + mask, (reader, writer) = key.events, key.data + mask &= ~selectors.EVENT_READ + if not mask: + self._selector.unregister(fd) else: - mask, (reader, writer) = key.events, key.data - mask &= ~selectors.EVENT_READ - if not mask: - self._selector.unregister(fd) - else: - self._selector.modify(fd, mask, (None, writer)) + self._selector.modify(fd, mask, (None, writer)) - if reader is not None: - reader.cancel() - return True - else: - return False + if reader is not None: + reader.cancel() + return True + else: + return False def _add_writer(self, fd, callback, *args): self._check_closed() handle = events.Handle(callback, args, self, None) - try: - key = self._selector.get_key(fd) - except KeyError: + key = self._selector.get_map().get(fd) + if key is None: self._selector.register(fd, selectors.EVENT_WRITE, (None, handle)) else: @@ -332,24 +328,22 @@ def _remove_writer(self, fd): """Remove a writer callback.""" if self.is_closed(): return False - try: - key = self._selector.get_key(fd) - except KeyError: + key = self._selector.get_map().get(fd) + if key is None: return False + mask, (reader, writer) = key.events, key.data + # Remove both writer and connector. + mask &= ~selectors.EVENT_WRITE + if not mask: + self._selector.unregister(fd) else: - mask, (reader, writer) = key.events, key.data - # Remove both writer and connector. - mask &= ~selectors.EVENT_WRITE - if not mask: - self._selector.unregister(fd) - else: - self._selector.modify(fd, mask, (reader, None)) + self._selector.modify(fd, mask, (reader, None)) - if writer is not None: - writer.cancel() - return True - else: - return False + if writer is not None: + writer.cancel() + return True + else: + return False def add_reader(self, fd, callback, *args): """Add a reader callback.""" @@ -536,11 +530,12 @@ def _sock_recvfrom_into(self, fut, sock, buf, bufsize): async def sock_sendall(self, sock, data): """Send data to the socket. - The socket must be connected to a remote socket. This method continues - to send data from data until either all data has been sent or an - error occurs. None is returned on success. On error, an exception is - raised, and there is no way to determine how much data, if any, was - successfully processed by the receiving end of the connection. + The socket must be connected to a remote socket. This method + continues to send data from data until either all data has been + sent or an error occurs. None is returned on success. On error, + an exception is raised, and there is no way to determine how much + data, if any, was successfully processed by the receiving end of + the connection. """ base_events._check_ssl_socket(sock) if self._debug and sock.gettimeout() != 0: @@ -589,11 +584,12 @@ def _sock_sendall(self, fut, sock, view, pos): async def sock_sendto(self, sock, data, address): """Send data to the socket. - The socket must be connected to a remote socket. This method continues - to send data from data until either all data has been sent or an - error occurs. None is returned on success. On error, an exception is - raised, and there is no way to determine how much data, if any, was - successfully processed by the receiving end of the connection. + The socket must be connected to a remote socket. This method + continues to send data from data until either all data has been + sent or an error occurs. None is returned on success. On error, + an exception is raised, and there is no way to determine how much + data, if any, was successfully processed by the receiving end of + the connection. """ base_events._check_ssl_socket(sock) if self._debug and sock.gettimeout() != 0: @@ -704,10 +700,11 @@ def _sock_connect_cb(self, fut, sock, address): async def sock_accept(self, sock): """Accept a connection. - The socket must be bound to an address and listening for connections. - The return value is a pair (conn, address) where conn is a new socket - object usable to send and receive data on the connection, and address - is the address bound to the socket on the other end of the connection. + The socket must be bound to an address and listening for + connections. The return value is a pair (conn, address) where + conn is a new socket object usable to send and receive data on the + connection, and address is the address bound to the socket on the + other end of the connection. """ base_events._check_ssl_socket(sock) if self._debug and sock.gettimeout() != 0: @@ -801,7 +798,7 @@ def __init__(self, loop, sock, protocol, extra=None, server=None): self._paused = False # Set when pause_reading() called if self._server is not None: - self._server._attach() + self._server._attach(self) loop._transports[self._sock_fd] = self def __repr__(self): @@ -878,6 +875,8 @@ def __del__(self, _warn=warnings.warn): if self._sock is not None: _warn(f"unclosed transport {self!r}", ResourceWarning, source=self) self._sock.close() + if self._server is not None: + self._server._detach(self) def _fatal_error(self, exc, message='Fatal error on transport'): # Should be called from exception handler only. @@ -916,7 +915,7 @@ def _call_connection_lost(self, exc): self._loop = None server = self._server if server is not None: - server._detach() + server._detach(self) self._server = None def get_write_buffer_size(self): @@ -1054,8 +1053,8 @@ def _read_ready__on_eof(self): def write(self, data): if not isinstance(data, (bytes, bytearray, memoryview)): - raise TypeError(f'data argument must be a bytes-like object, ' - f'not {type(data).__name__!r}') + raise TypeError(f'data argument must be a bytes, bytearray, or memoryview ' + f'object, not {type(data).__name__!r}') if self._eof: raise RuntimeError('Cannot call write() after write_eof()') if self._empty_waiter is not None: @@ -1178,20 +1177,31 @@ def writelines(self, list_of_data): raise RuntimeError('unable to writelines; sendfile is in progress') if not list_of_data: return + + if self._conn_lost: + if self._conn_lost >= constants.LOG_THRESHOLD_FOR_CONNLOST_WRITES: + logger.warning('socket.send() raised exception.') + self._conn_lost += 1 + return + self._buffer.extend([memoryview(data) for data in list_of_data]) self._write_ready() # If the entire buffer couldn't be written, register a write handler if self._buffer: self._loop._add_writer(self._sock_fd, self._write_ready) + self._maybe_pause_protocol() def can_write_eof(self): return True def _call_connection_lost(self, exc): - super()._call_connection_lost(exc) - if self._empty_waiter is not None: - self._empty_waiter.set_exception( - ConnectionError("Connection is closed by peer")) + try: + super()._call_connection_lost(exc) + finally: + self._write_ready = None + if self._empty_waiter is not None: + self._empty_waiter.set_exception( + ConnectionError("Connection is closed by peer")) def _make_empty_waiter(self): if self._empty_waiter is not None: @@ -1206,13 +1216,13 @@ def _reset_empty_waiter(self): def close(self): self._read_ready_cb = None - self._write_ready = None super().close() class _SelectorDatagramTransport(_SelectorTransport, transports.DatagramTransport): _buffer_factory = collections.deque + _header_size = 8 def __init__(self, loop, sock, protocol, address=None, waiter=None, extra=None): @@ -1251,8 +1261,6 @@ def sendto(self, data, addr=None): if not isinstance(data, (bytes, bytearray, memoryview)): raise TypeError(f'data argument must be a bytes-like object, ' f'not {type(data).__name__!r}') - if not data: - return if self._address: if addr not in (None, self._address): @@ -1288,13 +1296,13 @@ def sendto(self, data, addr=None): # Ensure that what we buffer is immutable. self._buffer.append((bytes(data), addr)) - self._buffer_size += len(data) + self._buffer_size += len(data) + self._header_size self._maybe_pause_protocol() def _sendto_ready(self): while self._buffer: data, addr = self._buffer.popleft() - self._buffer_size -= len(data) + self._buffer_size -= len(data) + self._header_size try: if self._extra['peername']: self._sock.send(data) @@ -1302,7 +1310,7 @@ def _sendto_ready(self): self._sock.sendto(data, addr) except (BlockingIOError, InterruptedError): self._buffer.appendleft((data, addr)) # Try again later. - self._buffer_size += len(data) + self._buffer_size += len(data) + self._header_size break except OSError as exc: self._protocol.error_received(exc) diff --git a/Lib/asyncio/sslproto.py b/Lib/asyncio/sslproto.py index e51669a2ab2..74c5f0d5ca0 100644 --- a/Lib/asyncio/sslproto.py +++ b/Lib/asyncio/sslproto.py @@ -101,7 +101,7 @@ def get_protocol(self): return self._ssl_protocol._app_protocol def is_closing(self): - return self._closed + return self._closed or self._ssl_protocol._is_transport_closing() def close(self): """Close the transport. @@ -379,6 +379,9 @@ def _get_app_transport(self): self._app_transport_created = True return self._app_transport + def _is_transport_closing(self): + return self._transport is not None and self._transport.is_closing() + def connection_made(self, transport): """Called when the low-level connection is made. @@ -542,7 +545,7 @@ def _start_handshake(self): # start handshake timeout count down self._handshake_timeout_handle = \ self._loop.call_later(self._ssl_handshake_timeout, - lambda: self._check_handshake_timeout()) + self._check_handshake_timeout) self._do_handshake() @@ -623,7 +626,7 @@ def _start_shutdown(self): self._set_state(SSLProtocolState.FLUSHING) self._shutdown_timeout_handle = self._loop.call_later( self._ssl_shutdown_timeout, - lambda: self._check_shutdown_timeout() + self._check_shutdown_timeout ) self._do_flush() @@ -762,7 +765,7 @@ def _do_read__buffered(self): else: break else: - self._loop.call_soon(lambda: self._do_read()) + self._loop.call_soon(self._do_read) except SSLAgainErrors: pass if offset > 0: diff --git a/Lib/asyncio/staggered.py b/Lib/asyncio/staggered.py index 451a53a16f3..2ad65d8648e 100644 --- a/Lib/asyncio/staggered.py +++ b/Lib/asyncio/staggered.py @@ -3,24 +3,15 @@ __all__ = 'staggered_race', import contextlib -import typing from . import events from . import exceptions as exceptions_mod from . import locks from . import tasks +from . import futures -async def staggered_race( - coro_fns: typing.Iterable[typing.Callable[[], typing.Awaitable]], - delay: typing.Optional[float], - *, - loop: events.AbstractEventLoop = None, -) -> typing.Tuple[ - typing.Any, - typing.Optional[int], - typing.List[typing.Optional[Exception]] -]: +async def staggered_race(coro_fns, delay, *, loop=None): """Run coroutines with staggered start times and take the first to finish. This method takes an iterable of coroutine functions. The first one is @@ -73,14 +64,38 @@ async def staggered_race( """ # TODO: when we have aiter() and anext(), allow async iterables in coro_fns. loop = loop or events.get_running_loop() + parent_task = tasks.current_task(loop) enum_coro_fns = enumerate(coro_fns) winner_result = None winner_index = None + unhandled_exceptions = [] exceptions = [] - running_tasks = [] + running_tasks = set() + on_completed_fut = None + + def task_done(task): + running_tasks.discard(task) + futures.future_discard_from_awaited_by(task, parent_task) + if ( + on_completed_fut is not None + and not on_completed_fut.done() + and not running_tasks + ): + on_completed_fut.set_result(None) + + if task.cancelled(): + return + + exc = task.exception() + if exc is None: + return + unhandled_exceptions.append(exc) - async def run_one_coro( - previous_failed: typing.Optional[locks.Event]) -> None: + async def run_one_coro(ok_to_start, previous_failed) -> None: + # in eager tasks this waits for the calling task to append this task + # to running_tasks, in regular tasks this wait is a no-op that does + # not yield a future. See gh-124309. + await ok_to_start.wait() # Wait for the previous task to finish, or for delay seconds if previous_failed is not None: with contextlib.suppress(exceptions_mod.TimeoutError): @@ -96,9 +111,14 @@ async def run_one_coro( return # Start task that will run the next coroutine this_failed = locks.Event() - next_task = loop.create_task(run_one_coro(this_failed)) - running_tasks.append(next_task) - assert len(running_tasks) == this_index + 2 + next_ok_to_start = locks.Event() + next_task = loop.create_task(run_one_coro(next_ok_to_start, this_failed)) + futures.future_add_to_awaited_by(next_task, parent_task) + running_tasks.add(next_task) + next_task.add_done_callback(task_done) + # next_task has been appended to running_tasks so next_task is ok to + # start. + next_ok_to_start.set() # Prepare place to put this coroutine's exceptions if not won exceptions.append(None) assert len(exceptions) == this_index + 1 @@ -123,27 +143,37 @@ async def run_one_coro( # up as done() == True, cancelled() == False, exception() == # asyncio.CancelledError. This behavior is specified in # https://bugs.python.org/issue30048 - for i, t in enumerate(running_tasks): - if i != this_index: + current_task = tasks.current_task(loop) + for t in running_tasks: + if t is not current_task: t.cancel() - first_task = loop.create_task(run_one_coro(None)) - running_tasks.append(first_task) + propagate_cancellation_error = None try: - # Wait for a growing list of tasks to all finish: poor man's version of - # curio's TaskGroup or trio's nursery - done_count = 0 - while done_count != len(running_tasks): - done, _ = await tasks.wait(running_tasks) - done_count = len(done) + ok_to_start = locks.Event() + first_task = loop.create_task(run_one_coro(ok_to_start, None)) + futures.future_add_to_awaited_by(first_task, parent_task) + running_tasks.add(first_task) + first_task.add_done_callback(task_done) + # first_task has been appended to running_tasks so first_task is ok to start. + ok_to_start.set() + propagate_cancellation_error = None + # Make sure no tasks are left running if we leave this function + while running_tasks: + on_completed_fut = loop.create_future() + try: + await on_completed_fut + except exceptions_mod.CancelledError as ex: + propagate_cancellation_error = ex + for task in running_tasks: + task.cancel(*ex.args) + on_completed_fut = None + if __debug__ and unhandled_exceptions: # If run_one_coro raises an unhandled exception, it's probably a # programming error, and I want to see it. - if __debug__: - for d in done: - if d.done() and not d.cancelled() and d.exception(): - raise d.exception() + raise ExceptionGroup("staggered race failed", unhandled_exceptions) + if propagate_cancellation_error is not None: + raise propagate_cancellation_error return winner_result, winner_index, exceptions finally: - # Make sure no tasks are left running if we leave this function - for t in running_tasks: - t.cancel() + del exceptions, propagate_cancellation_error, unhandled_exceptions, parent_task diff --git a/Lib/asyncio/streams.py b/Lib/asyncio/streams.py index f310aa2f367..dd8f6618623 100644 --- a/Lib/asyncio/streams.py +++ b/Lib/asyncio/streams.py @@ -201,7 +201,6 @@ def __init__(self, stream_reader, client_connected_cb=None, loop=None): # is established. self._strong_reader = stream_reader self._reject_connection = False - self._stream_writer = None self._task = None self._transport = None self._client_connected_cb = client_connected_cb @@ -214,10 +213,8 @@ def _stream_reader(self): return None return self._stream_reader_wr() - def _replace_writer(self, writer): + def _replace_transport(self, transport): loop = self._loop - transport = writer.transport - self._stream_writer = writer self._transport = transport self._over_ssl = transport.get_extra_info('sslcontext') is not None @@ -239,11 +236,8 @@ def connection_made(self, transport): reader.set_transport(transport) self._over_ssl = transport.get_extra_info('sslcontext') is not None if self._client_connected_cb is not None: - self._stream_writer = StreamWriter(transport, self, - reader, - self._loop) - res = self._client_connected_cb(reader, - self._stream_writer) + writer = StreamWriter(transport, self, reader, self._loop) + res = self._client_connected_cb(reader, writer) if coroutines.iscoroutine(res): def callback(task): if task.cancelled(): @@ -405,9 +399,9 @@ async def start_tls(self, sslcontext, *, ssl_handshake_timeout=ssl_handshake_timeout, ssl_shutdown_timeout=ssl_shutdown_timeout) self._transport = new_transport - protocol._replace_writer(self) + protocol._replace_transport(new_transport) - def __del__(self): + def __del__(self, warnings=warnings): if not self._transport.is_closing(): if self._loop.is_closed(): warnings.warn("loop is closed", ResourceWarning) @@ -547,17 +541,17 @@ async def _wait_for_data(self, func_name): self._waiter = None async def readline(self): - """Read chunk of data from the stream until newline (b'\n') is found. + r"""Read chunk of data from the stream until newline (b'\n') is found. - On success, return chunk that ends with newline. If only partial + On success, return chunk that ends with newline. If only partial line can be read due to EOF, return incomplete line without - terminating newline. When EOF was reached while no bytes read, empty - bytes object is returned. + terminating newline. When EOF was reached while no bytes read, + empty bytes object is returned. - If limit is reached, ValueError will be raised. In that case, if + If limit is reached, ValueError will be raised. In that case, if newline was found, complete line including newline will be removed - from internal buffer. Else, internal buffer will be cleared. Limit is - compared against part of the line without newline. + from internal buffer. Else, internal buffer will be cleared. + Limit is compared against part of the line without newline. If stream was paused, this function will automatically resume it if needed. @@ -596,20 +590,34 @@ async def readuntil(self, separator=b'\n'): If the data cannot be read because of over limit, a LimitOverrunError exception will be raised, and the data will be left in the internal buffer, so it can be read again. + + The ``separator`` may also be a tuple of separators. In this + case the return value will be the shortest possible that has any + separator as the suffix. For the purposes of LimitOverrunError, + the shortest possible separator is considered to be the one that + matched. """ - seplen = len(separator) - if seplen == 0: + if isinstance(separator, tuple): + # Makes sure shortest matches wins + separator = sorted(separator, key=len) + else: + separator = [separator] + if not separator: + raise ValueError('Separator should contain at least one element') + min_seplen = len(separator[0]) + max_seplen = len(separator[-1]) + if min_seplen == 0: raise ValueError('Separator should be at least one-byte string') if self._exception is not None: raise self._exception # Consume whole buffer except last bytes, which length is - # one less than seplen. Let's check corner cases with - # separator='SEPARATOR': + # one less than max_seplen. Let's check corner cases with + # separator[-1]='SEPARATOR': # * we have received almost complete separator (without last # byte). i.e buffer='some textSEPARATO'. In this case we - # can safely consume len(separator) - 1 bytes. + # can safely consume max_seplen - 1 bytes. # * last byte of buffer is first byte of separator, i.e. # buffer='abcdefghijklmnopqrS'. We may safely consume # everything except that last byte, but this require to @@ -622,26 +630,35 @@ async def readuntil(self, separator=b'\n'): # messages :) # `offset` is the number of bytes from the beginning of the buffer - # where there is no occurrence of `separator`. + # where there is no occurrence of any `separator`. offset = 0 - # Loop until we find `separator` in the buffer, exceed the buffer size, + # Loop until we find a `separator` in the buffer, exceed the buffer size, # or an EOF has happened. while True: buflen = len(self._buffer) - # Check if we now have enough data in the buffer for `separator` to - # fit. - if buflen - offset >= seplen: - isep = self._buffer.find(separator, offset) - - if isep != -1: - # `separator` is in the buffer. `isep` will be used later - # to retrieve the data. + # Check if we now have enough data in the buffer for shortest + # separator to fit. + if buflen - offset >= min_seplen: + match_start = None + match_end = None + for sep in separator: + isep = self._buffer.find(sep, offset) + + if isep != -1: + # `separator` is in the buffer. `match_start` and + # `match_end` will be used later to retrieve the + # data. + end = isep + len(sep) + if match_end is None or end < match_end: + match_end = end + match_start = isep + if match_end is not None: break # see upper comment for explanation. - offset = buflen + 1 - seplen + offset = max(0, buflen + 1 - max_seplen) if offset > self._limit: raise exceptions.LimitOverrunError( 'Separator is not found, and chunk exceed the limit', @@ -650,7 +667,7 @@ async def readuntil(self, separator=b'\n'): # Complete message (with full separator) may be present in buffer # even when EOF flag is set. This may happen when the last chunk # adds data which makes separator be found. That's why we check for - # EOF *ater* inspecting the buffer. + # EOF *after* inspecting the buffer. if self._eof: chunk = bytes(self._buffer) self._buffer.clear() @@ -659,12 +676,12 @@ async def readuntil(self, separator=b'\n'): # _wait_for_data() will resume reading if stream was paused. await self._wait_for_data('readuntil') - if isep > self._limit: + if match_start > self._limit: raise exceptions.LimitOverrunError( - 'Separator is found, but chunk is longer than limit', isep) + 'Separator is found, but chunk is longer than limit', match_start) - chunk = self._buffer[:isep + seplen] - del self._buffer[:isep + seplen] + chunk = self._buffer[:match_end] + del self._buffer[:match_end] self._maybe_resume_transport() return bytes(chunk) diff --git a/Lib/asyncio/taskgroups.py b/Lib/asyncio/taskgroups.py index d264e51f1fd..00e8f6d5d1a 100644 --- a/Lib/asyncio/taskgroups.py +++ b/Lib/asyncio/taskgroups.py @@ -6,6 +6,7 @@ from . import events from . import exceptions +from . import futures from . import tasks @@ -66,6 +67,20 @@ async def __aenter__(self): return self async def __aexit__(self, et, exc, tb): + tb = None + try: + return await self._aexit(et, exc) + finally: + # Exceptions are heavy objects that can have object + # cycles (bad for GC); let's not keep a reference to + # a bunch of them. It would be nicer to use a try/finally + # in __aexit__ directly but that introduced some diff noise + self._parent_task = None + self._errors = None + self._base_error = None + exc = None + + async def _aexit(self, et, exc): self._exiting = True if (exc is not None and @@ -73,14 +88,10 @@ async def __aexit__(self, et, exc, tb): self._base_error is None): self._base_error = exc - propagate_cancellation_error = \ - exc if et is exceptions.CancelledError else None - if self._parent_cancel_requested: - # If this flag is set we *must* call uncancel(). - if self._parent_task.uncancel() == 0: - # If there are no pending cancellations left, - # don't propagate CancelledError. - propagate_cancellation_error = None + if et is not None and issubclass(et, exceptions.CancelledError): + propagate_cancellation_error = exc + else: + propagate_cancellation_error = None if et is not None: if not self._aborting: @@ -126,51 +137,78 @@ async def __aexit__(self, et, exc, tb): assert not self._tasks if self._base_error is not None: - raise self._base_error + try: + raise self._base_error + finally: + exc = None + + if self._parent_cancel_requested: + # If this flag is set we *must* call uncancel(). + if self._parent_task.uncancel() == 0: + # If there are no pending cancellations left, + # don't propagate CancelledError. + propagate_cancellation_error = None # Propagate CancelledError if there is one, except if there # are other errors -- those have priority. - if propagate_cancellation_error and not self._errors: - raise propagate_cancellation_error - - if et is not None and et is not exceptions.CancelledError: + try: + if propagate_cancellation_error is not None and not self._errors: + try: + raise propagate_cancellation_error + finally: + exc = None + finally: + propagate_cancellation_error = None + + if et is not None and not issubclass(et, exceptions.CancelledError): self._errors.append(exc) if self._errors: - # Exceptions are heavy objects that can have object - # cycles (bad for GC); let's not keep a reference to - # a bunch of them. + # If the parent task is being cancelled from the outside + # of the taskgroup, un-cancel and re-cancel the parent task, + # which will keep the cancel count stable. + if self._parent_task.cancelling(): + self._parent_task.uncancel() + self._parent_task.cancel() try: - me = BaseExceptionGroup('unhandled errors in a TaskGroup', self._errors) - raise me from None + raise BaseExceptionGroup( + 'unhandled errors in a TaskGroup', + self._errors, + ) from None finally: - self._errors = None + exc = None - def create_task(self, coro, *, name=None, context=None): + + def create_task(self, coro, **kwargs): """Create a new task in this group and return it. Similar to `asyncio.create_task`. """ if not self._entered: + coro.close() raise RuntimeError(f"TaskGroup {self!r} has not been entered") if self._exiting and not self._tasks: + coro.close() raise RuntimeError(f"TaskGroup {self!r} is finished") if self._aborting: + coro.close() raise RuntimeError(f"TaskGroup {self!r} is shutting down") - if context is None: - task = self._loop.create_task(coro) - else: - task = self._loop.create_task(coro, context=context) - tasks._set_task_name(task, name) - # optimization: Immediately call the done callback if the task is + task = self._loop.create_task(coro, **kwargs) + + futures.future_add_to_awaited_by(task, self._parent_task) + + # Always schedule the done callback even if the task is # already done (e.g. if the coro was able to complete eagerly), - # and skip scheduling a done callback - if task.done(): - self._on_task_done(task) - else: - self._tasks.add(task) - task.add_done_callback(self._on_task_done) - return task + # otherwise if the task completes with an exception then it will cancel + # the current task too early. gh-128550, gh-128588 + self._tasks.add(task) + task.add_done_callback(self._on_task_done) + try: + return task + finally: + # gh-128552: prevent a refcycle of + # task.exception().__traceback__->TaskGroup.create_task->task + del task # Since Python 3.8 Tasks propagate all exceptions correctly, # except for KeyboardInterrupt and SystemExit which are @@ -190,6 +228,8 @@ def _abort(self): def _on_task_done(self, task): self._tasks.discard(task) + futures.future_discard_from_awaited_by(task, self._parent_task) + if self._on_completed_fut is not None and not self._tasks: if not self._on_completed_fut.done(): self._on_completed_fut.set_result(True) diff --git a/Lib/asyncio/tasks.py b/Lib/asyncio/tasks.py index 0b22e28d8e0..14d3c1ceb58 100644 --- a/Lib/asyncio/tasks.py +++ b/Lib/asyncio/tasks.py @@ -15,8 +15,8 @@ import functools import inspect import itertools +import math import types -import warnings import weakref from types import GenericAlias @@ -25,6 +25,7 @@ from . import events from . import exceptions from . import futures +from . import queues from . import timeouts # Helper to generate new task names @@ -47,37 +48,9 @@ def all_tasks(loop=None): # capturing the set of eager tasks first, so if an eager task "graduates" # to a regular task in another thread, we don't risk missing it. eager_tasks = list(_eager_tasks) - # Looping over the WeakSet isn't safe as it can be updated from another - # thread, therefore we cast it to list prior to filtering. The list cast - # itself requires iteration, so we repeat it several times ignoring - # RuntimeErrors (which are not very likely to occur). - # See issues 34970 and 36607 for details. - scheduled_tasks = None - i = 0 - while True: - try: - scheduled_tasks = list(_scheduled_tasks) - except RuntimeError: - i += 1 - if i >= 1000: - raise - else: - break - return {t for t in itertools.chain(scheduled_tasks, eager_tasks) - if futures._get_loop(t) is loop and not t.done()} - -def _set_task_name(task, name): - if name is not None: - try: - set_name = task.set_name - except AttributeError: - warnings.warn("Task.set_name() was added in Python 3.8, " - "the method support will be mandatory for third-party " - "task implementations since 3.13.", - DeprecationWarning, stacklevel=3) - else: - set_name(name) + return {t for t in itertools.chain(_scheduled_tasks, eager_tasks) + if futures._get_loop(t) is loop and not t.done()} class Task(futures._PyFuture): # Inherit Python Task implementation @@ -137,7 +110,7 @@ def __init__(self, coro, *, loop=None, name=None, context=None, self.__eager_start() else: self._loop.call_soon(self.__step, context=self._context) - _register_task(self) + _py_register_task(self) def __del__(self): if self._state == futures._PENDING and self._log_destroy_pending: @@ -267,42 +240,44 @@ def uncancel(self): """ if self._num_cancels_requested > 0: self._num_cancels_requested -= 1 + if self._num_cancels_requested == 0: + self._must_cancel = False return self._num_cancels_requested def __eager_start(self): - prev_task = _swap_current_task(self._loop, self) + prev_task = _py_swap_current_task(self._loop, self) try: - _register_eager_task(self) + _py_register_eager_task(self) try: self._context.run(self.__step_run_and_handle_result, None) finally: - _unregister_eager_task(self) + _py_unregister_eager_task(self) finally: try: - curtask = _swap_current_task(self._loop, prev_task) + curtask = _py_swap_current_task(self._loop, prev_task) assert curtask is self finally: if self.done(): self._coro = None self = None # Needed to break cycles when an exception occurs. else: - _register_task(self) + _py_register_task(self) def __step(self, exc=None): if self.done(): raise exceptions.InvalidStateError( - f'_step(): already done: {self!r}, {exc!r}') + f'__step(): already done: {self!r}, {exc!r}') if self._must_cancel: if not isinstance(exc, exceptions.CancelledError): exc = self._make_cancelled_error() self._must_cancel = False self._fut_waiter = None - _enter_task(self._loop, self) + _py_enter_task(self._loop, self) try: self.__step_run_and_handle_result(exc) finally: - _leave_task(self._loop, self) + _py_leave_task(self._loop, self) self = None # Needed to break cycles when an exception occurs. def __step_run_and_handle_result(self, exc): @@ -347,6 +322,7 @@ def __step_run_and_handle_result(self, exc): self._loop.call_soon( self.__step, new_exc, context=self._context) else: + futures.future_add_to_awaited_by(result, self) result._asyncio_future_blocking = False result.add_done_callback( self.__wakeup, context=self._context) @@ -381,6 +357,7 @@ def __step_run_and_handle_result(self, exc): self = None # Needed to break cycles when an exception occurs. def __wakeup(self, future): + futures.future_discard_from_awaited_by(future, self) try: future.result() except BaseException as exc: @@ -389,7 +366,7 @@ def __wakeup(self, future): else: # Don't pass the value of `future.result()` explicitly, # as `Future.__iter__` and `Future.__await__` don't need it. - # If we call `_step(value, None)` instead of `_step()`, + # If we call `__step(value, None)` instead of `__step()`, # Python eval loop would use `.send(value)` method call, # instead of `__next__()`, which is slower for futures # that return non-generator iterators from their `__iter__`. @@ -409,20 +386,13 @@ def __wakeup(self, future): Task = _CTask = _asyncio.Task -def create_task(coro, *, name=None, context=None): +def create_task(coro, **kwargs): """Schedule the execution of a coroutine object in a spawn task. Return a Task object. """ loop = events.get_running_loop() - if context is None: - # Use legacy API if context is not needed - task = loop.create_task(coro) - else: - task = loop.create_task(coro, context=context) - - _set_task_name(task, name) - return task + return loop.create_task(coro, **kwargs) # wait() and as_completed() similar to those in PEP 3148. @@ -437,8 +407,6 @@ async def wait(fs, *, timeout=None, return_when=ALL_COMPLETED): The fs iterable must not be empty. - Coroutines will be wrapped in Tasks. - Returns two sets of Future: (done, pending). Usage: @@ -530,6 +498,7 @@ async def _wait(fs, timeout, return_when, loop): if timeout is not None: timeout_handle = loop.call_later(timeout, _release_waiter, waiter) counter = len(fs) + cur_task = current_task() def _on_completion(f): nonlocal counter @@ -542,9 +511,11 @@ def _on_completion(f): timeout_handle.cancel() if not waiter.done(): waiter.set_result(None) + futures.future_discard_from_awaited_by(f, cur_task) for f in fs: f.add_done_callback(_on_completion) + futures.future_add_to_awaited_by(f, cur_task) try: await waiter @@ -580,62 +551,127 @@ async def _cancel_and_wait(fut): fut.remove_done_callback(cb) -# This is *not* a @coroutine! It is just an iterator (yielding Futures). +class _AsCompletedIterator: + """Iterator of awaitables representing tasks of asyncio.as_completed. + + As an asynchronous iterator, iteration yields futures as they finish. As a + plain iterator, new coroutines are yielded that will return or raise the + result of the next underlying future to complete. + """ + def __init__(self, aws, timeout): + self._done = queues.Queue() + self._timeout_handle = None + + loop = events.get_event_loop() + todo = {ensure_future(aw, loop=loop) for aw in set(aws)} + for f in todo: + f.add_done_callback(self._handle_completion) + if todo and timeout is not None: + self._timeout_handle = ( + loop.call_later(timeout, self._handle_timeout) + ) + self._todo = todo + self._todo_left = len(todo) + + def __aiter__(self): + return self + + def __iter__(self): + return self + + async def __anext__(self): + if not self._todo_left: + raise StopAsyncIteration + assert self._todo_left > 0 + self._todo_left -= 1 + return await self._wait_for_one() + + def __next__(self): + if not self._todo_left: + raise StopIteration + assert self._todo_left > 0 + self._todo_left -= 1 + return self._wait_for_one(resolve=True) + + def _handle_timeout(self): + for f in self._todo: + f.remove_done_callback(self._handle_completion) + self._done.put_nowait(None) # Sentinel for _wait_for_one(). + self._todo.clear() # Can't do todo.remove(f) in the loop. + + def _handle_completion(self, f): + if not self._todo: + return # _handle_timeout() was here first. + self._todo.remove(f) + self._done.put_nowait(f) + if not self._todo and self._timeout_handle is not None: + self._timeout_handle.cancel() + + async def _wait_for_one(self, resolve=False): + # Wait for the next future to be done and return it unless resolve is + # set, in which case return either the result of the future or raise + # an exception. + f = await self._done.get() + if f is None: + # Dummy value from _handle_timeout(). + raise exceptions.TimeoutError + return f.result() if resolve else f + + def as_completed(fs, *, timeout=None): - """Return an iterator whose values are coroutines. + """Create an iterator of awaitables or their results in completion order. - When waiting for the yielded coroutines you'll get the results (or - exceptions!) of the original Futures (or coroutines), in the order - in which and as soon as they complete. + Run the supplied awaitables concurrently. The returned object can be + iterated to obtain the results of the awaitables as they finish. - This differs from PEP 3148; the proper way to use this is: + The object returned can be iterated as an asynchronous iterator or + a plain iterator. When asynchronous iteration is used, the + originally-supplied awaitables are yielded if they are tasks or + futures. This makes it easy to correlate previously-scheduled tasks + with their results: - for f in as_completed(fs): - result = await f # The 'await' may raise. - # Use result. + ipv4_connect = create_task(open_connection("127.0.0.1", 80)) + ipv6_connect = create_task(open_connection("::1", 80)) + tasks = [ipv4_connect, ipv6_connect] - If a timeout is specified, the 'await' will raise - TimeoutError when the timeout occurs before all Futures are done. + async for earliest_connect in as_completed(tasks): + # earliest_connect is done. The result can be obtained by + # awaiting it or calling earliest_connect.result() + reader, writer = await earliest_connect - Note: The futures 'f' are not necessarily members of fs. - """ - if futures.isfuture(fs) or coroutines.iscoroutine(fs): - raise TypeError(f"expect an iterable of futures, not {type(fs).__name__}") + if earliest_connect is ipv6_connect: + print("IPv6 connection established.") + else: + print("IPv4 connection established.") - from .queues import Queue # Import here to avoid circular import problem. - done = Queue() + During asynchronous iteration, implicitly-created tasks will be + yielded for supplied awaitables that aren't tasks or futures. - loop = events.get_event_loop() - todo = {ensure_future(f, loop=loop) for f in set(fs)} - timeout_handle = None + When used as a plain iterator, each iteration yields a new coroutine + that returns the result or raises the exception of the next completed + awaitable. This pattern is compatible with Python versions older than + 3.13: - def _on_timeout(): - for f in todo: - f.remove_done_callback(_on_completion) - done.put_nowait(None) # Queue a dummy value for _wait_for_one(). - todo.clear() # Can't do todo.remove(f) in the loop. + ipv4_connect = create_task(open_connection("127.0.0.1", 80)) + ipv6_connect = create_task(open_connection("::1", 80)) + tasks = [ipv4_connect, ipv6_connect] - def _on_completion(f): - if not todo: - return # _on_timeout() was here first. - todo.remove(f) - done.put_nowait(f) - if not todo and timeout_handle is not None: - timeout_handle.cancel() + for next_connect in as_completed(tasks): + # next_connect is not one of the original task objects. It must + # be awaited to obtain the result value or raise the exception + # of the awaitable that finishes next. + reader, writer = await next_connect - async def _wait_for_one(): - f = await done.get() - if f is None: - # Dummy value from _on_timeout(). - raise exceptions.TimeoutError - return f.result() # May raise f.exception(). + A TimeoutError is raised if the timeout occurs before all awaitables + are done. This is raised by the async for loop during asynchronous + iteration or by the coroutines yielded during plain iteration. + """ + if inspect.isawaitable(fs): + raise TypeError( + f"expects an iterable of awaitables, not {type(fs).__name__}" + ) - for f in todo: - f.add_done_callback(_on_completion) - if todo and timeout is not None: - timeout_handle = loop.call_later(timeout, _on_timeout) - for _ in range(len(todo)): - yield _wait_for_one() + return _AsCompletedIterator(fs, timeout) @types.coroutine @@ -656,6 +692,9 @@ async def sleep(delay, result=None): await __sleep0() return result + if math.isnan(delay): + raise ValueError("Invalid delay: NaN (not a number)") + loop = events.get_running_loop() future = loop.create_future() h = loop.call_later(delay, @@ -764,10 +803,19 @@ def gather(*coros_or_futures, return_exceptions=False): outer.set_result([]) return outer - def _done_callback(fut): + loop = events._get_running_loop() + if loop is not None: + cur_task = current_task(loop) + else: + cur_task = None + + def _done_callback(fut, cur_task=cur_task): nonlocal nfinished nfinished += 1 + if cur_task is not None: + futures.future_discard_from_awaited_by(fut, cur_task) + if outer is None or outer.done(): if not fut.cancelled(): # Mark exception retrieved. @@ -824,7 +872,6 @@ def _done_callback(fut): nfuts = 0 nfinished = 0 done_futs = [] - loop = None outer = None # bpo-46672 for arg in coros_or_futures: if arg not in arg_to_fut: @@ -837,12 +884,13 @@ def _done_callback(fut): # can't control it, disable the "destroy pending task" # warning. fut._log_destroy_pending = False - nfuts += 1 arg_to_fut[arg] = fut if fut.done(): done_futs.append(fut) else: + if cur_task is not None: + futures.future_add_to_awaited_by(fut, cur_task) fut.add_done_callback(_done_callback) else: @@ -862,6 +910,25 @@ def _done_callback(fut): return outer +def _log_on_exception(fut): + if fut.cancelled(): + return + + exc = fut.exception() + if exc is None: + return + + context = { + 'message': + f'{exc.__class__.__name__} exception in shielded future', + 'exception': exc, + 'future': fut, + } + if fut._source_traceback: + context['source_traceback'] = fut._source_traceback + fut._loop.call_exception_handler(context) + + def shield(arg): """Wait for a future, shielding it from cancellation. @@ -902,11 +969,16 @@ def shield(arg): loop = futures._get_loop(inner) outer = loop.create_future() + if loop is not None and (cur_task := current_task(loop)) is not None: + futures.future_add_to_awaited_by(inner, cur_task) + else: + cur_task = None + + def _clear_awaited_by_callback(inner): + futures.future_discard_from_awaited_by(inner, cur_task) + def _inner_done_callback(inner): if outer.cancelled(): - if not inner.cancelled(): - # Mark inner's result as retrieved. - inner.exception() return if inner.cancelled(): @@ -918,10 +990,16 @@ def _inner_done_callback(inner): else: outer.set_result(inner.result()) - def _outer_done_callback(outer): if not inner.done(): inner.remove_done_callback(_inner_done_callback) + # Keep only one callback to log on cancel + inner.remove_done_callback(_log_on_exception) + inner.add_done_callback(_log_on_exception) + + if cur_task is not None: + inner.add_done_callback(_clear_awaited_by_callback) + inner.add_done_callback(_inner_done_callback) outer.add_done_callback(_outer_done_callback) @@ -954,25 +1032,26 @@ def callback(): def create_eager_task_factory(custom_task_constructor): """Create a function suitable for use as a task factory on an event-loop. - Example usage: + Example usage: - loop.set_task_factory( - asyncio.create_eager_task_factory(my_task_constructor)) + loop.set_task_factory( + asyncio.create_eager_task_factory(my_task_constructor)) - Now, tasks created will be started immediately (rather than being first - scheduled to an event loop). The constructor argument can be any callable - that returns a Task-compatible object and has a signature compatible - with `Task.__init__`; it must have the `eager_start` keyword argument. + Now, tasks created will be started immediately (rather than being first + scheduled to an event loop). The constructor argument can be any + callable that returns a Task-compatible object and has a signature + compatible with `Task.__init__`; it must have the `eager_start` + keyword argument. - Most applications will use `Task` for `custom_task_constructor` and in - this case there's no need to call `create_eager_task_factory()` - directly. Instead the global `eager_task_factory` instance can be - used. E.g. `loop.set_task_factory(asyncio.eager_task_factory)`. - """ + Most applications will use `Task` for `custom_task_constructor` and in + this case there's no need to call `create_eager_task_factory()` + directly. Instead the global `eager_task_factory` instance can be + used. E.g. `loop.set_task_factory(asyncio.eager_task_factory)`. + """ - def factory(loop, coro, *, name=None, context=None): + def factory(loop, coro, *, eager_start=True, **kwargs): return custom_task_constructor( - coro, loop=loop, name=name, context=context, eager_start=True) + coro, loop=loop, eager_start=eager_start, **kwargs) return factory @@ -1044,14 +1123,13 @@ def _unregister_eager_task(task): _py_enter_task = _enter_task _py_leave_task = _leave_task _py_swap_current_task = _swap_current_task - +_py_all_tasks = all_tasks try: from _asyncio import (_register_task, _register_eager_task, _unregister_task, _unregister_eager_task, _enter_task, _leave_task, _swap_current_task, - _scheduled_tasks, _eager_tasks, _current_tasks, - current_task) + current_task, all_tasks) except ImportError: pass else: @@ -1063,3 +1141,4 @@ def _unregister_eager_task(task): _c_enter_task = _enter_task _c_leave_task = _leave_task _c_swap_current_task = _swap_current_task + _c_all_tasks = all_tasks diff --git a/Lib/asyncio/threads.py b/Lib/asyncio/threads.py index db048a8231d..5001351b097 100644 --- a/Lib/asyncio/threads.py +++ b/Lib/asyncio/threads.py @@ -17,7 +17,8 @@ async def to_thread(func, /, *args, **kwargs): allowing context variables from the main thread to be accessed in the separate thread. - Return a coroutine that can be awaited to get the eventual result of *func*. + Return a coroutine that can be awaited to get the eventual result of + *func*. """ loop = events.get_running_loop() ctx = contextvars.copy_context() diff --git a/Lib/asyncio/timeouts.py b/Lib/asyncio/timeouts.py index 30042abb3ad..65ddc285abd 100644 --- a/Lib/asyncio/timeouts.py +++ b/Lib/asyncio/timeouts.py @@ -1,7 +1,6 @@ import enum from types import TracebackType -from typing import final, Optional, Type from . import events from . import exceptions @@ -23,14 +22,14 @@ class _State(enum.Enum): EXITED = "finished" -@final class Timeout: """Asynchronous context manager for cancelling overdue coroutines. - Use `timeout()` or `timeout_at()` rather than instantiating this class directly. + Use `timeout()` or `timeout_at()` rather than instantiating this class + directly. """ - def __init__(self, when: Optional[float]) -> None: + def __init__(self, when: float | None) -> None: """Schedule a timeout that will trigger at a given loop time. - If `when` is `None`, the timeout will never trigger. @@ -39,15 +38,15 @@ def __init__(self, when: Optional[float]) -> None: """ self._state = _State.CREATED - self._timeout_handler: Optional[events.TimerHandle] = None - self._task: Optional[tasks.Task] = None + self._timeout_handler: events.TimerHandle | None = None + self._task: tasks.Task | None = None self._when = when - def when(self) -> Optional[float]: + def when(self) -> float | None: """Return the current deadline.""" return self._when - def reschedule(self, when: Optional[float]) -> None: + def reschedule(self, when: float | None) -> None: """Reschedule the timeout.""" if self._state is not _State.ENTERED: if self._state is _State.CREATED: @@ -96,10 +95,10 @@ async def __aenter__(self) -> "Timeout": async def __aexit__( self, - exc_type: Optional[Type[BaseException]], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType], - ) -> Optional[bool]: + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, + ) -> bool | None: assert self._state in (_State.ENTERED, _State.EXPIRING) if self._timeout_handler is not None: @@ -109,10 +108,16 @@ async def __aexit__( if self._state is _State.EXPIRING: self._state = _State.EXPIRED - if self._task.uncancel() <= self._cancelling and exc_type is exceptions.CancelledError: + if self._task.uncancel() <= self._cancelling and exc_type is not None: # Since there are no new cancel requests, we're # handling this. - raise TimeoutError from exc_val + if issubclass(exc_type, exceptions.CancelledError): + raise TimeoutError from exc_val + elif exc_val is not None: + self._insert_timeout_error(exc_val) + if isinstance(exc_val, ExceptionGroup): + for exc in exc_val.exceptions: + self._insert_timeout_error(exc) elif self._state is _State.ENTERED: self._state = _State.EXITED @@ -125,8 +130,18 @@ def _on_timeout(self) -> None: # drop the reference early self._timeout_handler = None + @staticmethod + def _insert_timeout_error(exc_val: BaseException) -> None: + while exc_val.__context__ is not None: + if isinstance(exc_val.__context__, exceptions.CancelledError): + te = TimeoutError() + te.__context__ = te.__cause__ = exc_val.__context__ + exc_val.__context__ = te + break + exc_val = exc_val.__context__ -def timeout(delay: Optional[float]) -> Timeout: + +def timeout(delay: float | None) -> Timeout: """Timeout async context manager. Useful in cases when you want to apply timeout logic around block @@ -146,7 +161,7 @@ def timeout(delay: Optional[float]) -> Timeout: return Timeout(loop.time() + delay if delay is not None else None) -def timeout_at(when: Optional[float]) -> Timeout: +def timeout_at(when: float | None) -> Timeout: """Schedule the timeout at absolute time. Like timeout() but argument gives absolute time in the same clock system diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py new file mode 100644 index 00000000000..f39e11fdd51 --- /dev/null +++ b/Lib/asyncio/tools.py @@ -0,0 +1,276 @@ +"""Tools to analyze tasks running in asyncio programs.""" + +from collections import defaultdict, namedtuple +from itertools import count +from enum import Enum +import sys +from _remote_debugging import RemoteUnwinder, FrameInfo + +class NodeType(Enum): + COROUTINE = 1 + TASK = 2 + + +class CycleFoundException(Exception): + """Raised when there is a cycle when drawing the call tree.""" + def __init__( + self, + cycles: list[list[int]], + id2name: dict[int, str], + ) -> None: + super().__init__(cycles, id2name) + self.cycles = cycles + self.id2name = id2name + + + +# ─── indexing helpers ─────────────────────────────────────────── +def _format_stack_entry(elem: str|FrameInfo) -> str: + if not isinstance(elem, str): + if elem.lineno == 0 and elem.filename == "": + return f"{elem.funcname}" + else: + return f"{elem.funcname} {elem.filename}:{elem.lineno}" + return elem + + +def _index(result): + id2name, awaits, task_stacks = {}, [], {} + for awaited_info in result: + for task_info in awaited_info.awaited_by: + task_id = task_info.task_id + task_name = task_info.task_name + id2name[task_id] = task_name + + # Store the internal coroutine stack for this task + if task_info.coroutine_stack: + for coro_info in task_info.coroutine_stack: + call_stack = coro_info.call_stack + internal_stack = [_format_stack_entry(frame) for frame in call_stack] + task_stacks[task_id] = internal_stack + + # Add the awaited_by relationships (external dependencies) + if task_info.awaited_by: + for coro_info in task_info.awaited_by: + call_stack = coro_info.call_stack + parent_task_id = coro_info.task_name + stack = [_format_stack_entry(frame) for frame in call_stack] + awaits.append((parent_task_id, stack, task_id)) + return id2name, awaits, task_stacks + + +def _build_tree(id2name, awaits, task_stacks): + id2label = {(NodeType.TASK, tid): name for tid, name in id2name.items()} + children = defaultdict(list) + cor_nodes = defaultdict(dict) # Maps parent -> {frame_name: node_key} + next_cor_id = count(1) + + def get_or_create_cor_node(parent, frame): + """Get existing coroutine node or create new one under parent""" + if frame in cor_nodes[parent]: + return cor_nodes[parent][frame] + + node_key = (NodeType.COROUTINE, f"c{next(next_cor_id)}") + id2label[node_key] = frame + children[parent].append(node_key) + cor_nodes[parent][frame] = node_key + return node_key + + # Build task dependency tree with coroutine frames + for parent_id, stack, child_id in awaits: + cur = (NodeType.TASK, parent_id) + for frame in reversed(stack): + cur = get_or_create_cor_node(cur, frame) + + child_key = (NodeType.TASK, child_id) + if child_key not in children[cur]: + children[cur].append(child_key) + + # Add coroutine stacks for leaf tasks + awaiting_tasks = {parent_id for parent_id, _, _ in awaits} + for task_id in id2name: + if task_id not in awaiting_tasks and task_id in task_stacks: + cur = (NodeType.TASK, task_id) + for frame in reversed(task_stacks[task_id]): + cur = get_or_create_cor_node(cur, frame) + + return id2label, children + + +def _roots(id2label, children): + all_children = {c for kids in children.values() for c in kids} + return [n for n in id2label if n not in all_children] + +# ─── detect cycles in the task-to-task graph ─────────────────────── +def _task_graph(awaits): + """Return {parent_task_id: {child_task_id, …}, …}.""" + g = defaultdict(set) + for parent_id, _stack, child_id in awaits: + g[parent_id].add(child_id) + return g + + +def _find_cycles(graph): + """ + Depth-first search for back-edges. + + Returns a list of cycles (each cycle is a list of task-ids) or an + empty list if the graph is acyclic. + """ + WHITE, GREY, BLACK = 0, 1, 2 + color = defaultdict(lambda: WHITE) + path, cycles = [], [] + + def dfs(v): + color[v] = GREY + path.append(v) + for w in graph.get(v, ()): + if color[w] == WHITE: + dfs(w) + elif color[w] == GREY: # back-edge → cycle! + i = path.index(w) + cycles.append(path[i:] + [w]) # make a copy + color[v] = BLACK + path.pop() + + for v in list(graph): + if color[v] == WHITE: + dfs(v) + return cycles + + +# ─── PRINT TREE FUNCTION ─────────────────────────────────────── +def get_all_awaited_by(pid): + unwinder = RemoteUnwinder(pid) + return unwinder.get_all_awaited_by() + + +def build_async_tree(result, task_emoji="(T)", cor_emoji=""): + """ + Build a list of strings for pretty-print an async call tree. + + The call tree is produced by `get_all_async_stacks()`, prefixing tasks + with `task_emoji` and coroutine frames with `cor_emoji`. + """ + id2name, awaits, task_stacks = _index(result) + g = _task_graph(awaits) + cycles = _find_cycles(g) + if cycles: + raise CycleFoundException(cycles, id2name) + labels, children = _build_tree(id2name, awaits, task_stacks) + + def pretty(node): + flag = task_emoji if node[0] == NodeType.TASK else cor_emoji + return f"{flag} {labels[node]}" + + def render(node, prefix="", last=True, buf=None): + if buf is None: + buf = [] + buf.append(f"{prefix}{'└── ' if last else '├── '}{pretty(node)}") + new_pref = prefix + (" " if last else "│ ") + kids = children.get(node, []) + for i, kid in enumerate(kids): + render(kid, new_pref, i == len(kids) - 1, buf) + return buf + + return [render(root) for root in _roots(labels, children)] + + +def build_task_table(result): + id2name, _, _ = _index(result) + table = [] + + for awaited_info in result: + thread_id = awaited_info.thread_id + for task_info in awaited_info.awaited_by: + # Get task info + task_id = task_info.task_id + task_name = task_info.task_name + + # Build coroutine stack string + frames = [frame for coro in task_info.coroutine_stack + for frame in coro.call_stack] + coro_stack = " -> ".join(_format_stack_entry(x).split(" ")[0] + for x in frames) + + # Handle tasks with no awaiters + if not task_info.awaited_by: + table.append([thread_id, hex(task_id), task_name, coro_stack, + "", "", "0x0"]) + continue + + # Handle tasks with awaiters + for coro_info in task_info.awaited_by: + parent_id = coro_info.task_name + awaiter_frames = [_format_stack_entry(x).split(" ")[0] + for x in coro_info.call_stack] + awaiter_chain = " -> ".join(awaiter_frames) + awaiter_name = id2name.get(parent_id, "Unknown") + parent_id_str = (hex(parent_id) if isinstance(parent_id, int) + else str(parent_id)) + + table.append([thread_id, hex(task_id), task_name, coro_stack, + awaiter_chain, awaiter_name, parent_id_str]) + + return table + +def _print_cycle_exception(exception: CycleFoundException): + print("ERROR: await-graph contains cycles - cannot print a tree!", file=sys.stderr) + print("", file=sys.stderr) + for c in exception.cycles: + inames = " → ".join(exception.id2name.get(tid, hex(tid)) for tid in c) + print(f"cycle: {inames}", file=sys.stderr) + + +def exit_with_permission_help_text(): + """ + Prints a message pointing to platform-specific permission help text and exits the program. + This function is called when a PermissionError is encountered while trying + to attach to a process. + """ + print( + "Error: The specified process cannot be attached to due to insufficient permissions.\n" + "See the Python documentation for details on required privileges and troubleshooting:\n" + "https://docs.python.org/3.14/howto/remote_debugging.html#permission-requirements\n" + ) + sys.exit(1) + + +def _get_awaited_by_tasks(pid: int) -> list: + try: + return get_all_awaited_by(pid) + except RuntimeError as e: + while e.__context__ is not None: + e = e.__context__ + print(f"Error retrieving tasks: {e}") + sys.exit(1) + except PermissionError as e: + exit_with_permission_help_text() + + +def display_awaited_by_tasks_table(pid: int) -> None: + """Build and print a table of all pending tasks under `pid`.""" + + tasks = _get_awaited_by_tasks(pid) + table = build_task_table(tasks) + # Print the table in a simple tabular format + print( + f"{'tid':<10} {'task id':<20} {'task name':<20} {'coroutine stack':<50} {'awaiter chain':<50} {'awaiter name':<15} {'awaiter id':<15}" + ) + print("-" * 180) + for row in table: + print(f"{row[0]:<10} {row[1]:<20} {row[2]:<20} {row[3]:<50} {row[4]:<50} {row[5]:<15} {row[6]:<15}") + + +def display_awaited_by_tasks_tree(pid: int) -> None: + """Build and print a tree of all pending tasks under `pid`.""" + + tasks = _get_awaited_by_tasks(pid) + try: + result = build_async_tree(tasks) + except CycleFoundException as e: + _print_cycle_exception(e) + sys.exit(1) + + for tree in result: + print("\n".join(tree)) diff --git a/Lib/asyncio/transports.py b/Lib/asyncio/transports.py index 30fd41d49af..34c7ad44ffd 100644 --- a/Lib/asyncio/transports.py +++ b/Lib/asyncio/transports.py @@ -181,6 +181,8 @@ def sendto(self, data, addr=None): to be sent out asynchronously. addr is target socket address. If addr is None use target address pointed on transport creation. + If data is an empty bytes object a zero-length datagram will be + sent. """ raise NotImplementedError diff --git a/Lib/asyncio/unix_events.py b/Lib/asyncio/unix_events.py index f2e920ada46..bfb3da0debb 100644 --- a/Lib/asyncio/unix_events.py +++ b/Lib/asyncio/unix_events.py @@ -28,10 +28,7 @@ __all__ = ( 'SelectorEventLoop', - 'AbstractChildWatcher', 'SafeChildWatcher', - 'FastChildWatcher', 'PidfdChildWatcher', - 'MultiLoopChildWatcher', 'ThreadedChildWatcher', - 'DefaultEventLoopPolicy', + 'EventLoop', ) @@ -57,12 +54,18 @@ def waitstatus_to_exitcode(status): class _UnixSelectorEventLoop(selector_events.BaseSelectorEventLoop): """Unix event loop. - Adds signal handling and UNIX Domain Socket support to SelectorEventLoop. + Adds signal handling and UNIX Domain Socket support to + SelectorEventLoop. """ def __init__(self, selector=None): super().__init__(selector) self._signal_handlers = {} + self._unix_server_sockets = {} + if can_use_pidfd(): + self._watcher = _PidfdChildWatcher() + else: + self._watcher = _ThreadedChildWatcher() def close(self): super().close() @@ -92,7 +95,7 @@ def add_signal_handler(self, sig, callback, *args): Raise RuntimeError if there is a problem setting up the handler. """ if (coroutines.iscoroutine(callback) or - coroutines.iscoroutinefunction(callback)): + coroutines._iscoroutinefunction(callback)): raise TypeError("coroutines cannot be used " "with add_signal_handler()") self._check_signal(sig) @@ -195,33 +198,22 @@ def _make_write_pipe_transport(self, pipe, protocol, waiter=None, async def _make_subprocess_transport(self, protocol, args, shell, stdin, stdout, stderr, bufsize, extra=None, **kwargs): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - watcher = events.get_child_watcher() - - with watcher: - if not watcher.is_active(): - # Check early. - # Raising exception before process creation - # prevents subprocess execution if the watcher - # is not ready to handle it. - raise RuntimeError("asyncio.get_child_watcher() is not activated, " - "subprocess support is not installed.") - waiter = self.create_future() - transp = _UnixSubprocessTransport(self, protocol, args, shell, - stdin, stdout, stderr, bufsize, - waiter=waiter, extra=extra, - **kwargs) - watcher.add_child_handler(transp.get_pid(), - self._child_watcher_callback, transp) - try: - await waiter - except (SystemExit, KeyboardInterrupt): - raise - except BaseException: - transp.close() - await transp._wait() - raise + watcher = self._watcher + waiter = self.create_future() + transp = _UnixSubprocessTransport(self, protocol, args, shell, + stdin, stdout, stderr, bufsize, + waiter=waiter, extra=extra, + **kwargs) + watcher.add_child_handler(transp.get_pid(), + self._child_watcher_callback, transp) + try: + await waiter + except (SystemExit, KeyboardInterrupt): + raise + except BaseException: + transp.close() + await transp._wait() + raise return transp @@ -283,7 +275,7 @@ async def create_unix_server( sock=None, backlog=100, ssl=None, ssl_handshake_timeout=None, ssl_shutdown_timeout=None, - start_serving=True): + start_serving=True, cleanup_socket=True): if isinstance(ssl, bool): raise TypeError('ssl argument must be an SSLContext or None') @@ -339,6 +331,15 @@ async def create_unix_server( raise ValueError( f'A UNIX Domain Stream Socket was expected, got {sock!r}') + if cleanup_socket: + path = sock.getsockname() + # Check for abstract socket. `str` and `bytes` paths are supported. + if path[0] not in (0, '\x00'): + try: + self._unix_server_sockets[sock] = os.stat(path).st_ino + except FileNotFoundError: + pass + sock.setblocking(False) server = base_events.Server(self, [sock], protocol_factory, ssl, backlog, ssl_handshake_timeout, @@ -384,15 +385,18 @@ def _sock_sendfile_native_impl(self, fut, registered_fd, sock, fileno, # order to simplify the common case. self.remove_writer(registered_fd) if fut.cancelled(): - self._sock_sendfile_update_filepos(fileno, offset, total_sent) + self._sock_sendfile_update_filepos(fileno, offset) return if count: blocksize = count - total_sent if blocksize <= 0: - self._sock_sendfile_update_filepos(fileno, offset, total_sent) + self._sock_sendfile_update_filepos(fileno, offset) fut.set_result(total_sent) return + # On 32-bit architectures truncate to 1GiB to avoid OverflowError + blocksize = min(blocksize, sys.maxsize//2 + 1) + try: sent = os.sendfile(fd, fileno, offset, blocksize) except (BlockingIOError, InterruptedError): @@ -420,20 +424,20 @@ def _sock_sendfile_native_impl(self, fut, registered_fd, sock, fileno, # plain send(). err = exceptions.SendfileNotAvailableError( "os.sendfile call failed") - self._sock_sendfile_update_filepos(fileno, offset, total_sent) + self._sock_sendfile_update_filepos(fileno, offset) fut.set_exception(err) else: - self._sock_sendfile_update_filepos(fileno, offset, total_sent) + self._sock_sendfile_update_filepos(fileno, offset) fut.set_exception(exc) except (SystemExit, KeyboardInterrupt): raise except BaseException as exc: - self._sock_sendfile_update_filepos(fileno, offset, total_sent) + self._sock_sendfile_update_filepos(fileno, offset) fut.set_exception(exc) else: if sent == 0: # EOF - self._sock_sendfile_update_filepos(fileno, offset, total_sent) + self._sock_sendfile_update_filepos(fileno, offset) fut.set_result(total_sent) else: offset += sent @@ -444,9 +448,9 @@ def _sock_sendfile_native_impl(self, fut, registered_fd, sock, fileno, fd, sock, fileno, offset, count, blocksize, total_sent) - def _sock_sendfile_update_filepos(self, fileno, offset, total_sent): - if total_sent > 0: - os.lseek(fileno, offset, os.SEEK_SET) + def _sock_sendfile_update_filepos(self, fileno, offset): + # After this helper runs, the source fd's lseek pointer is at offset." + os.lseek(fileno, offset, os.SEEK_SET) def _sock_add_cancellation_callback(self, fut, sock): def cb(fut): @@ -456,6 +460,27 @@ def cb(fut): self.remove_writer(fd) fut.add_done_callback(cb) + def _stop_serving(self, sock): + # Is this a unix socket that needs cleanup? + if sock in self._unix_server_sockets: + path = sock.getsockname() + else: + path = None + + super()._stop_serving(sock) + + if path is not None: + prev_ino = self._unix_server_sockets[sock] + del self._unix_server_sockets[sock] + try: + if os.stat(path).st_ino == prev_ino: + os.unlink(path) + except FileNotFoundError: + pass + except OSError as err: + logger.error('Unable to clean up listening UNIX socket ' + '%r: %r', path, err) + class _UnixReadPipeTransport(transports.ReadTransport): @@ -830,93 +855,7 @@ def _start(self, args, shell, stdin, stdout, stderr, bufsize, **kwargs): stdin_w.close() -class AbstractChildWatcher: - """Abstract base class for monitoring child processes. - - Objects derived from this class monitor a collection of subprocesses and - report their termination or interruption by a signal. - - New callbacks are registered with .add_child_handler(). Starting a new - process must be done within a 'with' block to allow the watcher to suspend - its activity until the new process if fully registered (this is needed to - prevent a race condition in some implementations). - - Example: - with watcher: - proc = subprocess.Popen("sleep 1") - watcher.add_child_handler(proc.pid, callback) - - Notes: - Implementations of this class must be thread-safe. - - Since child watcher objects may catch the SIGCHLD signal and call - waitpid(-1), there should be only one active object per process. - """ - - def __init_subclass__(cls) -> None: - if cls.__module__ != __name__: - warnings._deprecated("AbstractChildWatcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", - remove=(3, 14)) - - def add_child_handler(self, pid, callback, *args): - """Register a new child handler. - - Arrange for callback(pid, returncode, *args) to be called when - process 'pid' terminates. Specifying another callback for the same - process replaces the previous handler. - - Note: callback() must be thread-safe. - """ - raise NotImplementedError() - - def remove_child_handler(self, pid): - """Removes the handler for process 'pid'. - - The function returns True if the handler was successfully removed, - False if there was nothing to remove.""" - - raise NotImplementedError() - - def attach_loop(self, loop): - """Attach the watcher to an event loop. - - If the watcher was previously attached to an event loop, then it is - first detached before attaching to the new loop. - - Note: loop may be None. - """ - raise NotImplementedError() - - def close(self): - """Close the watcher. - - This must be called to make sure that any underlying resource is freed. - """ - raise NotImplementedError() - - def is_active(self): - """Return ``True`` if the watcher is active and is used by the event loop. - - Return True if the watcher is installed and ready to handle process exit - notifications. - - """ - raise NotImplementedError() - - def __enter__(self): - """Enter the watcher's context and allow starting new processes - - This function must return self""" - raise NotImplementedError() - - def __exit__(self, a, b, c): - """Exit the watcher's context""" - raise NotImplementedError() - - -class PidfdChildWatcher(AbstractChildWatcher): +class _PidfdChildWatcher: """Child watcher implementation using Linux's pid file descriptors. This child watcher polls process file descriptors (pidfds) to await child @@ -928,21 +867,6 @@ class PidfdChildWatcher(AbstractChildWatcher): recent (5.3+) kernels. """ - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, exc_traceback): - pass - - def is_active(self): - return True - - def close(self): - pass - - def attach_loop(self, loop): - pass - def add_child_handler(self, pid, callback, *args): loop = events.get_running_loop() pidfd = os.pidfd_open(pid) @@ -967,386 +891,7 @@ def _do_wait(self, pid, pidfd, callback, args): os.close(pidfd) callback(pid, returncode, *args) - def remove_child_handler(self, pid): - # asyncio never calls remove_child_handler() !!! - # The method is no-op but is implemented because - # abstract base classes require it. - return True - - -class BaseChildWatcher(AbstractChildWatcher): - - def __init__(self): - self._loop = None - self._callbacks = {} - - def close(self): - self.attach_loop(None) - - def is_active(self): - return self._loop is not None and self._loop.is_running() - - def _do_waitpid(self, expected_pid): - raise NotImplementedError() - - def _do_waitpid_all(self): - raise NotImplementedError() - - def attach_loop(self, loop): - assert loop is None or isinstance(loop, events.AbstractEventLoop) - - if self._loop is not None and loop is None and self._callbacks: - warnings.warn( - 'A loop is being detached ' - 'from a child watcher with pending handlers', - RuntimeWarning) - - if self._loop is not None: - self._loop.remove_signal_handler(signal.SIGCHLD) - - self._loop = loop - if loop is not None: - loop.add_signal_handler(signal.SIGCHLD, self._sig_chld) - - # Prevent a race condition in case a child terminated - # during the switch. - self._do_waitpid_all() - - def _sig_chld(self): - try: - self._do_waitpid_all() - except (SystemExit, KeyboardInterrupt): - raise - except BaseException as exc: - # self._loop should always be available here - # as '_sig_chld' is added as a signal handler - # in 'attach_loop' - self._loop.call_exception_handler({ - 'message': 'Unknown exception in SIGCHLD handler', - 'exception': exc, - }) - - -class SafeChildWatcher(BaseChildWatcher): - """'Safe' child watcher implementation. - - This implementation avoids disrupting other code spawning processes by - polling explicitly each process in the SIGCHLD handler instead of calling - os.waitpid(-1). - - This is a safe solution but it has a significant overhead when handling a - big number of children (O(n) each time SIGCHLD is raised) - """ - - def __init__(self): - super().__init__() - warnings._deprecated("SafeChildWatcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", - remove=(3, 14)) - - def close(self): - self._callbacks.clear() - super().close() - - def __enter__(self): - return self - - def __exit__(self, a, b, c): - pass - - def add_child_handler(self, pid, callback, *args): - self._callbacks[pid] = (callback, args) - - # Prevent a race condition in case the child is already terminated. - self._do_waitpid(pid) - - def remove_child_handler(self, pid): - try: - del self._callbacks[pid] - return True - except KeyError: - return False - - def _do_waitpid_all(self): - - for pid in list(self._callbacks): - self._do_waitpid(pid) - - def _do_waitpid(self, expected_pid): - assert expected_pid > 0 - - try: - pid, status = os.waitpid(expected_pid, os.WNOHANG) - except ChildProcessError: - # The child process is already reaped - # (may happen if waitpid() is called elsewhere). - pid = expected_pid - returncode = 255 - logger.warning( - "Unknown child process pid %d, will report returncode 255", - pid) - else: - if pid == 0: - # The child process is still alive. - return - - returncode = waitstatus_to_exitcode(status) - if self._loop.get_debug(): - logger.debug('process %s exited with returncode %s', - expected_pid, returncode) - - try: - callback, args = self._callbacks.pop(pid) - except KeyError: # pragma: no cover - # May happen if .remove_child_handler() is called - # after os.waitpid() returns. - if self._loop.get_debug(): - logger.warning("Child watcher got an unexpected pid: %r", - pid, exc_info=True) - else: - callback(pid, returncode, *args) - - -class FastChildWatcher(BaseChildWatcher): - """'Fast' child watcher implementation. - - This implementation reaps every terminated processes by calling - os.waitpid(-1) directly, possibly breaking other code spawning processes - and waiting for their termination. - - There is no noticeable overhead when handling a big number of children - (O(1) each time a child terminates). - """ - def __init__(self): - super().__init__() - self._lock = threading.Lock() - self._zombies = {} - self._forks = 0 - warnings._deprecated("FastChildWatcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", - remove=(3, 14)) - - def close(self): - self._callbacks.clear() - self._zombies.clear() - super().close() - - def __enter__(self): - with self._lock: - self._forks += 1 - - return self - - def __exit__(self, a, b, c): - with self._lock: - self._forks -= 1 - - if self._forks or not self._zombies: - return - - collateral_victims = str(self._zombies) - self._zombies.clear() - - logger.warning( - "Caught subprocesses termination from unknown pids: %s", - collateral_victims) - - def add_child_handler(self, pid, callback, *args): - assert self._forks, "Must use the context manager" - - with self._lock: - try: - returncode = self._zombies.pop(pid) - except KeyError: - # The child is running. - self._callbacks[pid] = callback, args - return - - # The child is dead already. We can fire the callback. - callback(pid, returncode, *args) - - def remove_child_handler(self, pid): - try: - del self._callbacks[pid] - return True - except KeyError: - return False - - def _do_waitpid_all(self): - # Because of signal coalescing, we must keep calling waitpid() as - # long as we're able to reap a child. - while True: - try: - pid, status = os.waitpid(-1, os.WNOHANG) - except ChildProcessError: - # No more child processes exist. - return - else: - if pid == 0: - # A child process is still alive. - return - - returncode = waitstatus_to_exitcode(status) - - with self._lock: - try: - callback, args = self._callbacks.pop(pid) - except KeyError: - # unknown child - if self._forks: - # It may not be registered yet. - self._zombies[pid] = returncode - if self._loop.get_debug(): - logger.debug('unknown process %s exited ' - 'with returncode %s', - pid, returncode) - continue - callback = None - else: - if self._loop.get_debug(): - logger.debug('process %s exited with returncode %s', - pid, returncode) - - if callback is None: - logger.warning( - "Caught subprocess termination from unknown pid: " - "%d -> %d", pid, returncode) - else: - callback(pid, returncode, *args) - - -class MultiLoopChildWatcher(AbstractChildWatcher): - """A watcher that doesn't require running loop in the main thread. - - This implementation registers a SIGCHLD signal handler on - instantiation (which may conflict with other code that - install own handler for this signal). - - The solution is safe but it has a significant overhead when - handling a big number of processes (*O(n)* each time a - SIGCHLD is received). - """ - - # Implementation note: - # The class keeps compatibility with AbstractChildWatcher ABC - # To achieve this it has empty attach_loop() method - # and doesn't accept explicit loop argument - # for add_child_handler()/remove_child_handler() - # but retrieves the current loop by get_running_loop() - - def __init__(self): - self._callbacks = {} - self._saved_sighandler = None - warnings._deprecated("MultiLoopChildWatcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", - remove=(3, 14)) - - def is_active(self): - return self._saved_sighandler is not None - - def close(self): - self._callbacks.clear() - if self._saved_sighandler is None: - return - - handler = signal.getsignal(signal.SIGCHLD) - if handler != self._sig_chld: - logger.warning("SIGCHLD handler was changed by outside code") - else: - signal.signal(signal.SIGCHLD, self._saved_sighandler) - self._saved_sighandler = None - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - pass - - def add_child_handler(self, pid, callback, *args): - loop = events.get_running_loop() - self._callbacks[pid] = (loop, callback, args) - - # Prevent a race condition in case the child is already terminated. - self._do_waitpid(pid) - - def remove_child_handler(self, pid): - try: - del self._callbacks[pid] - return True - except KeyError: - return False - - def attach_loop(self, loop): - # Don't save the loop but initialize itself if called first time - # The reason to do it here is that attach_loop() is called from - # unix policy only for the main thread. - # Main thread is required for subscription on SIGCHLD signal - if self._saved_sighandler is not None: - return - - self._saved_sighandler = signal.signal(signal.SIGCHLD, self._sig_chld) - if self._saved_sighandler is None: - logger.warning("Previous SIGCHLD handler was set by non-Python code, " - "restore to default handler on watcher close.") - self._saved_sighandler = signal.SIG_DFL - - # Set SA_RESTART to limit EINTR occurrences. - signal.siginterrupt(signal.SIGCHLD, False) - - def _do_waitpid_all(self): - for pid in list(self._callbacks): - self._do_waitpid(pid) - - def _do_waitpid(self, expected_pid): - assert expected_pid > 0 - - try: - pid, status = os.waitpid(expected_pid, os.WNOHANG) - except ChildProcessError: - # The child process is already reaped - # (may happen if waitpid() is called elsewhere). - pid = expected_pid - returncode = 255 - logger.warning( - "Unknown child process pid %d, will report returncode 255", - pid) - debug_log = False - else: - if pid == 0: - # The child process is still alive. - return - - returncode = waitstatus_to_exitcode(status) - debug_log = True - try: - loop, callback, args = self._callbacks.pop(pid) - except KeyError: # pragma: no cover - # May happen if .remove_child_handler() is called - # after os.waitpid() returns. - logger.warning("Child watcher got an unexpected pid: %r", - pid, exc_info=True) - else: - if loop.is_closed(): - logger.warning("Loop %r that handles pid %r is closed", loop, pid) - else: - if debug_log and loop.get_debug(): - logger.debug('process %s exited with returncode %s', - expected_pid, returncode) - loop.call_soon_threadsafe(callback, pid, returncode, *args) - - def _sig_chld(self, signum, frame): - try: - self._do_waitpid_all() - except (SystemExit, KeyboardInterrupt): - raise - except BaseException: - logger.warning('Unknown exception in SIGCHLD handler', exc_info=True) - - -class ThreadedChildWatcher(AbstractChildWatcher): +class _ThreadedChildWatcher: """Threaded child watcher implementation. The watcher uses a thread per process @@ -1363,18 +908,6 @@ def __init__(self): self._pid_counter = itertools.count(0) self._threads = {} - def is_active(self): - return True - - def close(self): - pass - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - pass - def __del__(self, _warn=warnings.warn): threads = [thread for thread in list(self._threads.values()) if thread.is_alive()] @@ -1392,15 +925,6 @@ def add_child_handler(self, pid, callback, *args): self._threads[pid] = thread thread.start() - def remove_child_handler(self, pid): - # asyncio never calls remove_child_handler() !!! - # The method is no-op but is implemented because - # abstract base classes require it. - return True - - def attach_loop(self, loop): - pass - def _do_waitpid(self, loop, expected_pid, callback, args): assert expected_pid > 0 @@ -1439,62 +963,11 @@ def can_use_pidfd(): return True -class _UnixDefaultEventLoopPolicy(events.BaseDefaultEventLoopPolicy): - """UNIX event loop policy with a watcher for child processes.""" +class _UnixDefaultEventLoopPolicy(events._BaseDefaultEventLoopPolicy): + """UNIX event loop policy""" _loop_factory = _UnixSelectorEventLoop - def __init__(self): - super().__init__() - self._watcher = None - - def _init_watcher(self): - with events._lock: - if self._watcher is None: # pragma: no branch - if can_use_pidfd(): - self._watcher = PidfdChildWatcher() - else: - self._watcher = ThreadedChildWatcher() - - def set_event_loop(self, loop): - """Set the event loop. - - As a side effect, if a child watcher was set before, then calling - .set_event_loop() from the main thread will call .attach_loop(loop) on - the child watcher. - """ - - super().set_event_loop(loop) - - if (self._watcher is not None and - threading.current_thread() is threading.main_thread()): - self._watcher.attach_loop(loop) - - def get_child_watcher(self): - """Get the watcher for child processes. - - If not yet set, a ThreadedChildWatcher object is automatically created. - """ - if self._watcher is None: - self._init_watcher() - - warnings._deprecated("get_child_watcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", remove=(3, 14)) - return self._watcher - - def set_child_watcher(self, watcher): - """Set the watcher for child processes.""" - - assert watcher is None or isinstance(watcher, AbstractChildWatcher) - - if self._watcher is not None: - self._watcher.close() - - self._watcher = watcher - warnings._deprecated("set_child_watcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", remove=(3, 14)) - SelectorEventLoop = _UnixSelectorEventLoop -DefaultEventLoopPolicy = _UnixDefaultEventLoopPolicy +_DefaultEventLoopPolicy = _UnixDefaultEventLoopPolicy +EventLoop = SelectorEventLoop diff --git a/Lib/asyncio/windows_events.py b/Lib/asyncio/windows_events.py index cb613451a58..0bf7732136f 100644 --- a/Lib/asyncio/windows_events.py +++ b/Lib/asyncio/windows_events.py @@ -29,8 +29,8 @@ __all__ = ( 'SelectorEventLoop', 'ProactorEventLoop', 'IocpProactor', - 'DefaultEventLoopPolicy', 'WindowsSelectorEventLoopPolicy', - 'WindowsProactorEventLoopPolicy', + '_DefaultEventLoopPolicy', '_WindowsSelectorEventLoopPolicy', + '_WindowsProactorEventLoopPolicy', 'EventLoop', ) @@ -315,24 +315,25 @@ def __init__(self, proactor=None): proactor = IocpProactor() super().__init__(proactor) - def run_forever(self): - try: - assert self._self_reading_future is None - self.call_soon(self._loop_self_reading) - super().run_forever() - finally: - if self._self_reading_future is not None: - ov = self._self_reading_future._ov - self._self_reading_future.cancel() - # self_reading_future always uses IOCP, so even though it's - # been cancelled, we need to make sure that the IOCP message - # is received so that the kernel is not holding on to the - # memory, possibly causing memory corruption later. Only - # unregister it if IO is complete in all respects. Otherwise - # we need another _poll() later to complete the IO. - if ov is not None and not ov.pending: - self._proactor._unregister(ov) - self._self_reading_future = None + def _run_forever_setup(self): + assert self._self_reading_future is None + self.call_soon(self._loop_self_reading) + super()._run_forever_setup() + + def _run_forever_cleanup(self): + super()._run_forever_cleanup() + if self._self_reading_future is not None: + ov = self._self_reading_future._ov + self._self_reading_future.cancel() + # self_reading_future always uses IOCP, so even though it's + # been cancelled, we need to make sure that the IOCP message + # is received so that the kernel is not holding on to the + # memory, possibly causing memory corruption later. Only + # unregister it if IO is complete in all respects. Otherwise + # we need another _poll() later to complete the IO. + if ov is not None and not ov.pending: + self._proactor._unregister(ov) + self._self_reading_future = None async def create_pipe_connection(self, protocol_factory, address): f = self._proactor.connect_pipe(address) @@ -609,6 +610,9 @@ def sendfile(self, sock, file, offset, count): ov = _overlapped.Overlapped(NULL) offset_low = offset & 0xffff_ffff offset_high = (offset >> 32) & 0xffff_ffff + # TransmitFile ignores OVERLAPPED.Offset for handles not opened with + # FILE_FLAG_OVERLAPPED, so seek the CRT file pointer to match. + file.seek(offset) ov.TransmitFile(sock.fileno(), msvcrt.get_osfhandle(file.fileno()), offset_low, offset_high, @@ -890,12 +894,13 @@ def callback(f): SelectorEventLoop = _WindowsSelectorEventLoop -class WindowsSelectorEventLoopPolicy(events.BaseDefaultEventLoopPolicy): +class _WindowsSelectorEventLoopPolicy(events._BaseDefaultEventLoopPolicy): _loop_factory = SelectorEventLoop -class WindowsProactorEventLoopPolicy(events.BaseDefaultEventLoopPolicy): +class _WindowsProactorEventLoopPolicy(events._BaseDefaultEventLoopPolicy): _loop_factory = ProactorEventLoop -DefaultEventLoopPolicy = WindowsProactorEventLoopPolicy +_DefaultEventLoopPolicy = _WindowsProactorEventLoopPolicy +EventLoop = ProactorEventLoop diff --git a/Lib/asyncio/windows_utils.py b/Lib/asyncio/windows_utils.py index ef277fac3e2..d6393f0b1ff 100644 --- a/Lib/asyncio/windows_utils.py +++ b/Lib/asyncio/windows_utils.py @@ -10,7 +10,6 @@ import msvcrt import os import subprocess -import tempfile import warnings @@ -24,6 +23,7 @@ PIPE = subprocess.PIPE STDOUT = subprocess.STDOUT _mmap_counter = itertools.count() +_MAX_PIPE_ATTEMPTS = 20 # Replacement for os.pipe() using handles instead of fds @@ -31,10 +31,6 @@ def pipe(*, duplex=False, overlapped=(True, True), bufsize=BUFSIZE): """Like os.pipe() but with overlapped support and using handles not fds.""" - address = tempfile.mktemp( - prefix=r'\\.\pipe\python-pipe-{:d}-{:d}-'.format( - os.getpid(), next(_mmap_counter))) - if duplex: openmode = _winapi.PIPE_ACCESS_DUPLEX access = _winapi.GENERIC_READ | _winapi.GENERIC_WRITE @@ -56,9 +52,20 @@ def pipe(*, duplex=False, overlapped=(True, True), bufsize=BUFSIZE): h1 = h2 = None try: - h1 = _winapi.CreateNamedPipe( - address, openmode, _winapi.PIPE_WAIT, - 1, obsize, ibsize, _winapi.NMPWAIT_WAIT_FOREVER, _winapi.NULL) + for attempts in itertools.count(): + address = r'\\.\pipe\python-pipe-{:d}-{:d}-{}'.format( + os.getpid(), next(_mmap_counter), os.urandom(8).hex()) + try: + h1 = _winapi.CreateNamedPipe( + address, openmode, _winapi.PIPE_WAIT, + 1, obsize, ibsize, _winapi.NMPWAIT_WAIT_FOREVER, _winapi.NULL) + break + except OSError as e: + if attempts >= _MAX_PIPE_ATTEMPTS: + raise + if e.winerror not in (_winapi.ERROR_PIPE_BUSY, + _winapi.ERROR_ACCESS_DENIED): + raise h2 = _winapi.CreateFile( address, access, 0, _winapi.NULL, _winapi.OPEN_EXISTING, @@ -104,8 +111,9 @@ def fileno(self): def close(self, *, CloseHandle=_winapi.CloseHandle): if self._handle is not None: - CloseHandle(self._handle) + handle = self._handle self._handle = None + CloseHandle(handle) def __del__(self, _warn=warnings.warn): if self._handle is not None: diff --git a/Lib/asyncore.py b/Lib/asyncore.py deleted file mode 100644 index 0e92be3ad19..00000000000 --- a/Lib/asyncore.py +++ /dev/null @@ -1,642 +0,0 @@ -# -*- Mode: Python -*- -# Id: asyncore.py,v 2.51 2000/09/07 22:29:26 rushing Exp -# Author: Sam Rushing - -# ====================================================================== -# Copyright 1996 by Sam Rushing -# -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software and -# its documentation for any purpose and without fee is hereby -# granted, provided that the above copyright notice appear in all -# copies and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of Sam -# Rushing not be used in advertising or publicity pertaining to -# distribution of the software without specific, written prior -# permission. -# -# SAM RUSHING DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -# INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN -# NO EVENT SHALL SAM RUSHING BE LIABLE FOR ANY SPECIAL, INDIRECT OR -# CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN -# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -# ====================================================================== - -"""Basic infrastructure for asynchronous socket service clients and servers. - -There are only two ways to have a program on a single processor do "more -than one thing at a time". Multi-threaded programming is the simplest and -most popular way to do it, but there is another very different technique, -that lets you have nearly all the advantages of multi-threading, without -actually using multiple threads. it's really only practical if your program -is largely I/O bound. If your program is CPU bound, then pre-emptive -scheduled threads are probably what you really need. Network servers are -rarely CPU-bound, however. - -If your operating system supports the select() system call in its I/O -library (and nearly all do), then you can use it to juggle multiple -communication channels at once; doing other work while your I/O is taking -place in the "background." Although this strategy can seem strange and -complex, especially at first, it is in many ways easier to understand and -control than multi-threaded programming. The module documented here solves -many of the difficult problems for you, making the task of building -sophisticated high-performance network servers and clients a snap. -""" - -import select -import socket -import sys -import time -import warnings - -import os -from errno import EALREADY, EINPROGRESS, EWOULDBLOCK, ECONNRESET, EINVAL, \ - ENOTCONN, ESHUTDOWN, EISCONN, EBADF, ECONNABORTED, EPIPE, EAGAIN, \ - errorcode - -_DISCONNECTED = frozenset({ECONNRESET, ENOTCONN, ESHUTDOWN, ECONNABORTED, EPIPE, - EBADF}) - -try: - socket_map -except NameError: - socket_map = {} - -def _strerror(err): - try: - return os.strerror(err) - except (ValueError, OverflowError, NameError): - if err in errorcode: - return errorcode[err] - return "Unknown error %s" %err - -class ExitNow(Exception): - pass - -_reraised_exceptions = (ExitNow, KeyboardInterrupt, SystemExit) - -def read(obj): - try: - obj.handle_read_event() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def write(obj): - try: - obj.handle_write_event() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def _exception(obj): - try: - obj.handle_expt_event() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def readwrite(obj, flags): - try: - if flags & select.POLLIN: - obj.handle_read_event() - if flags & select.POLLOUT: - obj.handle_write_event() - if flags & select.POLLPRI: - obj.handle_expt_event() - if flags & (select.POLLHUP | select.POLLERR | select.POLLNVAL): - obj.handle_close() - except OSError as e: - if e.args[0] not in _DISCONNECTED: - obj.handle_error() - else: - obj.handle_close() - except _reraised_exceptions: - raise - except: - obj.handle_error() - -def poll(timeout=0.0, map=None): - if map is None: - map = socket_map - if map: - r = []; w = []; e = [] - for fd, obj in list(map.items()): - is_r = obj.readable() - is_w = obj.writable() - if is_r: - r.append(fd) - # accepting sockets should not be writable - if is_w and not obj.accepting: - w.append(fd) - if is_r or is_w: - e.append(fd) - if [] == r == w == e: - time.sleep(timeout) - return - - r, w, e = select.select(r, w, e, timeout) - - for fd in r: - obj = map.get(fd) - if obj is None: - continue - read(obj) - - for fd in w: - obj = map.get(fd) - if obj is None: - continue - write(obj) - - for fd in e: - obj = map.get(fd) - if obj is None: - continue - _exception(obj) - -def poll2(timeout=0.0, map=None): - # Use the poll() support added to the select module in Python 2.0 - if map is None: - map = socket_map - if timeout is not None: - # timeout is in milliseconds - timeout = int(timeout*1000) - pollster = select.poll() - if map: - for fd, obj in list(map.items()): - flags = 0 - if obj.readable(): - flags |= select.POLLIN | select.POLLPRI - # accepting sockets should not be writable - if obj.writable() and not obj.accepting: - flags |= select.POLLOUT - if flags: - pollster.register(fd, flags) - - r = pollster.poll(timeout) - for fd, flags in r: - obj = map.get(fd) - if obj is None: - continue - readwrite(obj, flags) - -poll3 = poll2 # Alias for backward compatibility - -def loop(timeout=30.0, use_poll=False, map=None, count=None): - if map is None: - map = socket_map - - if use_poll and hasattr(select, 'poll'): - poll_fun = poll2 - else: - poll_fun = poll - - if count is None: - while map: - poll_fun(timeout, map) - - else: - while map and count > 0: - poll_fun(timeout, map) - count = count - 1 - -class dispatcher: - - debug = False - connected = False - accepting = False - connecting = False - closing = False - addr = None - ignore_log_types = frozenset({'warning'}) - - def __init__(self, sock=None, map=None): - if map is None: - self._map = socket_map - else: - self._map = map - - self._fileno = None - - if sock: - # Set to nonblocking just to make sure for cases where we - # get a socket from a blocking source. - sock.setblocking(0) - self.set_socket(sock, map) - self.connected = True - # The constructor no longer requires that the socket - # passed be connected. - try: - self.addr = sock.getpeername() - except OSError as err: - if err.args[0] in (ENOTCONN, EINVAL): - # To handle the case where we got an unconnected - # socket. - self.connected = False - else: - # The socket is broken in some unknown way, alert - # the user and remove it from the map (to prevent - # polling of broken sockets). - self.del_channel(map) - raise - else: - self.socket = None - - def __repr__(self): - status = [self.__class__.__module__+"."+self.__class__.__qualname__] - if self.accepting and self.addr: - status.append('listening') - elif self.connected: - status.append('connected') - if self.addr is not None: - try: - status.append('%s:%d' % self.addr) - except TypeError: - status.append(repr(self.addr)) - return '<%s at %#x>' % (' '.join(status), id(self)) - - def add_channel(self, map=None): - #self.log_info('adding channel %s' % self) - if map is None: - map = self._map - map[self._fileno] = self - - def del_channel(self, map=None): - fd = self._fileno - if map is None: - map = self._map - if fd in map: - #self.log_info('closing channel %d:%s' % (fd, self)) - del map[fd] - self._fileno = None - - def create_socket(self, family=socket.AF_INET, type=socket.SOCK_STREAM): - self.family_and_type = family, type - sock = socket.socket(family, type) - sock.setblocking(0) - self.set_socket(sock) - - def set_socket(self, sock, map=None): - self.socket = sock - self._fileno = sock.fileno() - self.add_channel(map) - - def set_reuse_addr(self): - # try to re-use a server port if possible - try: - self.socket.setsockopt( - socket.SOL_SOCKET, socket.SO_REUSEADDR, - self.socket.getsockopt(socket.SOL_SOCKET, - socket.SO_REUSEADDR) | 1 - ) - except OSError: - pass - - # ================================================== - # predicates for select() - # these are used as filters for the lists of sockets - # to pass to select(). - # ================================================== - - def readable(self): - return True - - def writable(self): - return True - - # ================================================== - # socket object methods. - # ================================================== - - def listen(self, num): - self.accepting = True - if os.name == 'nt' and num > 5: - num = 5 - return self.socket.listen(num) - - def bind(self, addr): - self.addr = addr - return self.socket.bind(addr) - - def connect(self, address): - self.connected = False - self.connecting = True - err = self.socket.connect_ex(address) - if err in (EINPROGRESS, EALREADY, EWOULDBLOCK) \ - or err == EINVAL and os.name == 'nt': - self.addr = address - return - if err in (0, EISCONN): - self.addr = address - self.handle_connect_event() - else: - raise OSError(err, errorcode[err]) - - def accept(self): - # XXX can return either an address pair or None - try: - conn, addr = self.socket.accept() - except TypeError: - return None - except OSError as why: - if why.args[0] in (EWOULDBLOCK, ECONNABORTED, EAGAIN): - return None - else: - raise - else: - return conn, addr - - def send(self, data): - try: - result = self.socket.send(data) - return result - except OSError as why: - if why.args[0] == EWOULDBLOCK: - return 0 - elif why.args[0] in _DISCONNECTED: - self.handle_close() - return 0 - else: - raise - - def recv(self, buffer_size): - try: - data = self.socket.recv(buffer_size) - if not data: - # a closed connection is indicated by signaling - # a read condition, and having recv() return 0. - self.handle_close() - return b'' - else: - return data - except OSError as why: - # winsock sometimes raises ENOTCONN - if why.args[0] in _DISCONNECTED: - self.handle_close() - return b'' - else: - raise - - def close(self): - self.connected = False - self.accepting = False - self.connecting = False - self.del_channel() - if self.socket is not None: - try: - self.socket.close() - except OSError as why: - if why.args[0] not in (ENOTCONN, EBADF): - raise - - # log and log_info may be overridden to provide more sophisticated - # logging and warning methods. In general, log is for 'hit' logging - # and 'log_info' is for informational, warning and error logging. - - def log(self, message): - sys.stderr.write('log: %s\n' % str(message)) - - def log_info(self, message, type='info'): - if type not in self.ignore_log_types: - print('%s: %s' % (type, message)) - - def handle_read_event(self): - if self.accepting: - # accepting sockets are never connected, they "spawn" new - # sockets that are connected - self.handle_accept() - elif not self.connected: - if self.connecting: - self.handle_connect_event() - self.handle_read() - else: - self.handle_read() - - def handle_connect_event(self): - err = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) - if err != 0: - raise OSError(err, _strerror(err)) - self.handle_connect() - self.connected = True - self.connecting = False - - def handle_write_event(self): - if self.accepting: - # Accepting sockets shouldn't get a write event. - # We will pretend it didn't happen. - return - - if not self.connected: - if self.connecting: - self.handle_connect_event() - self.handle_write() - - def handle_expt_event(self): - # handle_expt_event() is called if there might be an error on the - # socket, or if there is OOB data - # check for the error condition first - err = self.socket.getsockopt(socket.SOL_SOCKET, socket.SO_ERROR) - if err != 0: - # we can get here when select.select() says that there is an - # exceptional condition on the socket - # since there is an error, we'll go ahead and close the socket - # like we would in a subclassed handle_read() that received no - # data - self.handle_close() - else: - self.handle_expt() - - def handle_error(self): - nil, t, v, tbinfo = compact_traceback() - - # sometimes a user repr method will crash. - try: - self_repr = repr(self) - except: - self_repr = '<__repr__(self) failed for object at %0x>' % id(self) - - self.log_info( - 'uncaptured python exception, closing channel %s (%s:%s %s)' % ( - self_repr, - t, - v, - tbinfo - ), - 'error' - ) - self.handle_close() - - def handle_expt(self): - self.log_info('unhandled incoming priority event', 'warning') - - def handle_read(self): - self.log_info('unhandled read event', 'warning') - - def handle_write(self): - self.log_info('unhandled write event', 'warning') - - def handle_connect(self): - self.log_info('unhandled connect event', 'warning') - - def handle_accept(self): - pair = self.accept() - if pair is not None: - self.handle_accepted(*pair) - - def handle_accepted(self, sock, addr): - sock.close() - self.log_info('unhandled accepted event', 'warning') - - def handle_close(self): - self.log_info('unhandled close event', 'warning') - self.close() - -# --------------------------------------------------------------------------- -# adds simple buffered output capability, useful for simple clients. -# [for more sophisticated usage use asynchat.async_chat] -# --------------------------------------------------------------------------- - -class dispatcher_with_send(dispatcher): - - def __init__(self, sock=None, map=None): - dispatcher.__init__(self, sock, map) - self.out_buffer = b'' - - def initiate_send(self): - num_sent = 0 - num_sent = dispatcher.send(self, self.out_buffer[:65536]) - self.out_buffer = self.out_buffer[num_sent:] - - def handle_write(self): - self.initiate_send() - - def writable(self): - return (not self.connected) or len(self.out_buffer) - - def send(self, data): - if self.debug: - self.log_info('sending %s' % repr(data)) - self.out_buffer = self.out_buffer + data - self.initiate_send() - -# --------------------------------------------------------------------------- -# used for debugging. -# --------------------------------------------------------------------------- - -def compact_traceback(): - t, v, tb = sys.exc_info() - tbinfo = [] - if not tb: # Must have a traceback - raise AssertionError("traceback does not exist") - while tb: - tbinfo.append(( - tb.tb_frame.f_code.co_filename, - tb.tb_frame.f_code.co_name, - str(tb.tb_lineno) - )) - tb = tb.tb_next - - # just to be safe - del tb - - file, function, line = tbinfo[-1] - info = ' '.join(['[%s|%s|%s]' % x for x in tbinfo]) - return (file, function, line), t, v, info - -def close_all(map=None, ignore_all=False): - if map is None: - map = socket_map - for x in list(map.values()): - try: - x.close() - except OSError as x: - if x.args[0] == EBADF: - pass - elif not ignore_all: - raise - except _reraised_exceptions: - raise - except: - if not ignore_all: - raise - map.clear() - -# Asynchronous File I/O: -# -# After a little research (reading man pages on various unixen, and -# digging through the linux kernel), I've determined that select() -# isn't meant for doing asynchronous file i/o. -# Heartening, though - reading linux/mm/filemap.c shows that linux -# supports asynchronous read-ahead. So _MOST_ of the time, the data -# will be sitting in memory for us already when we go to read it. -# -# What other OS's (besides NT) support async file i/o? [VMS?] -# -# Regardless, this is useful for pipes, and stdin/stdout... - -if os.name == 'posix': - class file_wrapper: - # Here we override just enough to make a file - # look like a socket for the purposes of asyncore. - # The passed fd is automatically os.dup()'d - - def __init__(self, fd): - self.fd = os.dup(fd) - - def __del__(self): - if self.fd >= 0: - warnings.warn("unclosed file %r" % self, ResourceWarning, - source=self) - self.close() - - def recv(self, *args): - return os.read(self.fd, *args) - - def send(self, *args): - return os.write(self.fd, *args) - - def getsockopt(self, level, optname, buflen=None): - if (level == socket.SOL_SOCKET and - optname == socket.SO_ERROR and - not buflen): - return 0 - raise NotImplementedError("Only asyncore specific behaviour " - "implemented.") - - read = recv - write = send - - def close(self): - if self.fd < 0: - return - fd = self.fd - self.fd = -1 - os.close(fd) - - def fileno(self): - return self.fd - - class file_dispatcher(dispatcher): - - def __init__(self, fd, map=None): - dispatcher.__init__(self, None, map) - self.connected = True - try: - fd = fd.fileno() - except AttributeError: - pass - self.set_file(fd) - # set it to non-blocking mode - os.set_blocking(fd, False) - - def set_file(self, fd): - self.socket = file_wrapper(fd) - self._fileno = self.socket.fileno() - self.add_channel() diff --git a/Lib/base64.py b/Lib/base64.py old mode 100755 new mode 100644 index e233647ee76..775ceab0ec0 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -1,12 +1,9 @@ -#! /usr/bin/env python3 - """Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings""" # Modified 04-Oct-1995 by Jack Jansen to use binascii module # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere -import re import struct import binascii @@ -18,7 +15,7 @@ 'b64encode', 'b64decode', 'b32encode', 'b32decode', 'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode', # Base85 and Ascii85 encodings - 'b85encode', 'b85decode', 'a85encode', 'a85decode', + 'b85encode', 'b85decode', 'a85encode', 'a85decode', 'z85encode', 'z85decode', # Standard Base64 encoding 'standard_b64encode', 'standard_b64decode', # Some common Base64 alternatives. As referenced by RFC 3458, see thread @@ -164,7 +161,6 @@ def urlsafe_b64decode(s): _b32rev = {} def _b32encode(alphabet, s): - global _b32tab2 # Delay the initialization of the table to not waste memory # if the function is never called if alphabet not in _b32tab2: @@ -200,7 +196,6 @@ def _b32encode(alphabet, s): return bytes(encoded) def _b32decode(alphabet, s, casefold=False, map01=None): - global _b32rev # Delay the initialization of the table to not waste memory # if the function is never called if alphabet not in _b32rev: @@ -288,7 +283,7 @@ def b16decode(s, casefold=False): s = _bytes_from_decode_data(s) if casefold: s = s.upper() - if re.search(b'[^0-9A-F]', s): + if s.translate(None, delete=b'0123456789ABCDEF'): raise binascii.Error('Non-base16 digit found') return binascii.unhexlify(s) @@ -330,17 +325,20 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): foldspaces is an optional flag that uses the special short sequence 'y' instead of 4 consecutive spaces (ASCII 0x20) as supported by 'btoa'. This - feature is not supported by the "standard" Adobe encoding. + feature is not supported by the standard encoding used in PDF. + + If wrapcol is non-zero, insert a newline (b'\\n') character after at most + every wrapcol characters. - wrapcol controls whether the output should have newline (b'\\n') characters - added to it. If this is non-zero, each output line will be at most this - many characters long. + pad controls whether zero-padding applied to the end of the input + is fully retained in the output encoding, as done by btoa, + producing an exact multiple of 5 bytes of output. - pad controls whether the input is padded to a multiple of 4 before - encoding. Note that the btoa implementation always pads. + adobe controls whether the encoded byte sequence is framed with <~ + and ~>, as in a PostScript base-85 string literal. Note that + while ASCII85Decode streams in PDF documents must be terminated + with ~>, they must not use a leading <~. - adobe controls whether the encoded byte sequence is framed with <~ and ~>, - which is used by the Adobe implementation. """ global _a85chars, _a85chars2 # Delay the initialization of tables to not waste memory @@ -369,12 +367,14 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): """Decode the Ascii85 encoded bytes-like object or ASCII string b. - foldspaces is a flag that specifies whether the 'y' short sequence should be - accepted as shorthand for 4 consecutive spaces (ASCII 0x20). This feature is - not supported by the "standard" Adobe encoding. + foldspaces is a flag that specifies whether the 'y' short sequence + should be accepted as shorthand for 4 consecutive spaces (ASCII + 0x20). This feature is not supported by the standard Ascii85 + encoding used in PDF and PostScript. - adobe controls whether the input sequence is in Adobe Ascii85 format (i.e. - is framed with <~ and ~>). + adobe controls whether the <~ and ~> markers are present. While + the leading <~ is not required, the input must end with ~>, or a + ValueError is raised. ignorechars should be a byte string containing characters to ignore from the input. This should only contain whitespace characters, and by default @@ -447,8 +447,10 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): def b85encode(b, pad=False): """Encode bytes-like object b in base85 format and return a bytes object. - If pad is true, the input is padded with b'\\0' so its length is a multiple of - 4 bytes before encoding. + The input is padded with b'\0' so its length is a multiple of 4 + bytes before encoding. If pad is true, all the resulting + characters are retained in the output, which will always be a + multiple of 5 bytes. """ global _b85chars, _b85chars2 # Delay the initialization of tables to not waste memory @@ -467,9 +469,12 @@ def b85decode(b): # Delay the initialization of tables to not waste memory # if the function is never called if _b85dec is None: - _b85dec = [None] * 256 + # we don't assign to _b85dec directly to avoid issues when + # multiple threads call this function simultaneously + b85dec_tmp = [None] * 256 for i, c in enumerate(_b85alphabet): - _b85dec[c] = i + b85dec_tmp[c] = i + _b85dec = b85dec_tmp b = _bytes_from_decode_data(b) padding = (-len(b)) % 5 @@ -499,6 +504,33 @@ def b85decode(b): result = result[:-padding] return result +_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz' + b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#') +# Translating b85 valid but z85 invalid chars to b'\x00' is required +# to prevent them from being decoded as b85 valid chars. +_z85_b85_decode_diff = b';_`|~' +_z85_decode_translation = bytes.maketrans( + _z85alphabet + _z85_b85_decode_diff, + _b85alphabet + b'\x00' * len(_z85_b85_decode_diff) +) +_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet) + +def z85encode(s): + """Encode bytes-like object b in z85 format and return a bytes object.""" + return b85encode(s).translate(_z85_encode_translation) + +def z85decode(s): + """Decode the z85-encoded bytes-like object or ASCII string b + + The result is returned as a bytes object. + """ + s = _bytes_from_decode_data(s) + s = s.translate(_z85_decode_translation) + try: + return b85decode(s) + except ValueError as e: + raise ValueError(e.args[0].replace('base85', 'z85')) from None + # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it # though. The files should be opened in binary mode. @@ -579,7 +611,14 @@ def main(): with open(args[0], 'rb') as f: func(f, sys.stdout.buffer) else: - func(sys.stdin.buffer, sys.stdout.buffer) + if sys.stdin.isatty(): + # gh-138775: read terminal input data all at once to detect EOF + import io + data = sys.stdin.buffer.read() + buffer = io.BytesIO(data) + else: + buffer = sys.stdin.buffer + func(buffer, sys.stdout.buffer) if __name__ == '__main__': diff --git a/Lib/bdb.py b/Lib/bdb.py index 0f3eec653ba..79da4bab9c9 100644 --- a/Lib/bdb.py +++ b/Lib/bdb.py @@ -2,7 +2,10 @@ import fnmatch import sys +import threading import os +import weakref +from contextlib import contextmanager from inspect import CO_GENERATOR, CO_COROUTINE, CO_ASYNC_GENERATOR __all__ = ["BdbQuit", "Bdb", "Breakpoint"] @@ -14,6 +17,166 @@ class BdbQuit(Exception): """Exception to give up completely.""" +E = sys.monitoring.events + +class _MonitoringTracer: + EVENT_CALLBACK_MAP = { + E.PY_START: 'call', + E.PY_RESUME: 'call', + E.PY_THROW: 'call', + E.LINE: 'line', + E.JUMP: 'jump', + E.PY_RETURN: 'return', + E.PY_YIELD: 'return', + E.PY_UNWIND: 'unwind', + E.RAISE: 'exception', + E.STOP_ITERATION: 'exception', + E.INSTRUCTION: 'opcode', + } + + GLOBAL_EVENTS = E.PY_START | E.PY_RESUME | E.PY_THROW | E.PY_UNWIND | E.RAISE + LOCAL_EVENTS = E.LINE | E.JUMP | E.PY_RETURN | E.PY_YIELD | E.STOP_ITERATION + + def __init__(self): + self._tool_id = sys.monitoring.DEBUGGER_ID + self._name = 'bdbtracer' + self._tracefunc = None + self._disable_current_event = False + self._tracing_thread = None + self._enabled = False + + def start_trace(self, tracefunc): + self._tracefunc = tracefunc + self._tracing_thread = threading.current_thread() + curr_tool = sys.monitoring.get_tool(self._tool_id) + if curr_tool is None: + sys.monitoring.use_tool_id(self._tool_id, self._name) + elif curr_tool == self._name: + sys.monitoring.clear_tool_id(self._tool_id) + else: + raise ValueError('Another debugger is using the monitoring tool') + E = sys.monitoring.events + all_events = 0 + for event, cb_name in self.EVENT_CALLBACK_MAP.items(): + callback = self.callback_wrapper(getattr(self, f'{cb_name}_callback'), event) + sys.monitoring.register_callback(self._tool_id, event, callback) + if event != E.INSTRUCTION: + all_events |= event + self.update_local_events() + sys.monitoring.set_events(self._tool_id, self.GLOBAL_EVENTS) + self._enabled = True + + def stop_trace(self): + self._enabled = False + self._tracing_thread = None + curr_tool = sys.monitoring.get_tool(self._tool_id) + if curr_tool != self._name: + return + sys.monitoring.clear_tool_id(self._tool_id) + sys.monitoring.free_tool_id(self._tool_id) + + def disable_current_event(self): + self._disable_current_event = True + + def restart_events(self): + if sys.monitoring.get_tool(self._tool_id) == self._name: + sys.monitoring.restart_events() + + def callback_wrapper(self, func, event): + import functools + + @functools.wraps(func) + def wrapper(*args): + if self._tracing_thread != threading.current_thread(): + return + try: + frame = sys._getframe().f_back + ret = func(frame, *args) + if self._enabled and frame.f_trace: + self.update_local_events() + if ( + self._disable_current_event + and event not in (E.PY_THROW, E.PY_UNWIND, E.RAISE) + ): + return sys.monitoring.DISABLE + else: + return ret + except BaseException: + self.stop_trace() + sys._getframe().f_back.f_trace = None + raise + finally: + self._disable_current_event = False + + return wrapper + + def call_callback(self, frame, code, *args): + local_tracefunc = self._tracefunc(frame, 'call', None) + if local_tracefunc is not None: + frame.f_trace = local_tracefunc + if self._enabled: + sys.monitoring.set_local_events(self._tool_id, code, self.LOCAL_EVENTS) + + def return_callback(self, frame, code, offset, retval): + if frame.f_trace: + frame.f_trace(frame, 'return', retval) + + def unwind_callback(self, frame, code, *args): + if frame.f_trace: + frame.f_trace(frame, 'return', None) + + def line_callback(self, frame, code, *args): + if frame.f_trace and frame.f_trace_lines: + frame.f_trace(frame, 'line', None) + + def jump_callback(self, frame, code, inst_offset, dest_offset): + if dest_offset > inst_offset: + return sys.monitoring.DISABLE + inst_lineno = self._get_lineno(code, inst_offset) + dest_lineno = self._get_lineno(code, dest_offset) + if inst_lineno != dest_lineno: + return sys.monitoring.DISABLE + if frame.f_trace and frame.f_trace_lines: + frame.f_trace(frame, 'line', None) + + def exception_callback(self, frame, code, offset, exc): + if frame.f_trace: + if exc.__traceback__ and hasattr(exc.__traceback__, 'tb_frame'): + tb = exc.__traceback__ + while tb: + if tb.tb_frame.f_locals.get('self') is self: + return + tb = tb.tb_next + frame.f_trace(frame, 'exception', (type(exc), exc, exc.__traceback__)) + + def opcode_callback(self, frame, code, offset): + if frame.f_trace and frame.f_trace_opcodes: + frame.f_trace(frame, 'opcode', None) + + def update_local_events(self, frame=None): + if sys.monitoring.get_tool(self._tool_id) != self._name: + return + if frame is None: + frame = sys._getframe().f_back + while frame is not None: + if frame.f_trace is not None: + if frame.f_trace_opcodes: + events = self.LOCAL_EVENTS | E.INSTRUCTION + else: + events = self.LOCAL_EVENTS + sys.monitoring.set_local_events(self._tool_id, frame.f_code, events) + frame = frame.f_back + + def _get_lineno(self, code, offset): + import dis + last_lineno = None + for start, lineno in dis.findlinestarts(code): + if offset < start: + return last_lineno + last_lineno = lineno + return last_lineno + + class Bdb: """Generic Python debugger base class. @@ -28,11 +191,24 @@ class Bdb: is determined by the __name__ in the frame globals. """ - def __init__(self, skip=None): + def __init__(self, skip=None, backend='settrace'): self.skip = set(skip) if skip else None self.breaks = {} self.fncache = {} + self.frame_trace_lines_opcodes = {} self.frame_returning = None + self.trace_opcodes = False + self.enterframe = None + self.cmdframe = None + self.cmdlineno = None + self.code_linenos = weakref.WeakKeyDictionary() + self.backend = backend + if backend == 'monitoring': + self.monitoring_tracer = _MonitoringTracer() + elif backend == 'settrace': + self.monitoring_tracer = None + else: + raise ValueError(f"Invalid backend '{backend}'") self._load_breaks() @@ -53,6 +229,18 @@ def canonic(self, filename): self.fncache[filename] = canonic return canonic + def start_trace(self): + if self.monitoring_tracer: + self.monitoring_tracer.start_trace(self.trace_dispatch) + else: + sys.settrace(self.trace_dispatch) + + def stop_trace(self): + if self.monitoring_tracer: + self.monitoring_tracer.stop_trace() + else: + sys.settrace(None) + def reset(self): """Set values of attributes as ready to start debugging.""" import linecache @@ -60,6 +248,12 @@ def reset(self): self.botframe = None self._set_stopinfo(None, None) + @contextmanager + def set_enterframe(self, frame): + self.enterframe = frame + yield + self.enterframe = None + def trace_dispatch(self, frame, event, arg): """Dispatch a trace function for debugged frames based on the event. @@ -84,24 +278,28 @@ def trace_dispatch(self, frame, event, arg): The arg parameter depends on the previous event. """ - if self.quitting: - return # None - if event == 'line': - return self.dispatch_line(frame) - if event == 'call': - return self.dispatch_call(frame, arg) - if event == 'return': - return self.dispatch_return(frame, arg) - if event == 'exception': - return self.dispatch_exception(frame, arg) - if event == 'c_call': - return self.trace_dispatch - if event == 'c_exception': - return self.trace_dispatch - if event == 'c_return': + + with self.set_enterframe(frame): + if self.quitting: + return # None + if event == 'line': + return self.dispatch_line(frame) + if event == 'call': + return self.dispatch_call(frame, arg) + if event == 'return': + return self.dispatch_return(frame, arg) + if event == 'exception': + return self.dispatch_exception(frame, arg) + if event == 'c_call': + return self.trace_dispatch + if event == 'c_exception': + return self.trace_dispatch + if event == 'c_return': + return self.trace_dispatch + if event == 'opcode': + return self.dispatch_opcode(frame, arg) + print('bdb.Bdb.dispatch: unknown debugging event:', repr(event)) return self.trace_dispatch - print('bdb.Bdb.dispatch: unknown debugging event:', repr(event)) - return self.trace_dispatch def dispatch_line(self, frame): """Invoke user function and return trace function for line event. @@ -110,9 +308,17 @@ def dispatch_line(self, frame): self.user_line(). Raise BdbQuit if self.quitting is set. Return self.trace_dispatch to continue tracing in this scope. """ - if self.stop_here(frame) or self.break_here(frame): + # GH-136057 + # For line events, we don't want to stop at the same line where + # the latest next/step command was issued. + if (self.stop_here(frame) or self.break_here(frame)) and not ( + self.cmdframe == frame and self.cmdlineno == frame.f_lineno + ): self.user_line(frame) + self.restart_events() if self.quitting: raise BdbQuit + elif not self.get_break(frame.f_code.co_filename, frame.f_lineno): + self.disable_current_event() return self.trace_dispatch def dispatch_call(self, frame, arg): @@ -128,12 +334,18 @@ def dispatch_call(self, frame, arg): self.botframe = frame.f_back # (CT) Note that this may also be None! return self.trace_dispatch if not (self.stop_here(frame) or self.break_anywhere(frame)): - # No need to trace this function + # We already know there's no breakpoint in this function + # If it's a next/until/return command, we don't need any CALL event + # and we don't need to set the f_trace on any new frame. + # If it's a step command, it must either hit stop_here, or skip the + # whole module. Either way, we don't need the CALL event here. + self.disable_current_event() return # None # Ignore call events in generator except when stepping. if self.stopframe and frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS: return self.trace_dispatch self.user_call(frame, arg) + self.restart_events() if self.quitting: raise BdbQuit return self.trace_dispatch @@ -147,16 +359,25 @@ def dispatch_return(self, frame, arg): if self.stop_here(frame) or frame == self.returnframe: # Ignore return events in generator except when stepping. if self.stopframe and frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS: + # It's possible to trigger a StopIteration exception in + # the caller so we must set the trace function in the caller + self._set_caller_tracefunc(frame) return self.trace_dispatch try: self.frame_returning = frame self.user_return(frame, arg) + self.restart_events() finally: self.frame_returning = None if self.quitting: raise BdbQuit # The user issued a 'next' or 'until' command. if self.stopframe is frame and self.stoplineno != -1: self._set_stopinfo(None, None) + # The previous frame might not have f_trace set, unless we are + # issuing a command that does not expect to stop, we should set + # f_trace + if self.stoplineno != -1: + self._set_caller_tracefunc(frame) return self.trace_dispatch def dispatch_exception(self, frame, arg): @@ -173,6 +394,7 @@ def dispatch_exception(self, frame, arg): if not (frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS and arg[0] is StopIteration and arg[2] is None): self.user_exception(frame, arg) + self.restart_events() if self.quitting: raise BdbQuit # Stop at the StopIteration or GeneratorExit exception when the user # has set stopframe in a generator by issuing a return command, or a @@ -182,10 +404,26 @@ def dispatch_exception(self, frame, arg): and self.stopframe.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS and arg[0] in (StopIteration, GeneratorExit)): self.user_exception(frame, arg) + self.restart_events() if self.quitting: raise BdbQuit return self.trace_dispatch + def dispatch_opcode(self, frame, arg): + """Invoke user function and return trace function for opcode event. + If the debugger stops on the current opcode, invoke + self.user_opcode(). Raise BdbQuit if self.quitting is set. + Return self.trace_dispatch to continue tracing in this scope. + + Opcode event will always trigger the user callback. For now the only + opcode event is from an inline set_trace() and we want to stop there + unconditionally. + """ + self.user_opcode(frame) + self.restart_events() + if self.quitting: raise BdbQuit + return self.trace_dispatch + # Normally derived classes don't override the following # methods, but they may if they want to redefine the # definition of stopping and breakpoints. @@ -249,9 +487,25 @@ def do_clear(self, arg): raise NotImplementedError("subclass of bdb must implement do_clear()") def break_anywhere(self, frame): - """Return True if there is any breakpoint for frame's filename. + """Return True if there is any breakpoint in that frame + """ + filename = self.canonic(frame.f_code.co_filename) + if filename not in self.breaks: + return False + for lineno in self.breaks[filename]: + if self._lineno_in_frame(lineno, frame): + return True + return False + + def _lineno_in_frame(self, lineno, frame): + """Return True if the line number is in the frame's code object. """ - return self.canonic(frame.f_code.co_filename) in self.breaks + code = frame.f_code + if lineno < code.co_firstlineno: + return False + if code not in self.code_linenos: + self.code_linenos[code] = set(lineno for _, _, lineno in code.co_lines()) + return lineno in self.code_linenos[code] # Derived classes should override the user_* methods # to gain control. @@ -272,7 +526,24 @@ def user_exception(self, frame, exc_info): """Called when we stop on an exception.""" pass - def _set_stopinfo(self, stopframe, returnframe, stoplineno=0): + def user_opcode(self, frame): + """Called when we are about to execute an opcode.""" + pass + + def _set_trace_opcodes(self, trace_opcodes): + if trace_opcodes != self.trace_opcodes: + self.trace_opcodes = trace_opcodes + frame = self.enterframe + while frame is not None: + frame.f_trace_opcodes = trace_opcodes + if frame is self.botframe: + break + frame = frame.f_back + if self.monitoring_tracer: + self.monitoring_tracer.update_local_events() + + def _set_stopinfo(self, stopframe, returnframe, stoplineno=0, opcode=False, + cmdframe=None, cmdlineno=None): """Set the attributes for stopping. If stoplineno is greater than or equal to 0, then stop at line @@ -285,6 +556,21 @@ def _set_stopinfo(self, stopframe, returnframe, stoplineno=0): # stoplineno >= 0 means: stop at line >= the stoplineno # stoplineno -1 means: don't stop at all self.stoplineno = stoplineno + # cmdframe/cmdlineno is the frame/line number when the user issued + # step/next commands. + self.cmdframe = cmdframe + self.cmdlineno = cmdlineno + self._set_trace_opcodes(opcode) + + def _set_caller_tracefunc(self, current_frame): + # Issue #13183: pdb skips frames after hitting a breakpoint and running + # step commands. + # Restore the trace function in the caller (that may not have been set + # for performance reasons) when returning from the current frame, unless + # the caller is the botframe. + caller_frame = current_frame.f_back + if caller_frame and not caller_frame.f_trace and caller_frame is not self.botframe: + caller_frame.f_trace = self.trace_dispatch # Derived classes and clients can call the following methods # to affect the stepping state. @@ -299,24 +585,22 @@ def set_until(self, frame, lineno=None): def set_step(self): """Stop after one line of code.""" - # Issue #13183: pdb skips frames after hitting a breakpoint and running - # step commands. - # Restore the trace function in the caller (that may not have been set - # for performance reasons) when returning from the current frame. - if self.frame_returning: - caller_frame = self.frame_returning.f_back - if caller_frame and not caller_frame.f_trace: - caller_frame.f_trace = self.trace_dispatch - self._set_stopinfo(None, None) + # set_step() could be called from signal handler so enterframe might be None + self._set_stopinfo(None, None, cmdframe=self.enterframe, + cmdlineno=getattr(self.enterframe, 'f_lineno', None)) + + def set_stepinstr(self): + """Stop before the next instruction.""" + self._set_stopinfo(None, None, opcode=True) def set_next(self, frame): """Stop on the next line in or below the given frame.""" - self._set_stopinfo(frame, None) + self._set_stopinfo(frame, None, cmdframe=frame, cmdlineno=frame.f_lineno) def set_return(self, frame): """Stop when returning from the given frame.""" if frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS: - self._set_stopinfo(frame, None, -1) + self._set_stopinfo(frame, frame, -1) else: self._set_stopinfo(frame.f_back, frame) @@ -325,15 +609,21 @@ def set_trace(self, frame=None): If frame is not specified, debugging starts from caller's frame. """ + self.stop_trace() if frame is None: frame = sys._getframe().f_back self.reset() - while frame: - frame.f_trace = self.trace_dispatch - self.botframe = frame - frame = frame.f_back - self.set_step() - sys.settrace(self.trace_dispatch) + with self.set_enterframe(frame): + while frame: + frame.f_trace = self.trace_dispatch + self.botframe = frame + self.frame_trace_lines_opcodes[frame] = (frame.f_trace_lines, frame.f_trace_opcodes) + # We need f_trace_lines == True for the debugger to work + frame.f_trace_lines = True + frame = frame.f_back + self.set_stepinstr() + self.enterframe = None + self.start_trace() def set_continue(self): """Stop only at breakpoints or when finished. @@ -344,11 +634,16 @@ def set_continue(self): self._set_stopinfo(self.botframe, None, -1) if not self.breaks: # no breakpoints; run without debugger overhead - sys.settrace(None) + self.stop_trace() frame = sys._getframe().f_back while frame and frame is not self.botframe: del frame.f_trace frame = frame.f_back + for frame, (trace_lines, trace_opcodes) in self.frame_trace_lines_opcodes.items(): + frame.f_trace_lines, frame.f_trace_opcodes = trace_lines, trace_opcodes + if self.backend == 'monitoring': + self.monitoring_tracer.update_local_events() + self.frame_trace_lines_opcodes = {} def set_quit(self): """Set quitting attribute to True. @@ -358,7 +653,7 @@ def set_quit(self): self.stopframe = self.botframe self.returnframe = None self.quitting = True - sys.settrace(None) + self.stop_trace() # Derived classes and clients can call the following methods # to manipulate breakpoints. These methods return an @@ -387,6 +682,14 @@ def set_break(self, filename, lineno, temporary=False, cond=None, return 'Line %s:%d does not exist' % (filename, lineno) self._add_to_breaks(filename, lineno) bp = Breakpoint(filename, lineno, temporary, cond, funcname) + # After we set a new breakpoint, we need to search through all frames + # and set f_trace to trace_dispatch if there could be a breakpoint in + # that frame. + frame = self.enterframe + while frame: + if self.break_anywhere(frame): + frame.f_trace = self.trace_dispatch + frame = frame.f_back return None def _load_breaks(self): @@ -578,6 +881,16 @@ def format_stack_entry(self, frame_lineno, lprefix=': '): s += f'{lprefix}Warning: lineno is None' return s + def disable_current_event(self): + """Disable the current event.""" + if self.backend == 'monitoring': + self.monitoring_tracer.disable_current_event() + + def restart_events(self): + """Restart all events.""" + if self.backend == 'monitoring': + self.monitoring_tracer.restart_events() + # The following methods can be called by clients to use # a debugger to debug a statement or an expression. # Both can be given as a string, or a code object. @@ -595,14 +908,14 @@ def run(self, cmd, globals=None, locals=None): self.reset() if isinstance(cmd, str): cmd = compile(cmd, "", "exec") - sys.settrace(self.trace_dispatch) + self.start_trace() try: exec(cmd, globals, locals) except BdbQuit: pass finally: self.quitting = True - sys.settrace(None) + self.stop_trace() def runeval(self, expr, globals=None, locals=None): """Debug an expression executed via the eval() function. @@ -615,14 +928,14 @@ def runeval(self, expr, globals=None, locals=None): if locals is None: locals = globals self.reset() - sys.settrace(self.trace_dispatch) + self.start_trace() try: return eval(expr, globals, locals) except BdbQuit: pass finally: self.quitting = True - sys.settrace(None) + self.stop_trace() def runctx(self, cmd, globals, locals): """For backwards-compatibility. Defers to run().""" @@ -637,7 +950,7 @@ def runcall(self, func, /, *args, **kwds): Return the result of the function call. """ self.reset() - sys.settrace(self.trace_dispatch) + self.start_trace() res = None try: res = func(*args, **kwds) @@ -645,7 +958,7 @@ def runcall(self, func, /, *args, **kwds): pass finally: self.quitting = True - sys.settrace(None) + self.stop_trace() return res diff --git a/Lib/bz2.py b/Lib/bz2.py index fabe4f73c8d..eb58f4da596 100644 --- a/Lib/bz2.py +++ b/Lib/bz2.py @@ -10,20 +10,20 @@ __author__ = "Nadeem Vawda " from builtins import open as _builtin_open +from compression._common import _streams import io import os -import _compression from _bz2 import BZ2Compressor, BZ2Decompressor -_MODE_CLOSED = 0 +# Value 0 no longer used _MODE_READ = 1 # Value 2 no longer used _MODE_WRITE = 3 -class BZ2File(_compression.BaseStream): +class BZ2File(_streams.BaseStream): """A file object providing transparent bzip2 (de)compression. @@ -54,7 +54,7 @@ def __init__(self, filename, mode="r", *, compresslevel=9): """ self._fp = None self._closefp = False - self._mode = _MODE_CLOSED + self._mode = None if not (1 <= compresslevel <= 9): raise ValueError("compresslevel must be between 1 and 9") @@ -88,7 +88,7 @@ def __init__(self, filename, mode="r", *, compresslevel=9): raise TypeError("filename must be a str, bytes, file or PathLike object") if self._mode == _MODE_READ: - raw = _compression.DecompressReader(self._fp, + raw = _streams.DecompressReader(self._fp, BZ2Decompressor, trailing_error=OSError) self._buffer = io.BufferedReader(raw) else: @@ -100,7 +100,7 @@ def close(self): May be called more than once without error. Once the file is closed, any other operation on it will raise a ValueError. """ - if self._mode == _MODE_CLOSED: + if self.closed: return try: if self._mode == _MODE_READ: @@ -115,13 +115,21 @@ def close(self): finally: self._fp = None self._closefp = False - self._mode = _MODE_CLOSED self._buffer = None @property def closed(self): """True if this file is closed.""" - return self._mode == _MODE_CLOSED + return self._fp is None + + @property + def name(self): + self._check_not_closed() + return self._fp.name + + @property + def mode(self): + return 'wb' if self._mode == _MODE_WRITE else 'rb' def fileno(self): """Return the file descriptor for the underlying file.""" @@ -240,7 +248,7 @@ def writelines(self, seq): Line separators are not added between the written byte strings. """ - return _compression.BaseStream.writelines(self, seq) + return _streams.BaseStream.writelines(self, seq) def seek(self, offset, whence=io.SEEK_SET): """Change the file position. diff --git a/Lib/calendar.py b/Lib/calendar.py index baab52a1578..18f76d52ff8 100644 --- a/Lib/calendar.py +++ b/Lib/calendar.py @@ -10,7 +10,6 @@ from enum import IntEnum, global_enum import locale as _locale from itertools import repeat -import warnings __all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday", "firstweekday", "isleap", "leapdays", "weekday", "monthrange", @@ -28,7 +27,9 @@ error = ValueError # Exceptions raised for bad input -class IllegalMonthError(ValueError): +# This is trick for backward compatibility. Since 3.13, we will raise IllegalMonthError instead of +# IndexError for bad month number(out of 1-12). But we can't remove IndexError for backward compatibility. +class IllegalMonthError(ValueError, IndexError): def __init__(self, month): self.month = month def __str__(self): @@ -44,6 +45,7 @@ def __str__(self): def __getattr__(name): if name in ('January', 'February'): + import warnings warnings.warn(f"The '{name}' attribute is deprecated, use '{name.upper()}' instead", DeprecationWarning, stacklevel=2) if name == 'January': @@ -158,11 +160,14 @@ def weekday(year, month, day): return Day(datetime.date(year, month, day).weekday()) -def monthrange(year, month): - """Return weekday (0-6 ~ Mon-Sun) and number of days (28-31) for - year, month.""" +def _validate_month(month): if not 1 <= month <= 12: raise IllegalMonthError(month) + +def monthrange(year, month): + """Return weekday of first day of month (0-6 ~ Mon-Sun) + and number of days (28-31) for year, month.""" + _validate_month(month) day1 = weekday(year, month, 1) ndays = mdays[month] + (month == FEBRUARY and isleap(year)) return day1, ndays @@ -370,6 +375,8 @@ def formatmonthname(self, theyear, themonth, width, withyear=True): """ Return a formatted month name. """ + _validate_month(themonth) + s = month_name[themonth] if withyear: s = "%s %r" % (s, theyear) @@ -421,6 +428,7 @@ def formatyear(self, theyear, w=2, l=1, c=6, m=3): headers = (header for k in months) a(formatstring(headers, colwidth, c).rstrip()) a('\n'*l) + # max number of weeks for this row height = max(len(cal) for cal in row) for j in range(height): @@ -500,6 +508,7 @@ def formatmonthname(self, theyear, themonth, withyear=True): """ Return a month name as a table row. """ + _validate_month(themonth) if withyear: s = '%s %s' % (month_name[themonth], theyear) else: @@ -585,8 +594,6 @@ def __enter__(self): _locale.setlocale(_locale.LC_TIME, self.locale) def __exit__(self, *args): - if self.oldlocale is None: - return _locale.setlocale(_locale.LC_TIME, self.oldlocale) @@ -640,6 +647,117 @@ def formatmonthname(self, theyear, themonth, withyear=True): with different_locale(self.locale): return super().formatmonthname(theyear, themonth, withyear) + +class _CLIDemoCalendar(TextCalendar): + def __init__(self, highlight_day=None, *args, **kwargs): + super().__init__(*args, **kwargs) + self.highlight_day = highlight_day + + def formatweek(self, theweek, width, *, highlight_day=None): + """ + Returns a single week in a string (no newline). + """ + if highlight_day: + from _colorize import get_colors + + ansi = get_colors() + highlight = f"{ansi.BLACK}{ansi.BACKGROUND_YELLOW}" + reset = ansi.RESET + else: + highlight = reset = "" + + return ' '.join( + ( + f"{highlight}{self.formatday(d, wd, width)}{reset}" + if d == highlight_day + else self.formatday(d, wd, width) + ) + for (d, wd) in theweek + ) + + def formatmonth(self, theyear, themonth, w=0, l=0): + """ + Return a month's calendar string (multi-line). + """ + if ( + self.highlight_day + and self.highlight_day.year == theyear + and self.highlight_day.month == themonth + ): + highlight_day = self.highlight_day.day + else: + highlight_day = None + w = max(2, w) + l = max(1, l) + s = self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1) + s = s.rstrip() + s += '\n' * l + s += self.formatweekheader(w).rstrip() + s += '\n' * l + for week in self.monthdays2calendar(theyear, themonth): + s += self.formatweek(week, w, highlight_day=highlight_day).rstrip() + s += '\n' * l + return s + + def formatyear(self, theyear, w=2, l=1, c=6, m=3): + """ + Returns a year's calendar as a multi-line string. + """ + w = max(2, w) + l = max(1, l) + c = max(2, c) + colwidth = (w + 1) * 7 - 1 + v = [] + a = v.append + a(repr(theyear).center(colwidth*m+c*(m-1)).rstrip()) + a('\n'*l) + header = self.formatweekheader(w) + for (i, row) in enumerate(self.yeardays2calendar(theyear, m)): + # months in this row + months = range(m*i+1, min(m*(i+1)+1, 13)) + a('\n'*l) + names = (self.formatmonthname(theyear, k, colwidth, False) + for k in months) + a(formatstring(names, colwidth, c).rstrip()) + a('\n'*l) + headers = (header for k in months) + a(formatstring(headers, colwidth, c).rstrip()) + a('\n'*l) + + if ( + self.highlight_day + and self.highlight_day.year == theyear + and self.highlight_day.month in months + ): + month_pos = months.index(self.highlight_day.month) + else: + month_pos = None + + # max number of weeks for this row + height = max(len(cal) for cal in row) + for j in range(height): + weeks = [] + for k, cal in enumerate(row): + if j >= len(cal): + weeks.append('') + else: + day = ( + self.highlight_day.day if k == month_pos else None + ) + weeks.append( + self.formatweek(cal[j], w, highlight_day=day) + ) + a(formatstring(weeks, colwidth, c).rstrip()) + a('\n' * l) + return ''.join(v) + + +class _CLIDemoLocaleCalendar(LocaleTextCalendar, _CLIDemoCalendar): + def __init__(self, highlight_day=None, *args, **kwargs): + super().__init__(*args, **kwargs) + self.highlight_day = highlight_day + + # Support for old module level interface c = TextCalendar() @@ -690,9 +808,9 @@ def timegm(tuple): return seconds -def main(args): +def main(args=None): import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(color=True) textgroup = parser.add_argument_group('text only arguments') htmlgroup = parser.add_argument_group('html only arguments') textgroup.add_argument( @@ -736,10 +854,15 @@ def main(args): choices=("text", "html"), help="output type (text or html)" ) + parser.add_argument( + "-f", "--first-weekday", + type=int, default=0, + help="weekday (0 is Monday, 6 is Sunday) to start each week (default 0)" + ) parser.add_argument( "year", nargs='?', type=int, - help="year number (1-9999)" + help="year number" ) parser.add_argument( "month", @@ -747,42 +870,47 @@ def main(args): help="month number (1-12, text only)" ) - options = parser.parse_args(args[1:]) + options = parser.parse_args(args) if options.locale and not options.encoding: parser.error("if --locale is specified --encoding is required") sys.exit(1) locale = options.locale, options.encoding + today = datetime.date.today() if options.type == "html": + if options.month: + parser.error("incorrect number of arguments") + sys.exit(1) if options.locale: cal = LocaleHTMLCalendar(locale=locale) else: cal = HTMLCalendar() + cal.setfirstweekday(options.first_weekday) encoding = options.encoding if encoding is None: encoding = sys.getdefaultencoding() optdict = dict(encoding=encoding, css=options.css) write = sys.stdout.buffer.write if options.year is None: - write(cal.formatyearpage(datetime.date.today().year, **optdict)) - elif options.month is None: - write(cal.formatyearpage(options.year, **optdict)) + write(cal.formatyearpage(today.year, **optdict)) else: - parser.error("incorrect number of arguments") - sys.exit(1) + write(cal.formatyearpage(options.year, **optdict)) else: if options.locale: - cal = LocaleTextCalendar(locale=locale) + cal = _CLIDemoLocaleCalendar(highlight_day=today, locale=locale) else: - cal = TextCalendar() + cal = _CLIDemoCalendar(highlight_day=today) + cal.setfirstweekday(options.first_weekday) optdict = dict(w=options.width, l=options.lines) if options.month is None: optdict["c"] = options.spacing optdict["m"] = options.months + else: + _validate_month(options.month) if options.year is None: - result = cal.formatyear(datetime.date.today().year, **optdict) + result = cal.formatyear(today.year, **optdict) elif options.month is None: result = cal.formatyear(options.year, **optdict) else: @@ -795,4 +923,4 @@ def main(args): if __name__ == "__main__": - main(sys.argv) + main() diff --git a/Lib/cgi.py b/Lib/cgi.py deleted file mode 100755 index 8787567be7c..00000000000 --- a/Lib/cgi.py +++ /dev/null @@ -1,1012 +0,0 @@ -#! /usr/local/bin/python - -# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is -# intentionally NOT "/usr/bin/env python". On many systems -# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI -# scripts, and /usr/local/bin is the default directory where Python is -# installed, so /usr/bin/env would be unable to find python. Granted, -# binary installations by Linux vendors often install Python in -# /usr/bin. So let those vendors patch cgi.py to match their choice -# of installation. - -"""Support module for CGI (Common Gateway Interface) scripts. - -This module defines a number of utilities for use by CGI scripts -written in Python. - -The global variable maxlen can be set to an integer indicating the maximum size -of a POST request. POST requests larger than this size will result in a -ValueError being raised during parsing. The default value of this variable is 0, -meaning the request size is unlimited. -""" - -# History -# ------- -# -# Michael McLay started this module. Steve Majewski changed the -# interface to SvFormContentDict and FormContentDict. The multipart -# parsing was inspired by code submitted by Andreas Paepcke. Guido van -# Rossum rewrote, reformatted and documented the module and is currently -# responsible for its maintenance. -# - -__version__ = "2.6" - - -# Imports -# ======= - -from io import StringIO, BytesIO, TextIOWrapper -from collections.abc import Mapping -import sys -import os -import urllib.parse -from email.parser import FeedParser -from email.message import Message -import html -import locale -import tempfile -import warnings - -__all__ = ["MiniFieldStorage", "FieldStorage", "parse", "parse_multipart", - "parse_header", "test", "print_exception", "print_environ", - "print_form", "print_directory", "print_arguments", - "print_environ_usage"] - - -warnings._deprecated(__name__, remove=(3,13)) - -# Logging support -# =============== - -logfile = "" # Filename to log to, if not empty -logfp = None # File object to log to, if not None - -def initlog(*allargs): - """Write a log message, if there is a log file. - - Even though this function is called initlog(), you should always - use log(); log is a variable that is set either to initlog - (initially), to dolog (once the log file has been opened), or to - nolog (when logging is disabled). - - The first argument is a format string; the remaining arguments (if - any) are arguments to the % operator, so e.g. - log("%s: %s", "a", "b") - will write "a: b" to the log file, followed by a newline. - - If the global logfp is not None, it should be a file object to - which log data is written. - - If the global logfp is None, the global logfile may be a string - giving a filename to open, in append mode. This file should be - world writable!!! If the file can't be opened, logging is - silently disabled (since there is no safe place where we could - send an error message). - - """ - global log, logfile, logfp - warnings.warn("cgi.log() is deprecated as of 3.10. Use logging instead", - DeprecationWarning, stacklevel=2) - if logfile and not logfp: - try: - logfp = open(logfile, "a", encoding="locale") - except OSError: - pass - if not logfp: - log = nolog - else: - log = dolog - log(*allargs) - -def dolog(fmt, *args): - """Write a log message to the log file. See initlog() for docs.""" - logfp.write(fmt%args + "\n") - -def nolog(*allargs): - """Dummy function, assigned to log when logging is disabled.""" - pass - -def closelog(): - """Close the log file.""" - global log, logfile, logfp - logfile = '' - if logfp: - logfp.close() - logfp = None - log = initlog - -log = initlog # The current logging function - - -# Parsing functions -# ================= - -# Maximum input we will accept when REQUEST_METHOD is POST -# 0 ==> unlimited input -maxlen = 0 - -def parse(fp=None, environ=os.environ, keep_blank_values=0, - strict_parsing=0, separator='&'): - """Parse a query in the environment or from a file (default stdin) - - Arguments, all optional: - - fp : file pointer; default: sys.stdin.buffer - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - percent-encoded forms should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - - separator: str. The symbol to use for separating the query arguments. - Defaults to &. - """ - if fp is None: - fp = sys.stdin - - # field keys and values (except for files) are returned as strings - # an encoding is required to decode the bytes read from self.fp - if hasattr(fp,'encoding'): - encoding = fp.encoding - else: - encoding = 'latin-1' - - # fp.read() must return bytes - if isinstance(fp, TextIOWrapper): - fp = fp.buffer - - if not 'REQUEST_METHOD' in environ: - environ['REQUEST_METHOD'] = 'GET' # For testing stand-alone - if environ['REQUEST_METHOD'] == 'POST': - ctype, pdict = parse_header(environ['CONTENT_TYPE']) - if ctype == 'multipart/form-data': - return parse_multipart(fp, pdict, separator=separator) - elif ctype == 'application/x-www-form-urlencoded': - clength = int(environ['CONTENT_LENGTH']) - if maxlen and clength > maxlen: - raise ValueError('Maximum content length exceeded') - qs = fp.read(clength).decode(encoding) - else: - qs = '' # Unknown content-type - if 'QUERY_STRING' in environ: - if qs: qs = qs + '&' - qs = qs + environ['QUERY_STRING'] - elif sys.argv[1:]: - if qs: qs = qs + '&' - qs = qs + sys.argv[1] - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - elif 'QUERY_STRING' in environ: - qs = environ['QUERY_STRING'] - else: - if sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing, - encoding=encoding, separator=separator) - - -def parse_multipart(fp, pdict, encoding="utf-8", errors="replace", separator='&'): - """Parse multipart input. - - Arguments: - fp : input file - pdict: dictionary containing other parameters of content-type header - encoding, errors: request encoding and error handler, passed to - FieldStorage - - Returns a dictionary just like parse_qs(): keys are the field names, each - value is a list of values for that field. For non-file fields, the value - is a list of strings. - """ - # RFC 2046, Section 5.1 : The "multipart" boundary delimiters are always - # represented as 7bit US-ASCII. - boundary = pdict['boundary'].decode('ascii') - ctype = "multipart/form-data; boundary={}".format(boundary) - headers = Message() - headers.set_type(ctype) - try: - headers['Content-Length'] = pdict['CONTENT-LENGTH'] - except KeyError: - pass - fs = FieldStorage(fp, headers=headers, encoding=encoding, errors=errors, - environ={'REQUEST_METHOD': 'POST'}, separator=separator) - return {k: fs.getlist(k) for k in fs} - -def _parseparam(s): - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) - if end < 0: - end = len(s) - f = s[:end] - yield f.strip() - s = s[end:] - -def parse_header(line): - """Parse a Content-type like header. - - Return the main content-type and a dictionary of options. - - """ - parts = _parseparam(';' + line) - key = parts.__next__() - pdict = {} - for p in parts: - i = p.find('=') - if i >= 0: - name = p[:i].strip().lower() - value = p[i+1:].strip() - if len(value) >= 2 and value[0] == value[-1] == '"': - value = value[1:-1] - value = value.replace('\\\\', '\\').replace('\\"', '"') - pdict[name] = value - return key, pdict - - -# Classes for field storage -# ========================= - -class MiniFieldStorage: - - """Like FieldStorage, for use when no file uploads are possible.""" - - # Dummy attributes - filename = None - list = None - type = None - file = None - type_options = {} - disposition = None - disposition_options = {} - headers = {} - - def __init__(self, name, value): - """Constructor from field name and value.""" - self.name = name - self.value = value - # self.file = StringIO(value) - - def __repr__(self): - """Return printable representation.""" - return "MiniFieldStorage(%r, %r)" % (self.name, self.value) - - -class FieldStorage: - - """Store a sequence of fields, reading multipart/form-data. - - This class provides naming, typing, files stored on disk, and - more. At the top level, it is accessible like a dictionary, whose - keys are the field names. (Note: None can occur as a field name.) - The items are either a Python list (if there's multiple values) or - another FieldStorage or MiniFieldStorage object. If it's a single - object, it has the following attributes: - - name: the field name, if specified; otherwise None - - filename: the filename, if specified; otherwise None; this is the - client side filename, *not* the file name on which it is - stored (that's a temporary file you don't deal with) - - value: the value as a *string*; for file uploads, this - transparently reads the file every time you request the value - and returns *bytes* - - file: the file(-like) object from which you can read the data *as - bytes* ; None if the data is stored a simple string - - type: the content-type, or None if not specified - - type_options: dictionary of options specified on the content-type - line - - disposition: content-disposition, or None if not specified - - disposition_options: dictionary of corresponding options - - headers: a dictionary(-like) object (sometimes email.message.Message or a - subclass thereof) containing *all* headers - - The class is subclassable, mostly for the purpose of overriding - the make_file() method, which is called internally to come up with - a file open for reading and writing. This makes it possible to - override the default choice of storing all files in a temporary - directory and unlinking them as soon as they have been opened. - - """ - def __init__(self, fp=None, headers=None, outerboundary=b'', - environ=os.environ, keep_blank_values=0, strict_parsing=0, - limit=None, encoding='utf-8', errors='replace', - max_num_fields=None, separator='&'): - """Constructor. Read multipart/* until last part. - - Arguments, all optional: - - fp : file pointer; default: sys.stdin.buffer - (not used when the request method is GET) - Can be : - 1. a TextIOWrapper object - 2. an object whose read() and readline() methods return bytes - - headers : header dictionary-like object; default: - taken from environ as per CGI spec - - outerboundary : terminating multipart boundary - (for internal use only) - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - percent-encoded forms should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - - limit : used internally to read parts of multipart/form-data forms, - to exit from the reading loop when reached. It is the difference - between the form content-length and the number of bytes already - read - - encoding, errors : the encoding and error handler used to decode the - binary stream to strings. Must be the same as the charset defined - for the page sending the form (content-type : meta http-equiv or - header) - - max_num_fields: int. If set, then __init__ throws a ValueError - if there are more than n fields read by parse_qsl(). - - """ - method = 'GET' - self.keep_blank_values = keep_blank_values - self.strict_parsing = strict_parsing - self.max_num_fields = max_num_fields - self.separator = separator - if 'REQUEST_METHOD' in environ: - method = environ['REQUEST_METHOD'].upper() - self.qs_on_post = None - if method == 'GET' or method == 'HEAD': - if 'QUERY_STRING' in environ: - qs = environ['QUERY_STRING'] - elif sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape') - fp = BytesIO(qs) - if headers is None: - headers = {'content-type': - "application/x-www-form-urlencoded"} - if headers is None: - headers = {} - if method == 'POST': - # Set default content-type for POST to what's traditional - headers['content-type'] = "application/x-www-form-urlencoded" - if 'CONTENT_TYPE' in environ: - headers['content-type'] = environ['CONTENT_TYPE'] - if 'QUERY_STRING' in environ: - self.qs_on_post = environ['QUERY_STRING'] - if 'CONTENT_LENGTH' in environ: - headers['content-length'] = environ['CONTENT_LENGTH'] - else: - if not (isinstance(headers, (Mapping, Message))): - raise TypeError("headers must be mapping or an instance of " - "email.message.Message") - self.headers = headers - if fp is None: - self.fp = sys.stdin.buffer - # self.fp.read() must return bytes - elif isinstance(fp, TextIOWrapper): - self.fp = fp.buffer - else: - if not (hasattr(fp, 'read') and hasattr(fp, 'readline')): - raise TypeError("fp must be file pointer") - self.fp = fp - - self.encoding = encoding - self.errors = errors - - if not isinstance(outerboundary, bytes): - raise TypeError('outerboundary must be bytes, not %s' - % type(outerboundary).__name__) - self.outerboundary = outerboundary - - self.bytes_read = 0 - self.limit = limit - - # Process content-disposition header - cdisp, pdict = "", {} - if 'content-disposition' in self.headers: - cdisp, pdict = parse_header(self.headers['content-disposition']) - self.disposition = cdisp - self.disposition_options = pdict - self.name = None - if 'name' in pdict: - self.name = pdict['name'] - self.filename = None - if 'filename' in pdict: - self.filename = pdict['filename'] - self._binary_file = self.filename is not None - - # Process content-type header - # - # Honor any existing content-type header. But if there is no - # content-type header, use some sensible defaults. Assume - # outerboundary is "" at the outer level, but something non-false - # inside a multi-part. The default for an inner part is text/plain, - # but for an outer part it should be urlencoded. This should catch - # bogus clients which erroneously forget to include a content-type - # header. - # - # See below for what we do if there does exist a content-type header, - # but it happens to be something we don't understand. - if 'content-type' in self.headers: - ctype, pdict = parse_header(self.headers['content-type']) - elif self.outerboundary or method != 'POST': - ctype, pdict = "text/plain", {} - else: - ctype, pdict = 'application/x-www-form-urlencoded', {} - self.type = ctype - self.type_options = pdict - if 'boundary' in pdict: - self.innerboundary = pdict['boundary'].encode(self.encoding, - self.errors) - else: - self.innerboundary = b"" - - clen = -1 - if 'content-length' in self.headers: - try: - clen = int(self.headers['content-length']) - except ValueError: - pass - if maxlen and clen > maxlen: - raise ValueError('Maximum content length exceeded') - self.length = clen - if self.limit is None and clen >= 0: - self.limit = clen - - self.list = self.file = None - self.done = 0 - if ctype == 'application/x-www-form-urlencoded': - self.read_urlencoded() - elif ctype[:10] == 'multipart/': - self.read_multi(environ, keep_blank_values, strict_parsing) - else: - self.read_single() - - def __del__(self): - try: - self.file.close() - except AttributeError: - pass - - def __enter__(self): - return self - - def __exit__(self, *args): - self.file.close() - - def __repr__(self): - """Return a printable representation.""" - return "FieldStorage(%r, %r, %r)" % ( - self.name, self.filename, self.value) - - def __iter__(self): - return iter(self.keys()) - - def __getattr__(self, name): - if name != 'value': - raise AttributeError(name) - if self.file: - self.file.seek(0) - value = self.file.read() - self.file.seek(0) - elif self.list is not None: - value = self.list - else: - value = None - return value - - def __getitem__(self, key): - """Dictionary style indexing.""" - if self.list is None: - raise TypeError("not indexable") - found = [] - for item in self.list: - if item.name == key: found.append(item) - if not found: - raise KeyError(key) - if len(found) == 1: - return found[0] - else: - return found - - def getvalue(self, key, default=None): - """Dictionary style get() method, including 'value' lookup.""" - if key in self: - value = self[key] - if isinstance(value, list): - return [x.value for x in value] - else: - return value.value - else: - return default - - def getfirst(self, key, default=None): - """ Return the first value received.""" - if key in self: - value = self[key] - if isinstance(value, list): - return value[0].value - else: - return value.value - else: - return default - - def getlist(self, key): - """ Return list of received values.""" - if key in self: - value = self[key] - if isinstance(value, list): - return [x.value for x in value] - else: - return [value.value] - else: - return [] - - def keys(self): - """Dictionary style keys() method.""" - if self.list is None: - raise TypeError("not indexable") - return list(set(item.name for item in self.list)) - - def __contains__(self, key): - """Dictionary style __contains__ method.""" - if self.list is None: - raise TypeError("not indexable") - return any(item.name == key for item in self.list) - - def __len__(self): - """Dictionary style len(x) support.""" - return len(self.keys()) - - def __bool__(self): - if self.list is None: - raise TypeError("Cannot be converted to bool.") - return bool(self.list) - - def read_urlencoded(self): - """Internal: read data in query string format.""" - qs = self.fp.read(self.length) - if not isinstance(qs, bytes): - raise ValueError("%s should return bytes, got %s" \ - % (self.fp, type(qs).__name__)) - qs = qs.decode(self.encoding, self.errors) - if self.qs_on_post: - qs += '&' + self.qs_on_post - query = urllib.parse.parse_qsl( - qs, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors, - max_num_fields=self.max_num_fields, separator=self.separator) - self.list = [MiniFieldStorage(key, value) for key, value in query] - self.skip_lines() - - FieldStorageClass = None - - def read_multi(self, environ, keep_blank_values, strict_parsing): - """Internal: read a part that is itself multipart.""" - ib = self.innerboundary - if not valid_boundary(ib): - raise ValueError('Invalid boundary in multipart form: %r' % (ib,)) - self.list = [] - if self.qs_on_post: - query = urllib.parse.parse_qsl( - self.qs_on_post, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors, - max_num_fields=self.max_num_fields, separator=self.separator) - self.list.extend(MiniFieldStorage(key, value) for key, value in query) - - klass = self.FieldStorageClass or self.__class__ - first_line = self.fp.readline() # bytes - if not isinstance(first_line, bytes): - raise ValueError("%s should return bytes, got %s" \ - % (self.fp, type(first_line).__name__)) - self.bytes_read += len(first_line) - - # Ensure that we consume the file until we've hit our inner boundary - while (first_line.strip() != (b"--" + self.innerboundary) and - first_line): - first_line = self.fp.readline() - self.bytes_read += len(first_line) - - # Propagate max_num_fields into the sub class appropriately - max_num_fields = self.max_num_fields - if max_num_fields is not None: - max_num_fields -= len(self.list) - - while True: - parser = FeedParser() - hdr_text = b"" - while True: - data = self.fp.readline() - hdr_text += data - if not data.strip(): - break - if not hdr_text: - break - # parser takes strings, not bytes - self.bytes_read += len(hdr_text) - parser.feed(hdr_text.decode(self.encoding, self.errors)) - headers = parser.close() - - # Some clients add Content-Length for part headers, ignore them - if 'content-length' in headers: - del headers['content-length'] - - limit = None if self.limit is None \ - else self.limit - self.bytes_read - part = klass(self.fp, headers, ib, environ, keep_blank_values, - strict_parsing, limit, - self.encoding, self.errors, max_num_fields, self.separator) - - if max_num_fields is not None: - max_num_fields -= 1 - if part.list: - max_num_fields -= len(part.list) - if max_num_fields < 0: - raise ValueError('Max number of fields exceeded') - - self.bytes_read += part.bytes_read - self.list.append(part) - if part.done or self.bytes_read >= self.length > 0: - break - self.skip_lines() - - def read_single(self): - """Internal: read an atomic part.""" - if self.length >= 0: - self.read_binary() - self.skip_lines() - else: - self.read_lines() - self.file.seek(0) - - bufsize = 8*1024 # I/O buffering size for copy to file - - def read_binary(self): - """Internal: read binary data.""" - self.file = self.make_file() - todo = self.length - if todo >= 0: - while todo > 0: - data = self.fp.read(min(todo, self.bufsize)) # bytes - if not isinstance(data, bytes): - raise ValueError("%s should return bytes, got %s" - % (self.fp, type(data).__name__)) - self.bytes_read += len(data) - if not data: - self.done = -1 - break - self.file.write(data) - todo = todo - len(data) - - def read_lines(self): - """Internal: read lines until EOF or outerboundary.""" - if self._binary_file: - self.file = self.__file = BytesIO() # store data as bytes for files - else: - self.file = self.__file = StringIO() # as strings for other fields - if self.outerboundary: - self.read_lines_to_outerboundary() - else: - self.read_lines_to_eof() - - def __write(self, line): - """line is always bytes, not string""" - if self.__file is not None: - if self.__file.tell() + len(line) > 1000: - self.file = self.make_file() - data = self.__file.getvalue() - self.file.write(data) - self.__file = None - if self._binary_file: - # keep bytes - self.file.write(line) - else: - # decode to string - self.file.write(line.decode(self.encoding, self.errors)) - - def read_lines_to_eof(self): - """Internal: read lines until EOF.""" - while 1: - line = self.fp.readline(1<<16) # bytes - self.bytes_read += len(line) - if not line: - self.done = -1 - break - self.__write(line) - - def read_lines_to_outerboundary(self): - """Internal: read lines until outerboundary. - Data is read as bytes: boundaries and line ends must be converted - to bytes for comparisons. - """ - next_boundary = b"--" + self.outerboundary - last_boundary = next_boundary + b"--" - delim = b"" - last_line_lfend = True - _read = 0 - while 1: - - if self.limit is not None and 0 <= self.limit <= _read: - break - line = self.fp.readline(1<<16) # bytes - self.bytes_read += len(line) - _read += len(line) - if not line: - self.done = -1 - break - if delim == b"\r": - line = delim + line - delim = b"" - if line.startswith(b"--") and last_line_lfend: - strippedline = line.rstrip() - if strippedline == next_boundary: - break - if strippedline == last_boundary: - self.done = 1 - break - odelim = delim - if line.endswith(b"\r\n"): - delim = b"\r\n" - line = line[:-2] - last_line_lfend = True - elif line.endswith(b"\n"): - delim = b"\n" - line = line[:-1] - last_line_lfend = True - elif line.endswith(b"\r"): - # We may interrupt \r\n sequences if they span the 2**16 - # byte boundary - delim = b"\r" - line = line[:-1] - last_line_lfend = False - else: - delim = b"" - last_line_lfend = False - self.__write(odelim + line) - - def skip_lines(self): - """Internal: skip lines until outer boundary if defined.""" - if not self.outerboundary or self.done: - return - next_boundary = b"--" + self.outerboundary - last_boundary = next_boundary + b"--" - last_line_lfend = True - while True: - line = self.fp.readline(1<<16) - self.bytes_read += len(line) - if not line: - self.done = -1 - break - if line.endswith(b"--") and last_line_lfend: - strippedline = line.strip() - if strippedline == next_boundary: - break - if strippedline == last_boundary: - self.done = 1 - break - last_line_lfend = line.endswith(b'\n') - - def make_file(self): - """Overridable: return a readable & writable file. - - The file will be used as follows: - - data is written to it - - seek(0) - - data is read from it - - The file is opened in binary mode for files, in text mode - for other fields - - This version opens a temporary file for reading and writing, - and immediately deletes (unlinks) it. The trick (on Unix!) is - that the file can still be used, but it can't be opened by - another process, and it will automatically be deleted when it - is closed or when the current process terminates. - - If you want a more permanent file, you derive a class which - overrides this method. If you want a visible temporary file - that is nevertheless automatically deleted when the script - terminates, try defining a __del__ method in a derived class - which unlinks the temporary files you have created. - - """ - if self._binary_file: - return tempfile.TemporaryFile("wb+") - else: - return tempfile.TemporaryFile("w+", - encoding=self.encoding, newline = '\n') - - -# Test/debug code -# =============== - -def test(environ=os.environ): - """Robust test CGI script, usable as main program. - - Write minimal HTTP headers and dump all information provided to - the script in HTML form. - - """ - print("Content-type: text/html") - print() - sys.stderr = sys.stdout - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - print_environ_usage() - def f(): - exec("testing print_exception() -- italics?") - def g(f=f): - f() - print("

What follows is a test, not an actual exception:

") - g() - except: - print_exception() - - print("

Second try with a small maxlen...

") - - global maxlen - maxlen = 50 - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - except: - print_exception() - -def print_exception(type=None, value=None, tb=None, limit=None): - if type is None: - type, value, tb = sys.exc_info() - import traceback - print() - print("

Traceback (most recent call last):

") - list = traceback.format_tb(tb, limit) + \ - traceback.format_exception_only(type, value) - print("
%s%s
" % ( - html.escape("".join(list[:-1])), - html.escape(list[-1]), - )) - del tb - -def print_environ(environ=os.environ): - """Dump the shell environment as HTML.""" - keys = sorted(environ.keys()) - print() - print("

Shell Environment:

") - print("
") - for key in keys: - print("
", html.escape(key), "
", html.escape(environ[key])) - print("
") - print() - -def print_form(form): - """Dump the contents of a form as HTML.""" - keys = sorted(form.keys()) - print() - print("

Form Contents:

") - if not keys: - print("

No form fields.") - print("

") - for key in keys: - print("
" + html.escape(key) + ":", end=' ') - value = form[key] - print("" + html.escape(repr(type(value))) + "") - print("
" + html.escape(repr(value))) - print("
") - print() - -def print_directory(): - """Dump the current directory as HTML.""" - print() - print("

Current Working Directory:

") - try: - pwd = os.getcwd() - except OSError as msg: - print("OSError:", html.escape(str(msg))) - else: - print(html.escape(pwd)) - print() - -def print_arguments(): - print() - print("

Command Line Arguments:

") - print() - print(sys.argv) - print() - -def print_environ_usage(): - """Dump a list of environment variables used by CGI as HTML.""" - print(""" -

These environment variables could have been set:

-
    -
  • AUTH_TYPE -
  • CONTENT_LENGTH -
  • CONTENT_TYPE -
  • DATE_GMT -
  • DATE_LOCAL -
  • DOCUMENT_NAME -
  • DOCUMENT_ROOT -
  • DOCUMENT_URI -
  • GATEWAY_INTERFACE -
  • LAST_MODIFIED -
  • PATH -
  • PATH_INFO -
  • PATH_TRANSLATED -
  • QUERY_STRING -
  • REMOTE_ADDR -
  • REMOTE_HOST -
  • REMOTE_IDENT -
  • REMOTE_USER -
  • REQUEST_METHOD -
  • SCRIPT_NAME -
  • SERVER_NAME -
  • SERVER_PORT -
  • SERVER_PROTOCOL -
  • SERVER_ROOT -
  • SERVER_SOFTWARE -
-In addition, HTTP headers sent by the server may be passed in the -environment as well. Here are some common variable names: -
    -
  • HTTP_ACCEPT -
  • HTTP_CONNECTION -
  • HTTP_HOST -
  • HTTP_PRAGMA -
  • HTTP_REFERER -
  • HTTP_USER_AGENT -
-""") - - -# Utilities -# ========= - -def valid_boundary(s): - import re - if isinstance(s, bytes): - _vb_pattern = b"^[ -~]{0,200}[!-~]$" - else: - _vb_pattern = "^[ -~]{0,200}[!-~]$" - return re.match(_vb_pattern, s) - -# Invoke mainline -# =============== - -# Call test() when this file is run as a script (not imported as a module) -if __name__ == '__main__': - test() diff --git a/Lib/cgitb.py b/Lib/cgitb.py deleted file mode 100644 index f6b97f25c59..00000000000 --- a/Lib/cgitb.py +++ /dev/null @@ -1,332 +0,0 @@ -"""More comprehensive traceback formatting for Python scripts. - -To enable this module, do: - - import cgitb; cgitb.enable() - -at the top of your script. The optional arguments to enable() are: - - display - if true, tracebacks are displayed in the web browser - logdir - if set, tracebacks are written to files in this directory - context - number of lines of source code to show for each stack frame - format - 'text' or 'html' controls the output format - -By default, tracebacks are displayed but not saved, the context is 5 lines -and the output format is 'html' (for backwards compatibility with the -original use of this module) - -Alternatively, if you have caught an exception and want cgitb to display it -for you, call cgitb.handler(). The optional argument to handler() is a -3-item tuple (etype, evalue, etb) just like the value of sys.exc_info(). -The default handler displays output as HTML. - -""" -import inspect -import keyword -import linecache -import os -import pydoc -import sys -import tempfile -import time -import tokenize -import traceback -import warnings -from html import escape as html_escape - -warnings._deprecated(__name__, remove=(3, 13)) - - -def reset(): - """Return a string that resets the CGI and browser to a known state.""" - return ''' - --> --> - - ''' - -__UNDEF__ = [] # a special sentinel object -def small(text): - if text: - return '' + text + '' - else: - return '' - -def strong(text): - if text: - return '' + text + '' - else: - return '' - -def grey(text): - if text: - return '' + text + '' - else: - return '' - -def lookup(name, frame, locals): - """Find the value for a given name in the given environment.""" - if name in locals: - return 'local', locals[name] - if name in frame.f_globals: - return 'global', frame.f_globals[name] - if '__builtins__' in frame.f_globals: - builtins = frame.f_globals['__builtins__'] - if isinstance(builtins, dict): - if name in builtins: - return 'builtin', builtins[name] - else: - if hasattr(builtins, name): - return 'builtin', getattr(builtins, name) - return None, __UNDEF__ - -def scanvars(reader, frame, locals): - """Scan one logical line of Python and look up values of variables used.""" - vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__ - for ttype, token, start, end, line in tokenize.generate_tokens(reader): - if ttype == tokenize.NEWLINE: break - if ttype == tokenize.NAME and token not in keyword.kwlist: - if lasttoken == '.': - if parent is not __UNDEF__: - value = getattr(parent, token, __UNDEF__) - vars.append((prefix + token, prefix, value)) - else: - where, value = lookup(token, frame, locals) - vars.append((token, where, value)) - elif token == '.': - prefix += lasttoken + '.' - parent = value - else: - parent, prefix = None, '' - lasttoken = token - return vars - -def html(einfo, context=5): - """Return a nice HTML document describing a given traceback.""" - etype, evalue, etb = einfo - if isinstance(etype, type): - etype = etype.__name__ - pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable - date = time.ctime(time.time()) - head = f''' - - - - - -
 
- 
-{html_escape(str(etype))}
-{pyver}
{date}
-

A problem occurred in a Python script. Here is the sequence of -function calls leading up to the error, in the order they occurred.

''' - - indent = '' + small(' ' * 5) + ' ' - frames = [] - records = inspect.getinnerframes(etb, context) - for frame, file, lnum, func, lines, index in records: - if file: - file = os.path.abspath(file) - link = '%s' % (file, pydoc.html.escape(file)) - else: - file = link = '?' - args, varargs, varkw, locals = inspect.getargvalues(frame) - call = '' - if func != '?': - call = 'in ' + strong(pydoc.html.escape(func)) - if func != "": - call += inspect.formatargvalues(args, varargs, varkw, locals, - formatvalue=lambda value: '=' + pydoc.html.repr(value)) - - highlight = {} - def reader(lnum=[lnum]): - highlight[lnum[0]] = 1 - try: return linecache.getline(file, lnum[0]) - finally: lnum[0] += 1 - vars = scanvars(reader, frame, locals) - - rows = ['%s%s %s' % - (' ', link, call)] - if index is not None: - i = lnum - index - for line in lines: - num = small(' ' * (5-len(str(i))) + str(i)) + ' ' - if i in highlight: - line = '=>%s%s' % (num, pydoc.html.preformat(line)) - rows.append('%s' % line) - else: - line = '  %s%s' % (num, pydoc.html.preformat(line)) - rows.append('%s' % grey(line)) - i += 1 - - done, dump = {}, [] - for name, where, value in vars: - if name in done: continue - done[name] = 1 - if value is not __UNDEF__: - if where in ('global', 'builtin'): - name = ('%s ' % where) + strong(name) - elif where == 'local': - name = strong(name) - else: - name = where + strong(name.split('.')[-1]) - dump.append('%s = %s' % (name, pydoc.html.repr(value))) - else: - dump.append(name + ' undefined') - - rows.append('%s' % small(grey(', '.join(dump)))) - frames.append(''' - -%s
''' % '\n'.join(rows)) - - exception = ['

%s: %s' % (strong(pydoc.html.escape(str(etype))), - pydoc.html.escape(str(evalue)))] - for name in dir(evalue): - if name[:1] == '_': continue - value = pydoc.html.repr(getattr(evalue, name)) - exception.append('\n
%s%s =\n%s' % (indent, name, value)) - - return head + ''.join(frames) + ''.join(exception) + ''' - - - -''' % pydoc.html.escape( - ''.join(traceback.format_exception(etype, evalue, etb))) - -def text(einfo, context=5): - """Return a plain text document describing a given traceback.""" - etype, evalue, etb = einfo - if isinstance(etype, type): - etype = etype.__name__ - pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable - date = time.ctime(time.time()) - head = "%s\n%s\n%s\n" % (str(etype), pyver, date) + ''' -A problem occurred in a Python script. Here is the sequence of -function calls leading up to the error, in the order they occurred. -''' - - frames = [] - records = inspect.getinnerframes(etb, context) - for frame, file, lnum, func, lines, index in records: - file = file and os.path.abspath(file) or '?' - args, varargs, varkw, locals = inspect.getargvalues(frame) - call = '' - if func != '?': - call = 'in ' + func - if func != "": - call += inspect.formatargvalues(args, varargs, varkw, locals, - formatvalue=lambda value: '=' + pydoc.text.repr(value)) - - highlight = {} - def reader(lnum=[lnum]): - highlight[lnum[0]] = 1 - try: return linecache.getline(file, lnum[0]) - finally: lnum[0] += 1 - vars = scanvars(reader, frame, locals) - - rows = [' %s %s' % (file, call)] - if index is not None: - i = lnum - index - for line in lines: - num = '%5d ' % i - rows.append(num+line.rstrip()) - i += 1 - - done, dump = {}, [] - for name, where, value in vars: - if name in done: continue - done[name] = 1 - if value is not __UNDEF__: - if where == 'global': name = 'global ' + name - elif where != 'local': name = where + name.split('.')[-1] - dump.append('%s = %s' % (name, pydoc.text.repr(value))) - else: - dump.append(name + ' undefined') - - rows.append('\n'.join(dump)) - frames.append('\n%s\n' % '\n'.join(rows)) - - exception = ['%s: %s' % (str(etype), str(evalue))] - for name in dir(evalue): - value = pydoc.text.repr(getattr(evalue, name)) - exception.append('\n%s%s = %s' % (" "*4, name, value)) - - return head + ''.join(frames) + ''.join(exception) + ''' - -The above is a description of an error in a Python program. Here is -the original traceback: - -%s -''' % ''.join(traceback.format_exception(etype, evalue, etb)) - -class Hook: - """A hook to replace sys.excepthook that shows tracebacks in HTML.""" - - def __init__(self, display=1, logdir=None, context=5, file=None, - format="html"): - self.display = display # send tracebacks to browser if true - self.logdir = logdir # log tracebacks to files if not None - self.context = context # number of source code lines per frame - self.file = file or sys.stdout # place to send the output - self.format = format - - def __call__(self, etype, evalue, etb): - self.handle((etype, evalue, etb)) - - def handle(self, info=None): - info = info or sys.exc_info() - if self.format == "html": - self.file.write(reset()) - - formatter = (self.format=="html") and html or text - plain = False - try: - doc = formatter(info, self.context) - except: # just in case something goes wrong - doc = ''.join(traceback.format_exception(*info)) - plain = True - - if self.display: - if plain: - doc = pydoc.html.escape(doc) - self.file.write('

' + doc + '
\n') - else: - self.file.write(doc + '\n') - else: - self.file.write('

A problem occurred in a Python script.\n') - - if self.logdir is not None: - suffix = ['.txt', '.html'][self.format=="html"] - (fd, path) = tempfile.mkstemp(suffix=suffix, dir=self.logdir) - - try: - with os.fdopen(fd, 'w') as file: - file.write(doc) - msg = '%s contains the description of this error.' % path - except: - msg = 'Tried to save traceback to %s, but failed.' % path - - if self.format == 'html': - self.file.write('

%s

\n' % msg) - else: - self.file.write(msg + '\n') - try: - self.file.flush() - except: pass - -handler = Hook().handle -def enable(display=1, logdir=None, context=5, format="html"): - """Install an exception handler that formats tracebacks as HTML. - - The optional argument 'display' can be set to 0 to suppress sending the - traceback to the browser, and 'logdir' can be set to a directory to cause - tracebacks to be written to files there.""" - sys.excepthook = Hook(display=display, logdir=logdir, - context=context, format=format) diff --git a/Lib/chunk.py b/Lib/chunk.py deleted file mode 100644 index 618781efd11..00000000000 --- a/Lib/chunk.py +++ /dev/null @@ -1,173 +0,0 @@ -"""Simple class to read IFF chunks. - -An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File -Format)) has the following structure: - -+----------------+ -| ID (4 bytes) | -+----------------+ -| size (4 bytes) | -+----------------+ -| data | -| ... | -+----------------+ - -The ID is a 4-byte string which identifies the type of chunk. - -The size field (a 32-bit value, encoded using big-endian byte order) -gives the size of the whole chunk, including the 8-byte header. - -Usually an IFF-type file consists of one or more chunks. The proposed -usage of the Chunk class defined here is to instantiate an instance at -the start of each chunk and read from the instance until it reaches -the end, after which a new instance can be instantiated. At the end -of the file, creating a new instance will fail with an EOFError -exception. - -Usage: -while True: - try: - chunk = Chunk(file) - except EOFError: - break - chunktype = chunk.getname() - while True: - data = chunk.read(nbytes) - if not data: - pass - # do something with data - -The interface is file-like. The implemented methods are: -read, close, seek, tell, isatty. -Extra methods are: skip() (called by close, skips to the end of the chunk), -getname() (returns the name (ID) of the chunk) - -The __init__ method has one required argument, a file-like object -(including a chunk instance), and one optional argument, a flag which -specifies whether or not chunks are aligned on 2-byte boundaries. The -default is 1, i.e. aligned. -""" - -import warnings - -warnings._deprecated(__name__, remove=(3, 13)) - -class Chunk: - def __init__(self, file, align=True, bigendian=True, inclheader=False): - import struct - self.closed = False - self.align = align # whether to align to word (2-byte) boundaries - if bigendian: - strflag = '>' - else: - strflag = '<' - self.file = file - self.chunkname = file.read(4) - if len(self.chunkname) < 4: - raise EOFError - try: - self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0] - except struct.error: - raise EOFError from None - if inclheader: - self.chunksize = self.chunksize - 8 # subtract header - self.size_read = 0 - try: - self.offset = self.file.tell() - except (AttributeError, OSError): - self.seekable = False - else: - self.seekable = True - - def getname(self): - """Return the name (ID) of the current chunk.""" - return self.chunkname - - def getsize(self): - """Return the size of the current chunk.""" - return self.chunksize - - def close(self): - if not self.closed: - try: - self.skip() - finally: - self.closed = True - - def isatty(self): - if self.closed: - raise ValueError("I/O operation on closed file") - return False - - def seek(self, pos, whence=0): - """Seek to specified position into the chunk. - Default position is 0 (start of chunk). - If the file is not seekable, this will result in an error. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if not self.seekable: - raise OSError("cannot seek") - if whence == 1: - pos = pos + self.size_read - elif whence == 2: - pos = pos + self.chunksize - if pos < 0 or pos > self.chunksize: - raise RuntimeError - self.file.seek(self.offset + pos, 0) - self.size_read = pos - - def tell(self): - if self.closed: - raise ValueError("I/O operation on closed file") - return self.size_read - - def read(self, size=-1): - """Read at most size bytes from the chunk. - If size is omitted or negative, read until the end - of the chunk. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.size_read >= self.chunksize: - return b'' - if size < 0: - size = self.chunksize - self.size_read - if size > self.chunksize - self.size_read: - size = self.chunksize - self.size_read - data = self.file.read(size) - self.size_read = self.size_read + len(data) - if self.size_read == self.chunksize and \ - self.align and \ - (self.chunksize & 1): - dummy = self.file.read(1) - self.size_read = self.size_read + len(dummy) - return data - - def skip(self): - """Skip the rest of the chunk. - If you are not interested in the contents of the chunk, - this method should be called so that the file points to - the start of the next chunk. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.seekable: - try: - n = self.chunksize - self.size_read - # maybe fix alignment - if self.align and (self.chunksize & 1): - n = n + 1 - self.file.seek(n, 1) - self.size_read = self.size_read + n - return - except OSError: - pass - while self.size_read < self.chunksize: - n = min(8192, self.chunksize - self.size_read) - dummy = self.read(n) - if not dummy: - raise EOFError diff --git a/Lib/cmd.py b/Lib/cmd.py index 88ee7d3ddc4..51495fb3216 100644 --- a/Lib/cmd.py +++ b/Lib/cmd.py @@ -5,16 +5,16 @@ 1. End of file on input is processed as the command 'EOF'. 2. A command is parsed out of each line by collecting the prefix composed of characters in the identchars member. -3. A command `foo' is dispatched to a method 'do_foo()'; the do_ method +3. A command 'foo' is dispatched to a method 'do_foo()'; the do_ method is passed a single argument consisting of the remainder of the line. 4. Typing an empty line repeats the last command. (Actually, it calls the - method `emptyline', which may be overridden in a subclass.) -5. There is a predefined `help' method. Given an argument `topic', it - calls the command `help_topic'. With no arguments, it lists all topics + method 'emptyline', which may be overridden in a subclass.) +5. There is a predefined 'help' method. Given an argument 'topic', it + calls the command 'help_topic'. With no arguments, it lists all topics with defined help_ functions, broken into up to three topics; documented commands, miscellaneous help topics, and undocumented commands. -6. The command '?' is a synonym for `help'. The command '!' is a synonym - for `shell', if a do_shell method exists. +6. The command '?' is a synonym for 'help'. The command '!' is a synonym + for 'shell', if a do_shell method exists. 7. If completion is enabled, completing commands will be done automatically, and completing of commands args is done by calling complete_foo() with arguments text, line, begidx, endidx. text is string we are matching @@ -23,31 +23,34 @@ indexes of the text being matched, which could be used to provide different completion depending upon which position the argument is in. -The `default' method may be overridden to intercept commands for which there +The 'default' method may be overridden to intercept commands for which there is no do_ method. -The `completedefault' method may be overridden to intercept completions for +The 'completedefault' method may be overridden to intercept completions for commands that have no complete_ method. -The data member `self.ruler' sets the character used to draw separator lines +The data member 'self.ruler' sets the character used to draw separator lines in the help messages. If empty, no ruler line is drawn. It defaults to "=". -If the value of `self.intro' is nonempty when the cmdloop method is called, +If the value of 'self.intro' is nonempty when the cmdloop method is called, it is printed out on interpreter startup. This value may be overridden via an optional argument to the cmdloop() method. -The data members `self.doc_header', `self.misc_header', and -`self.undoc_header' set the headers used for the help function's +The data members 'self.doc_header', 'self.misc_header', and +'self.undoc_header' set the headers used for the help function's listings of documented functions, miscellaneous topics, and undocumented functions respectively. """ -import string, sys +import sys __all__ = ["Cmd"] PROMPT = '(Cmd) ' -IDENTCHARS = string.ascii_letters + string.digits + '_' +IDENTCHARS = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' + 'abcdefghijklmnopqrstuvwxyz' + '0123456789' + '_') class Cmd: """A simple framework for writing line-oriented command interpreters. @@ -108,7 +111,15 @@ def cmdloop(self, intro=None): import readline self.old_completer = readline.get_completer() readline.set_completer(self.complete) - readline.parse_and_bind(self.completekey+": complete") + if readline.backend == "editline": + if self.completekey == 'tab': + # libedit uses "^I" instead of "tab" + command_string = "bind ^I rl_complete" + else: + command_string = f"bind {self.completekey} rl_complete" + else: + command_string = f"{self.completekey}: complete" + readline.parse_and_bind(command_string) except ImportError: pass try: @@ -210,9 +221,8 @@ def onecmd(self, line): if cmd == '': return self.default(line) else: - try: - func = getattr(self, 'do_' + cmd) - except AttributeError: + func = getattr(self, 'do_' + cmd, None) + if func is None: return self.default(line) return func(arg) @@ -263,7 +273,7 @@ def complete(self, text, state): endidx = readline.get_endidx() - stripped if begidx>0: cmd, args, foo = self.parseline(line) - if cmd == '': + if not cmd: compfunc = self.completedefault else: try: @@ -296,8 +306,11 @@ def do_help(self, arg): try: func = getattr(self, 'help_' + arg) except AttributeError: + from inspect import cleandoc + try: doc=getattr(self, 'do_' + arg).__doc__ + doc = cleandoc(doc) if doc: self.stdout.write("%s\n"%str(doc)) return diff --git a/Lib/code.py b/Lib/code.py index 2bd5fa3e795..b134886dc26 100644 --- a/Lib/code.py +++ b/Lib/code.py @@ -5,6 +5,7 @@ # Inspired by similar code by Jeff Epler and Fredrik Lundh. +import builtins import sys import traceback from codeop import CommandCompiler, compile_command @@ -24,10 +25,10 @@ class InteractiveInterpreter: def __init__(self, locals=None): """Constructor. - The optional 'locals' argument specifies the dictionary in - which code will be executed; it defaults to a newly created - dictionary with key "__name__" set to "__console__" and key - "__doc__" set to None. + The optional 'locals' argument specifies a mapping to use as the + namespace in which code will be executed; it defaults to a newly + created dictionary with key "__name__" set to "__console__" and + key "__doc__" set to None. """ if locals is None: @@ -63,7 +64,7 @@ def runsource(self, source, filename="", symbol="single"): code = self.compile(source, filename, symbol) except (OverflowError, SyntaxError, ValueError): # Case 1 - self.showsyntaxerror(filename) + self.showsyntaxerror(filename, source=source) return False if code is None: @@ -93,7 +94,7 @@ def runcode(self, code): except: self.showtraceback() - def showsyntaxerror(self, filename=None): + def showsyntaxerror(self, filename=None, **kwargs): """Display the syntax error that just occurred. This doesn't display a stack trace because there isn't one. @@ -105,29 +106,14 @@ def showsyntaxerror(self, filename=None): The output is written by self.write(), below. """ - type, value, tb = sys.exc_info() - sys.last_exc = value - sys.last_type = type - sys.last_value = value - sys.last_traceback = tb - if filename and type is SyntaxError: - # Work hard to stuff the correct filename in the exception - try: - msg, (dummy_filename, lineno, offset, line) = value.args - except ValueError: - # Not the format we expect; leave it alone - pass - else: - # Stuff in the right filename - value = SyntaxError(msg, (filename, lineno, offset, line)) - sys.last_exc = sys.last_value = value - if sys.excepthook is sys.__excepthook__: - lines = traceback.format_exception_only(type, value) - self.write(''.join(lines)) - else: - # If someone has set sys.excepthook, we let that take precedence - # over self.write - sys.excepthook(type, value, tb) + try: + typ, value, tb = sys.exc_info() + if filename and issubclass(typ, SyntaxError): + value.filename = filename + source = kwargs.pop('source', "") + self._showtraceback(typ, value, None, source) + finally: + typ = value = tb = None def showtraceback(self): """Display the exception that just occurred. @@ -137,19 +123,46 @@ def showtraceback(self): The output is written by self.write(), below. """ - sys.last_type, sys.last_value, last_tb = ei = sys.exc_info() - sys.last_traceback = last_tb - sys.last_exc = ei[1] try: - lines = traceback.format_exception(ei[0], ei[1], last_tb.tb_next) - if sys.excepthook is sys.__excepthook__: - self.write(''.join(lines)) - else: - # If someone has set sys.excepthook, we let that take precedence - # over self.write - sys.excepthook(ei[0], ei[1], last_tb) + typ, value, tb = sys.exc_info() + self._showtraceback(typ, value, tb.tb_next, "") finally: - last_tb = ei = None + typ = value = tb = None + + def _showtraceback(self, typ, value, tb, source): + sys.last_type = typ + sys.last_traceback = tb + value = value.with_traceback(tb) + # Set the line of text that the exception refers to + lines = source.splitlines() + if (source and typ is SyntaxError + and not value.text and value.lineno is not None + and len(lines) >= value.lineno): + value.text = lines[value.lineno - 1] + sys.last_exc = sys.last_value = value + if sys.excepthook is sys.__excepthook__: + self._excepthook(typ, value, tb) + else: + # If someone has set sys.excepthook, we let that take precedence + # over self.write + try: + sys.excepthook(typ, value, tb) + except SystemExit: + raise + except BaseException as e: + e.__context__ = None + e = e.with_traceback(e.__traceback__.tb_next) + print('Error in sys.excepthook:', file=sys.stderr) + sys.__excepthook__(type(e), e, e.__traceback__) + print(file=sys.stderr) + print('Original exception was:', file=sys.stderr) + sys.__excepthook__(typ, value, tb) + + def _excepthook(self, typ, value, tb): + # This method is being overwritten in + # _pyrepl.console.InteractiveColoredConsole + lines = traceback.format_exception(typ, value, tb) + self.write(''.join(lines)) def write(self, data): """Write a string. @@ -169,7 +182,7 @@ class InteractiveConsole(InteractiveInterpreter): """ - def __init__(self, locals=None, filename=""): + def __init__(self, locals=None, filename="", *, local_exit=False): """Constructor. The optional locals argument will be passed to the @@ -181,6 +194,7 @@ def __init__(self, locals=None, filename=""): """ InteractiveInterpreter.__init__(self, locals) self.filename = filename + self.local_exit = local_exit self.resetbuffer() def resetbuffer(self): @@ -205,12 +219,17 @@ def interact(self, banner=None, exitmsg=None): """ try: sys.ps1 + delete_ps1_after = False except AttributeError: sys.ps1 = ">>> " + delete_ps1_after = True try: - sys.ps2 + _ps2 = sys.ps2 + delete_ps2_after = False except AttributeError: sys.ps2 = "... " + delete_ps2_after = True + cprt = 'Type "help", "copyright", "credits" or "license" for more information.' if banner is None: self.write("Python %s on %s\n%s\n(%s)\n" % @@ -219,29 +238,72 @@ def interact(self, banner=None, exitmsg=None): elif banner: self.write("%s\n" % str(banner)) more = 0 - while 1: - try: - if more: - prompt = sys.ps2 - else: - prompt = sys.ps1 + + # When the user uses exit() or quit() in their interactive shell + # they probably just want to exit the created shell, not the whole + # process. exit and quit in builtins closes sys.stdin which makes + # it super difficult to restore + # + # When self.local_exit is True, we overwrite the builtins so + # exit() and quit() only raises SystemExit and we can catch that + # to only exit the interactive shell + + _exit = None + _quit = None + + if self.local_exit: + if hasattr(builtins, "exit"): + _exit = builtins.exit + builtins.exit = Quitter("exit") + + if hasattr(builtins, "quit"): + _quit = builtins.quit + builtins.quit = Quitter("quit") + + try: + while True: try: - line = self.raw_input(prompt) - except EOFError: - self.write("\n") - break - else: - more = self.push(line) - except KeyboardInterrupt: - self.write("\nKeyboardInterrupt\n") - self.resetbuffer() - more = 0 - if exitmsg is None: - self.write('now exiting %s...\n' % self.__class__.__name__) - elif exitmsg != '': - self.write('%s\n' % exitmsg) - - def push(self, line): + if more: + prompt = sys.ps2 + else: + prompt = sys.ps1 + try: + line = self.raw_input(prompt) + except EOFError: + self.write("\n") + break + else: + more = self.push(line) + except KeyboardInterrupt: + self.write("\nKeyboardInterrupt\n") + self.resetbuffer() + more = 0 + except SystemExit as e: + if self.local_exit: + self.write("\n") + break + else: + raise e + finally: + # restore exit and quit in builtins if they were modified + if _exit is not None: + builtins.exit = _exit + + if _quit is not None: + builtins.quit = _quit + + if delete_ps1_after: + del sys.ps1 + + if delete_ps2_after: + del sys.ps2 + + if exitmsg is None: + self.write('now exiting %s...\n' % self.__class__.__name__) + elif exitmsg != '': + self.write('%s\n' % exitmsg) + + def push(self, line, filename=None, _symbol="single"): """Push a line to the interpreter. The line should not have a trailing newline; it may have @@ -257,7 +319,9 @@ def push(self, line): """ self.buffer.append(line) source = "\n".join(self.buffer) - more = self.runsource(source, self.filename) + if filename is None: + filename = self.filename + more = self.runsource(source, filename, symbol=_symbol) if not more: self.resetbuffer() return more @@ -276,8 +340,22 @@ def raw_input(self, prompt=""): return input(prompt) +class Quitter: + def __init__(self, name): + self.name = name + if sys.platform == "win32": + self.eof = 'Ctrl-Z plus Return' + else: + self.eof = 'Ctrl-D (i.e. EOF)' + + def __repr__(self): + return f'Use {self.name} or {self.eof} to exit' + + def __call__(self, code=None): + raise SystemExit(code) + -def interact(banner=None, readfunc=None, local=None, exitmsg=None): +def interact(banner=None, readfunc=None, local=None, exitmsg=None, local_exit=False): """Closely emulate the interactive Python interpreter. This is a backwards compatible interface to the InteractiveConsole @@ -290,14 +368,15 @@ def interact(banner=None, readfunc=None, local=None, exitmsg=None): readfunc -- if not None, replaces InteractiveConsole.raw_input() local -- passed to InteractiveInterpreter.__init__() exitmsg -- passed to InteractiveConsole.interact() + local_exit -- passed to InteractiveConsole.__init__() """ - console = InteractiveConsole(local) + console = InteractiveConsole(local, local_exit=local_exit) if readfunc is not None: console.raw_input = readfunc else: try: - import readline + import readline # noqa: F401 except ImportError: pass console.interact(banner, exitmsg) @@ -306,7 +385,7 @@ def interact(banner=None, readfunc=None, local=None, exitmsg=None): if __name__ == "__main__": import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(color=True) parser.add_argument('-q', action='store_true', help="don't print version and copyright messages") args = parser.parse_args() diff --git a/Lib/codecs.py b/Lib/codecs.py index 82f23983e71..e4a8010aba9 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -111,6 +111,9 @@ def __repr__(self): (self.__class__.__module__, self.__class__.__qualname__, self.name, id(self)) + def __getnewargs__(self): + return tuple(self) + class Codec: """ Defines the interface for stateless encoders/decoders. @@ -615,7 +618,7 @@ def readlines(self, sizehint=None, keepends=True): method and are included in the list entries. sizehint, if given, is ignored since there is no efficient - way to finding the true end-of-line. + way of finding the true end-of-line. """ data = self.read() @@ -706,13 +709,13 @@ def read(self, size=-1): return self.reader.read(size) - def readline(self, size=None): + def readline(self, size=None, keepends=True): - return self.reader.readline(size) + return self.reader.readline(size, keepends) - def readlines(self, sizehint=None): + def readlines(self, sizehint=None, keepends=True): - return self.reader.readlines(sizehint) + return self.reader.readlines(sizehint, keepends) def __next__(self): @@ -881,7 +884,6 @@ def __reduce_ex__(self, proto): ### Shortcuts def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): - """ Open an encoded file using the given mode and return a wrapped version providing transparent encoding/decoding. @@ -909,8 +911,11 @@ def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): .encoding which allows querying the used encoding. This attribute is only available if an encoding was specified as parameter. - """ + import warnings + warnings.warn("codecs.open() is deprecated. Use open() instead.", + DeprecationWarning, stacklevel=2) + if encoding is not None and \ 'b' not in mode: # Force opening of the file in binary mode @@ -1106,24 +1111,15 @@ def make_encoding_map(decoding_map): ### error handlers -try: - strict_errors = lookup_error("strict") - ignore_errors = lookup_error("ignore") - replace_errors = lookup_error("replace") - xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") - backslashreplace_errors = lookup_error("backslashreplace") - namereplace_errors = lookup_error("namereplace") -except LookupError: - # In --disable-unicode builds, these error handler are missing - strict_errors = None - ignore_errors = None - replace_errors = None - xmlcharrefreplace_errors = None - backslashreplace_errors = None - namereplace_errors = None +strict_errors = lookup_error("strict") +ignore_errors = lookup_error("ignore") +replace_errors = lookup_error("replace") +xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") +backslashreplace_errors = lookup_error("backslashreplace") +namereplace_errors = lookup_error("namereplace") # Tell modulefinder that using codecs probably needs the encodings # package _false = 0 if _false: - import encodings + import encodings # noqa: F401 diff --git a/Lib/codeop.py b/Lib/codeop.py index 4dd096574bb..8cac00442d9 100644 --- a/Lib/codeop.py +++ b/Lib/codeop.py @@ -44,9 +44,10 @@ # Caveat emptor: These flags are undocumented on purpose and depending # on their effect outside the standard library is **unsupported**. PyCF_DONT_IMPLY_DEDENT = 0x200 +PyCF_ONLY_AST = 0x400 PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000 -def _maybe_compile(compiler, source, filename, symbol): +def _maybe_compile(compiler, source, filename, symbol, flags): # Check for source consisting of only blank lines and comments. for line in source.split("\n"): line = line.strip() @@ -60,36 +61,26 @@ def _maybe_compile(compiler, source, filename, symbol): with warnings.catch_warnings(): warnings.simplefilter("ignore", (SyntaxWarning, DeprecationWarning)) try: - compiler(source, filename, symbol) + compiler(source, filename, symbol, flags=flags) except SyntaxError: # Let other compile() errors propagate. try: - compiler(source + "\n", filename, symbol) + compiler(source + "\n", filename, symbol, flags=flags) + return None + except _IncompleteInputError as e: return None except SyntaxError as e: - if "incomplete input" in str(e): - return None + pass # fallthrough return compiler(source, filename, symbol, incomplete_input=False) -def _is_syntax_error(err1, err2): - rep1 = repr(err1) - rep2 = repr(err2) - if "was never closed" in rep1 and "was never closed" in rep2: - return False - if rep1 == rep2: - return True - return False - -def _compile(source, filename, symbol, incomplete_input=True): - flags = 0 +def _compile(source, filename, symbol, incomplete_input=True, *, flags=0): if incomplete_input: flags |= PyCF_ALLOW_INCOMPLETE_INPUT flags |= PyCF_DONT_IMPLY_DEDENT return compile(source, filename, symbol, flags) - -def compile_command(source, filename="", symbol="single"): +def compile_command(source, filename="", symbol="single", flags=0): r"""Compile a command and determine whether it is incomplete. Arguments: @@ -108,7 +99,7 @@ def compile_command(source, filename="", symbol="single"): syntax error (OverflowError and ValueError can be produced by malformed literals). """ - return _maybe_compile(_compile, source, filename, symbol) + return _maybe_compile(_compile, source, filename, symbol, flags) class Compile: """Instances of this class behave much like the built-in compile @@ -118,12 +109,14 @@ class Compile: def __init__(self): self.flags = PyCF_DONT_IMPLY_DEDENT | PyCF_ALLOW_INCOMPLETE_INPUT - def __call__(self, source, filename, symbol, **kwargs): - flags = self.flags + def __call__(self, source, filename, symbol, flags=0, **kwargs): + flags |= self.flags if kwargs.get('incomplete_input', True) is False: flags &= ~PyCF_DONT_IMPLY_DEDENT flags &= ~PyCF_ALLOW_INCOMPLETE_INPUT codeob = compile(source, filename, symbol, flags, True) + if flags & PyCF_ONLY_AST: + return codeob # this is an ast.Module in this case for feature in _features: if codeob.co_flags & feature.compiler_flag: self.flags |= feature.compiler_flag @@ -158,4 +151,4 @@ def __call__(self, source, filename="", symbol="single"): syntax error (OverflowError and ValueError can be produced by malformed literals). """ - return _maybe_compile(self.compiler, source, filename, symbol) + return _maybe_compile(self.compiler, source, filename, symbol, flags=self.compiler.flags) diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index f7348ee918d..3d3bbd7a39a 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -29,6 +29,9 @@ import _collections_abc import sys as _sys +_sys.modules['collections.abc'] = _collections_abc +abc = _collections_abc + from itertools import chain as _chain from itertools import repeat as _repeat from itertools import starmap as _starmap @@ -46,19 +49,19 @@ _collections_abc.MutableSequence.register(deque) try: - from _collections import _deque_iterator + # Expose _deque_iterator to support pickling deque iterators + from _collections import _deque_iterator # noqa: F401 except ImportError: pass try: from _collections import defaultdict except ImportError: - # FIXME: try to implement defaultdict in collections.rs rather than in Python - # I (coolreader18) couldn't figure out some class stuff with __new__ and - # __init__ and __missing__ and subclassing built-in types from Rust, so I went - # with this instead. + # TODO: RUSTPYTHON - implement defaultdict in Rust from ._defaultdict import defaultdict +heapq = None # Lazily imported + ################################################################################ ### OrderedDict @@ -461,7 +464,7 @@ def _make(cls, iterable): def _replace(self, /, **kwds): result = self._make(_map(kwds.pop, field_names, self)) if kwds: - raise ValueError(f'Got unexpected field names: {list(kwds)!r}') + raise TypeError(f'Got unexpected field names: {list(kwds)!r}') return result _replace.__doc__ = (f'Return a new {typename} object replacing specified ' @@ -499,6 +502,7 @@ def __getnewargs__(self): '_field_defaults': field_defaults, '__new__': __new__, '_make': _make, + '__replace__': _replace, '_replace': _replace, '__repr__': __repr__, '_asdict': _asdict, @@ -592,7 +596,7 @@ class Counter(dict): # References: # http://en.wikipedia.org/wiki/Multiset # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html - # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm + # http://www.java2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm # http://code.activestate.com/recipes/259174/ # Knuth, TAOCP Vol. II section 4.6.3 @@ -632,7 +636,10 @@ def most_common(self, n=None): return sorted(self.items(), key=_itemgetter(1), reverse=True) # Lazy import to speedup Python startup time - import heapq + global heapq + if heapq is None: + import heapq + return heapq.nlargest(n, self.items(), key=_itemgetter(1)) def elements(self): @@ -642,7 +649,8 @@ def elements(self): >>> sorted(c.elements()) ['A', 'A', 'B', 'B', 'C', 'C'] - # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 + Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1 + >>> import math >>> prime_factors = Counter({2: 2, 3: 3, 17: 1}) >>> math.prod(prime_factors.elements()) @@ -683,7 +691,7 @@ def update(self, iterable=None, /, **kwds): ''' # The regular dict.update() operation makes no sense here because the - # replace behavior results in the some of original untouched counts + # replace behavior results in some of the original untouched counts # being mixed-in with all of the other counts for a mismash that # doesn't have a straight-forward interpretation in most counting # contexts. Instead, we implement straight-addition. Both the inputs @@ -1018,7 +1026,7 @@ def __getitem__(self, key): return self.__missing__(key) # support subclasses that define __missing__ def get(self, key, default=None): - return self[key] if key in self else default + return self[key] if key in self else default # needs to make use of __contains__ def __len__(self): return len(set().union(*self.maps)) # reuses stored hash values if possible @@ -1030,7 +1038,10 @@ def __iter__(self): return iter(d) def __contains__(self, key): - return any(key in m for m in self.maps) + for mapping in self.maps: + if key in mapping: + return True + return False def __bool__(self): return any(self.maps) @@ -1040,9 +1051,9 @@ def __repr__(self): return f'{self.__class__.__name__}({", ".join(map(repr, self.maps))})' @classmethod - def fromkeys(cls, iterable, *args): - 'Create a ChainMap with a single dict created from the iterable.' - return cls(dict.fromkeys(iterable, *args)) + def fromkeys(cls, iterable, value=None, /): + 'Create a new ChainMap with keys from iterable and values set to value.' + return cls(dict.fromkeys(iterable, value)) def copy(self): 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' @@ -1485,6 +1496,8 @@ def format_map(self, mapping): return self.data.format_map(mapping) def index(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.index(sub, start, end) def isalpha(self): @@ -1553,6 +1566,8 @@ def rfind(self, sub, start=0, end=_sys.maxsize): return self.data.rfind(sub, start, end) def rindex(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.rindex(sub, start, end) def rjust(self, width, *args): diff --git a/Lib/collections/_defaultdict.py b/Lib/collections/_defaultdict.py index b9c6c496139..cb9d403c8ad 100644 --- a/Lib/collections/_defaultdict.py +++ b/Lib/collections/_defaultdict.py @@ -17,6 +17,10 @@ def __missing__(self, key): val = self.default_factory() else: raise KeyError(key) + # CPython parity: a recursive __missing__ via factory() may have + # already populated key; preserve that value instead of overwriting. + if key in self: + return self[key] self[key] = val return val diff --git a/Lib/collections/abc.py b/Lib/collections/abc.py deleted file mode 100644 index 86ca8b8a841..00000000000 --- a/Lib/collections/abc.py +++ /dev/null @@ -1,3 +0,0 @@ -from _collections_abc import * -from _collections_abc import __all__ -from _collections_abc import _CallableGenericAlias diff --git a/Lib/colorsys.py b/Lib/colorsys.py index bc897bd0f99..e97f91718a3 100644 --- a/Lib/colorsys.py +++ b/Lib/colorsys.py @@ -24,7 +24,7 @@ __all__ = ["rgb_to_yiq","yiq_to_rgb","rgb_to_hls","hls_to_rgb", "rgb_to_hsv","hsv_to_rgb"] -# Some floating point constants +# Some floating-point constants ONE_THIRD = 1.0/3.0 ONE_SIXTH = 1.0/6.0 diff --git a/Lib/compileall.py b/Lib/compileall.py index a388931fb5a..67fe370451e 100644 --- a/Lib/compileall.py +++ b/Lib/compileall.py @@ -97,9 +97,15 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False, files = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels) success = True if workers != 1 and ProcessPoolExecutor is not None: + import multiprocessing + if multiprocessing.get_start_method() == 'fork': + mp_context = multiprocessing.get_context('forkserver') + else: + mp_context = None # If workers == 0, let ProcessPoolExecutor choose workers = workers or None - with ProcessPoolExecutor(max_workers=workers) as executor: + with ProcessPoolExecutor(max_workers=workers, + mp_context=mp_context) as executor: results = executor.map(partial(compile_file, ddir=ddir, force=force, rx=rx, quiet=quiet, @@ -110,7 +116,8 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False, prependdir=prependdir, limit_sl_dest=limit_sl_dest, hardlink_dupes=hardlink_dupes), - files) + files, + chunksize=4) success = min(results, default=True) else: for file in files: @@ -166,13 +173,13 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, if stripdir is not None: fullname_parts = fullname.split(os.path.sep) stripdir_parts = stripdir.split(os.path.sep) - ddir_parts = list(fullname_parts) - - for spart, opart in zip(stripdir_parts, fullname_parts): - if spart == opart: - ddir_parts.remove(spart) - dfile = os.path.join(*ddir_parts) + if stripdir_parts != fullname_parts[:len(stripdir_parts)]: + if quiet < 2: + print("The stripdir path {!r} is not a valid prefix for " + "source path {!r}; ignoring".format(stripdir, fullname)) + else: + dfile = os.path.join(*fullname_parts[len(stripdir_parts):]) if prependdir is not None: if dfile is None: @@ -310,7 +317,9 @@ def main(): import argparse parser = argparse.ArgumentParser( - description='Utilities to support installing Python libraries.') + description='Utilities to support installing Python libraries.', + color=True, + ) parser.add_argument('-l', action='store_const', const=0, default=None, dest='maxlevels', help="don't recurse into subdirectories") diff --git a/Lib/compression/__init__.py b/Lib/compression/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/Lib/compression/_common/__init__.py b/Lib/compression/_common/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/Lib/compression/_common/_streams.py b/Lib/compression/_common/_streams.py new file mode 100644 index 00000000000..9f367d4e304 --- /dev/null +++ b/Lib/compression/_common/_streams.py @@ -0,0 +1,162 @@ +"""Internal classes used by compression modules""" + +import io +import sys + +BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE # Compressed data read chunk size + + +class BaseStream(io.BufferedIOBase): + """Mode-checking helper functions.""" + + def _check_not_closed(self): + if self.closed: + raise ValueError("I/O operation on closed file") + + def _check_can_read(self): + if not self.readable(): + raise io.UnsupportedOperation("File not open for reading") + + def _check_can_write(self): + if not self.writable(): + raise io.UnsupportedOperation("File not open for writing") + + def _check_can_seek(self): + if not self.readable(): + raise io.UnsupportedOperation("Seeking is only supported " + "on files open for reading") + if not self.seekable(): + raise io.UnsupportedOperation("The underlying file object " + "does not support seeking") + + +class DecompressReader(io.RawIOBase): + """Adapts the decompressor API to a RawIOBase reader API""" + + def readable(self): + return True + + def __init__(self, fp, decomp_factory, trailing_error=(), **decomp_args): + self._fp = fp + self._eof = False + self._pos = 0 # Current offset in decompressed stream + + # Set to size of decompressed stream once it is known, for SEEK_END + self._size = -1 + + # Save the decompressor factory and arguments. + # If the file contains multiple compressed streams, each + # stream will need a separate decompressor object. A new decompressor + # object is also needed when implementing a backwards seek(). + self._decomp_factory = decomp_factory + self._decomp_args = decomp_args + self._decompressor = self._decomp_factory(**self._decomp_args) + + # Exception class to catch from decompressor signifying invalid + # trailing data to ignore + self._trailing_error = trailing_error + + def close(self): + self._decompressor = None + return super().close() + + def seekable(self): + return self._fp.seekable() + + def readinto(self, b): + with memoryview(b) as view, view.cast("B") as byte_view: + data = self.read(len(byte_view)) + byte_view[:len(data)] = data + return len(data) + + def read(self, size=-1): + if size < 0: + return self.readall() + + if not size or self._eof: + return b"" + data = None # Default if EOF is encountered + # Depending on the input data, our call to the decompressor may not + # return any data. In this case, try again after reading another block. + while True: + if self._decompressor.eof: + rawblock = (self._decompressor.unused_data or + self._fp.read(BUFFER_SIZE)) + if not rawblock: + break + # Continue to next stream. + self._decompressor = self._decomp_factory( + **self._decomp_args) + try: + data = self._decompressor.decompress(rawblock, size) + except self._trailing_error: + # Trailing data isn't a valid compressed stream; ignore it. + break + else: + if self._decompressor.needs_input: + rawblock = self._fp.read(BUFFER_SIZE) + if not rawblock: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + else: + rawblock = b"" + data = self._decompressor.decompress(rawblock, size) + if data: + break + if not data: + self._eof = True + self._size = self._pos + return b"" + self._pos += len(data) + return data + + def readall(self): + chunks = [] + # sys.maxsize means the max length of output buffer is unlimited, + # so that the whole input buffer can be decompressed within one + # .decompress() call. + while data := self.read(sys.maxsize): + chunks.append(data) + + return b"".join(chunks) + + # Rewind the file to the beginning of the data stream. + def _rewind(self): + self._fp.seek(0) + self._eof = False + self._pos = 0 + self._decompressor = self._decomp_factory(**self._decomp_args) + + def seek(self, offset, whence=io.SEEK_SET): + # Recalculate offset as an absolute file position. + if whence == io.SEEK_SET: + pass + elif whence == io.SEEK_CUR: + offset = self._pos + offset + elif whence == io.SEEK_END: + # Seeking relative to EOF - we need to know the file's size. + if self._size < 0: + while self.read(io.DEFAULT_BUFFER_SIZE): + pass + offset = self._size + offset + else: + raise ValueError("Invalid value for whence: {}".format(whence)) + + # Make it so that offset is the number of bytes to skip forward. + if offset < self._pos: + self._rewind() + else: + offset -= self._pos + + # Read and discard data until we reach the desired position. + while offset > 0: + data = self.read(min(io.DEFAULT_BUFFER_SIZE, offset)) + if not data: + break + offset -= len(data) + + return self._pos + + def tell(self): + """Return the current file position.""" + return self._pos diff --git a/Lib/compression/bz2.py b/Lib/compression/bz2.py new file mode 100644 index 00000000000..16815d6cd20 --- /dev/null +++ b/Lib/compression/bz2.py @@ -0,0 +1,5 @@ +import bz2 +__doc__ = bz2.__doc__ +del bz2 + +from bz2 import * diff --git a/Lib/compression/gzip.py b/Lib/compression/gzip.py new file mode 100644 index 00000000000..552f48f948a --- /dev/null +++ b/Lib/compression/gzip.py @@ -0,0 +1,5 @@ +import gzip +__doc__ = gzip.__doc__ +del gzip + +from gzip import * diff --git a/Lib/compression/lzma.py b/Lib/compression/lzma.py new file mode 100644 index 00000000000..b4bc7ccb1db --- /dev/null +++ b/Lib/compression/lzma.py @@ -0,0 +1,5 @@ +import lzma +__doc__ = lzma.__doc__ +del lzma + +from lzma import * diff --git a/Lib/compression/zlib.py b/Lib/compression/zlib.py new file mode 100644 index 00000000000..3aa7e2db90e --- /dev/null +++ b/Lib/compression/zlib.py @@ -0,0 +1,5 @@ +import zlib +__doc__ = zlib.__doc__ +del zlib + +from zlib import * diff --git a/Lib/compression/zstd/__init__.py b/Lib/compression/zstd/__init__.py new file mode 100644 index 00000000000..5326cf9b19c --- /dev/null +++ b/Lib/compression/zstd/__init__.py @@ -0,0 +1,244 @@ +"""Python bindings to the Zstandard (zstd) compression library (RFC-8878).""" + +__all__ = ( + # compression.zstd + 'COMPRESSION_LEVEL_DEFAULT', + 'compress', + 'CompressionParameter', + 'decompress', + 'DecompressionParameter', + 'finalize_dict', + 'get_frame_info', + 'Strategy', + 'train_dict', + + # compression.zstd._zstdfile + 'open', + 'ZstdFile', + + # _zstd + 'get_frame_size', + 'zstd_version', + 'zstd_version_info', + 'ZstdCompressor', + 'ZstdDecompressor', + 'ZstdDict', + 'ZstdError', +) + +import _zstd +import enum +from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError, + get_frame_size, zstd_version) +from compression.zstd._zstdfile import ZstdFile, open, _nbytes + +# zstd_version_number is (MAJOR * 100 * 100 + MINOR * 100 + RELEASE) +zstd_version_info = (*divmod(_zstd.zstd_version_number // 100, 100), + _zstd.zstd_version_number % 100) +"""Version number of the runtime zstd library as a tuple of integers.""" + +COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT +"""The default compression level for Zstandard, currently '3'.""" + + +class FrameInfo: + """Information about a Zstandard frame.""" + + __slots__ = 'decompressed_size', 'dictionary_id' + + def __init__(self, decompressed_size, dictionary_id): + super().__setattr__('decompressed_size', decompressed_size) + super().__setattr__('dictionary_id', dictionary_id) + + def __repr__(self): + return (f'FrameInfo(decompressed_size={self.decompressed_size}, ' + f'dictionary_id={self.dictionary_id})') + + def __setattr__(self, name, _): + raise AttributeError(f"can't set attribute {name!r}") + + +def get_frame_info(frame_buffer): + """Get Zstandard frame information from a frame header. + + *frame_buffer* is a bytes-like object. It should start from the + beginning of a frame, and needs to include at least the frame header + (6 to 18 bytes). + + The returned FrameInfo object has two attributes. + 'decompressed_size' is the size in bytes of the data in the frame when + decompressed, or None when the decompressed size is unknown. + 'dictionary_id' is an int in the range (0, 2**32). The special value 0 + means that the dictionary ID was not recorded in the frame header, + the frame may or may not need a dictionary to be decoded, + and the ID of such a dictionary is not specified. + """ + return FrameInfo(*_zstd.get_frame_info(frame_buffer)) + + +def train_dict(samples, dict_size): + """Return a ZstdDict representing a trained Zstandard dictionary. + + *samples* is an iterable of samples, where a sample is a bytes-like + object representing a file. + + *dict_size* is the dictionary's maximum size, in bytes. + """ + if not isinstance(dict_size, int): + ds_cls = type(dict_size).__qualname__ + raise TypeError(f'dict_size must be an int object, not {ds_cls!r}.') + + samples = tuple(samples) + chunks = b''.join(samples) + chunk_sizes = tuple(_nbytes(sample) for sample in samples) + if not chunks: + raise ValueError("samples contained no data; can't train dictionary.") + dict_content = _zstd.train_dict(chunks, chunk_sizes, dict_size) + return ZstdDict(dict_content) + + +def finalize_dict(zstd_dict, /, samples, dict_size, level): + """Return a ZstdDict representing a finalized Zstandard dictionary. + + Given a custom content as a basis for dictionary, and a set of samples, + finalize *zstd_dict* by adding headers and statistics according to the + Zstandard dictionary format. + + You may compose an effective dictionary content by hand, which is used + as basis dictionary, and use some samples to finalize a dictionary. The + basis dictionary may be a "raw content" dictionary. See *is_raw* in + ZstdDict. + + *samples* is an iterable of samples, where a sample is a bytes-like + object representing a file. + *dict_size* is the dictionary's maximum size, in bytes. + *level* is the expected compression level. The statistics for each + compression level differ, so tuning the dictionary to the compression + level can provide improvements. + """ + + if not isinstance(zstd_dict, ZstdDict): + raise TypeError('zstd_dict argument should be a ZstdDict object.') + if not isinstance(dict_size, int): + raise TypeError('dict_size argument should be an int object.') + if not isinstance(level, int): + raise TypeError('level argument should be an int object.') + + samples = tuple(samples) + chunks = b''.join(samples) + chunk_sizes = tuple(_nbytes(sample) for sample in samples) + if not chunks: + raise ValueError("The samples are empty content, can't finalize the " + "dictionary.") + dict_content = _zstd.finalize_dict(zstd_dict.dict_content, chunks, + chunk_sizes, dict_size, level) + return ZstdDict(dict_content) + + +def compress(data, level=None, options=None, zstd_dict=None): + """Return Zstandard compressed *data* as bytes. + + *level* is an int specifying the compression level to use, defaulting to + COMPRESSION_LEVEL_DEFAULT ('3'). + *options* is a dict object that contains advanced compression + parameters. See CompressionParameter for more on options. + *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary. + See the function train_dict for how to train a ZstdDict on sample data. + + For incremental compression, use a ZstdCompressor instead. + """ + comp = ZstdCompressor(level=level, options=options, zstd_dict=zstd_dict) + return comp.compress(data, mode=ZstdCompressor.FLUSH_FRAME) + + +def decompress(data, zstd_dict=None, options=None): + """Decompress one or more frames of Zstandard compressed *data*. + + *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary. + See the function train_dict for how to train a ZstdDict on sample data. + *options* is a dict object that contains advanced compression + parameters. See DecompressionParameter for more on options. + + For incremental decompression, use a ZstdDecompressor instead. + """ + results = [] + while True: + decomp = ZstdDecompressor(options=options, zstd_dict=zstd_dict) + results.append(decomp.decompress(data)) + if not decomp.eof: + raise ZstdError('Compressed data ended before the ' + 'end-of-stream marker was reached') + data = decomp.unused_data + if not data: + break + return b''.join(results) + + +class CompressionParameter(enum.IntEnum): + """Compression parameters.""" + + compression_level = _zstd.ZSTD_c_compressionLevel + window_log = _zstd.ZSTD_c_windowLog + hash_log = _zstd.ZSTD_c_hashLog + chain_log = _zstd.ZSTD_c_chainLog + search_log = _zstd.ZSTD_c_searchLog + min_match = _zstd.ZSTD_c_minMatch + target_length = _zstd.ZSTD_c_targetLength + strategy = _zstd.ZSTD_c_strategy + + enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching + ldm_hash_log = _zstd.ZSTD_c_ldmHashLog + ldm_min_match = _zstd.ZSTD_c_ldmMinMatch + ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog + ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog + + content_size_flag = _zstd.ZSTD_c_contentSizeFlag + checksum_flag = _zstd.ZSTD_c_checksumFlag + dict_id_flag = _zstd.ZSTD_c_dictIDFlag + + nb_workers = _zstd.ZSTD_c_nbWorkers + job_size = _zstd.ZSTD_c_jobSize + overlap_log = _zstd.ZSTD_c_overlapLog + + def bounds(self): + """Return the (lower, upper) int bounds of a compression parameter. + + Both the lower and upper bounds are inclusive. + """ + return _zstd.get_param_bounds(self.value, is_compress=True) + + +class DecompressionParameter(enum.IntEnum): + """Decompression parameters.""" + + window_log_max = _zstd.ZSTD_d_windowLogMax + + def bounds(self): + """Return the (lower, upper) int bounds of a decompression parameter. + + Both the lower and upper bounds are inclusive. + """ + return _zstd.get_param_bounds(self.value, is_compress=False) + + +class Strategy(enum.IntEnum): + """Compression strategies, listed from fastest to strongest. + + Note that new strategies might be added in the future. + Only the order (from fast to strong) is guaranteed, + the numeric value might change. + """ + + fast = _zstd.ZSTD_fast + dfast = _zstd.ZSTD_dfast + greedy = _zstd.ZSTD_greedy + lazy = _zstd.ZSTD_lazy + lazy2 = _zstd.ZSTD_lazy2 + btlazy2 = _zstd.ZSTD_btlazy2 + btopt = _zstd.ZSTD_btopt + btultra = _zstd.ZSTD_btultra + btultra2 = _zstd.ZSTD_btultra2 + + +# Check validity of the CompressionParameter & DecompressionParameter types +_zstd.set_parameter_types(CompressionParameter, DecompressionParameter) diff --git a/Lib/compression/zstd/_zstdfile.py b/Lib/compression/zstd/_zstdfile.py new file mode 100644 index 00000000000..8d3358152e9 --- /dev/null +++ b/Lib/compression/zstd/_zstdfile.py @@ -0,0 +1,346 @@ +import io +from os import PathLike +from _zstd import ZstdCompressor, ZstdDecompressor, ZSTD_DStreamOutSize +from compression._common import _streams + +__all__ = ('ZstdFile', 'open') + +_MODE_CLOSED = 0 +_MODE_READ = 1 +_MODE_WRITE = 2 + + +def _nbytes(dat, /): + if isinstance(dat, (bytes, bytearray)): + return len(dat) + with memoryview(dat) as mv: + return mv.nbytes + + +class ZstdFile(_streams.BaseStream): + """A file-like object providing transparent Zstandard (de)compression. + + A ZstdFile can act as a wrapper for an existing file object, or refer + directly to a named file on disk. + + ZstdFile provides a *binary* file interface. Data is read and returned as + bytes, and may only be written to objects that support the Buffer Protocol. + """ + + FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK + FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME + + def __init__(self, file, /, mode='r', *, + level=None, options=None, zstd_dict=None): + """Open a Zstandard compressed file in binary mode. + + *file* can be either an file-like object, or a file name to open. + + *mode* can be 'r' for reading (default), 'w' for (over)writing, 'x' + for creating exclusively, or 'a' for appending. These can + equivalently be given as 'rb', 'wb', 'xb' and 'ab' respectively. + + *level* is an optional int specifying the compression level to use, + or COMPRESSION_LEVEL_DEFAULT if not given. + + *options* is an optional dict for advanced compression parameters. + See CompressionParameter and DecompressionParameter for the possible + options. + + *zstd_dict* is an optional ZstdDict object, a pre-trained Zstandard + dictionary. See train_dict() to train ZstdDict on sample data. + """ + self._fp = None + self._close_fp = False + self._mode = _MODE_CLOSED + self._buffer = None + + if not isinstance(mode, str): + raise ValueError('mode must be a str') + if options is not None and not isinstance(options, dict): + raise TypeError('options must be a dict or None') + mode = mode.removesuffix('b') # handle rb, wb, xb, ab + if mode == 'r': + if level is not None: + raise TypeError('level is illegal in read mode') + self._mode = _MODE_READ + elif mode in {'w', 'a', 'x'}: + if level is not None and not isinstance(level, int): + raise TypeError('level must be int or None') + self._mode = _MODE_WRITE + self._compressor = ZstdCompressor(level=level, options=options, + zstd_dict=zstd_dict) + self._pos = 0 + else: + raise ValueError(f'Invalid mode: {mode!r}') + + if isinstance(file, (str, bytes, PathLike)): + self._fp = io.open(file, f'{mode}b') + self._close_fp = True + elif ((mode == 'r' and hasattr(file, 'read')) + or (mode != 'r' and hasattr(file, 'write'))): + self._fp = file + else: + raise TypeError('file must be a file-like object ' + 'or a str, bytes, or PathLike object') + + if self._mode == _MODE_READ: + raw = _streams.DecompressReader( + self._fp, + ZstdDecompressor, + zstd_dict=zstd_dict, + options=options, + ) + self._buffer = io.BufferedReader(raw) + + def close(self): + """Flush and close the file. + + May be called multiple times. Once the file has been closed, + any other operation on it will raise ValueError. + """ + if self._fp is None: + return + try: + if self._mode == _MODE_READ: + if getattr(self, '_buffer', None): + self._buffer.close() + self._buffer = None + elif self._mode == _MODE_WRITE: + self.flush(self.FLUSH_FRAME) + self._compressor = None + finally: + self._mode = _MODE_CLOSED + try: + if self._close_fp: + self._fp.close() + finally: + self._fp = None + self._close_fp = False + + def write(self, data, /): + """Write a bytes-like object *data* to the file. + + Returns the number of uncompressed bytes written, which is + always the length of data in bytes. Note that due to buffering, + the file on disk may not reflect the data written until .flush() + or .close() is called. + """ + self._check_can_write() + + length = _nbytes(data) + + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += length + return length + + def flush(self, mode=FLUSH_BLOCK): + """Flush remaining data to the underlying stream. + + The mode argument can be FLUSH_BLOCK or FLUSH_FRAME. Abuse of this + method will reduce compression ratio, use it only when necessary. + + If the program is interrupted afterwards, all data can be recovered. + To ensure saving to disk, also need to use os.fsync(fd). + + This method does nothing in reading mode. + """ + if self._mode == _MODE_READ: + return + self._check_not_closed() + if mode not in {self.FLUSH_BLOCK, self.FLUSH_FRAME}: + raise ValueError('Invalid mode argument, expected either ' + 'ZstdFile.FLUSH_FRAME or ' + 'ZstdFile.FLUSH_BLOCK') + if self._compressor.last_mode == mode: + return + # Flush zstd block/frame, and write. + data = self._compressor.flush(mode) + self._fp.write(data) + if hasattr(self._fp, 'flush'): + self._fp.flush() + + def read(self, size=-1): + """Read up to size uncompressed bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b'' if the file is already at EOF. + """ + if size is None: + size = -1 + self._check_can_read() + return self._buffer.read(size) + + def read1(self, size=-1): + """Read up to size uncompressed bytes, while trying to avoid + making multiple reads from the underlying stream. Reads up to a + buffer's worth of data if size is negative. + + Returns b'' if the file is at EOF. + """ + self._check_can_read() + if size < 0: + # Note this should *not* be io.DEFAULT_BUFFER_SIZE. + # ZSTD_DStreamOutSize is the minimum amount to read guaranteeing + # a full block is read. + size = ZSTD_DStreamOutSize + return self._buffer.read1(size) + + def readinto(self, b): + """Read bytes into b. + + Returns the number of bytes read (0 for EOF). + """ + self._check_can_read() + return self._buffer.readinto(b) + + def readinto1(self, b): + """Read bytes into b, while trying to avoid making multiple reads + from the underlying stream. + + Returns the number of bytes read (0 for EOF). + """ + self._check_can_read() + return self._buffer.readinto1(b) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which + case the line may be incomplete). Returns b'' if already at EOF. + """ + self._check_can_read() + return self._buffer.readline(size) + + def seek(self, offset, whence=io.SEEK_SET): + """Change the file position. + + The new position is specified by offset, relative to the + position indicated by whence. Possible values for whence are: + + 0: start of stream (default): offset must not be negative + 1: current stream position + 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the arguments, + this operation may be extremely slow. + """ + self._check_can_read() + + # BufferedReader.seek() checks seekable + return self._buffer.seek(offset, whence) + + def peek(self, size=-1): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. + """ + # Relies on the undocumented fact that BufferedReader.peek() always + # returns at least one byte (except at EOF) + self._check_can_read() + return self._buffer.peek(size) + + def __next__(self): + if ret := self._buffer.readline(): + return ret + raise StopIteration + + def tell(self): + """Return the current file position.""" + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + elif self._mode == _MODE_WRITE: + return self._pos + + def fileno(self): + """Return the file descriptor for the underlying file.""" + self._check_not_closed() + return self._fp.fileno() + + @property + def name(self): + self._check_not_closed() + return self._fp.name + + @property + def mode(self): + return 'wb' if self._mode == _MODE_WRITE else 'rb' + + @property + def closed(self): + """True if this file is closed.""" + return self._mode == _MODE_CLOSED + + def seekable(self): + """Return whether the file supports seeking.""" + return self.readable() and self._buffer.seekable() + + def readable(self): + """Return whether the file was opened for reading.""" + self._check_not_closed() + return self._mode == _MODE_READ + + def writable(self): + """Return whether the file was opened for writing.""" + self._check_not_closed() + return self._mode == _MODE_WRITE + + +def open(file, /, mode='rb', *, level=None, options=None, zstd_dict=None, + encoding=None, errors=None, newline=None): + """Open a Zstandard compressed file in binary or text mode. + + file can be either a file name (given as a str, bytes, or PathLike + object), in which case the named file is opened, or it can be + an existing file object to read from or write to. + + The mode parameter can be 'r', 'rb' (default), 'w', 'wb', 'x', 'xb', + 'a', 'ab' for binary mode, or 'rt', 'wt', 'xt', 'at' for text mode. + + The level, options, and zstd_dict parameters specify the settings the + same as ZstdFile. + + When using read mode (decompression), the options parameter is a dict + representing advanced decompression options. The level parameter is not + supported in this case. When using write mode (compression), only one + of level, an int representing the compression level, or options, a dict + representing advanced compression options, may be passed. In both + modes, zstd_dict is a ZstdDict instance containing a trained Zstandard + dictionary. + + For binary mode, this function is equivalent to the ZstdFile + constructor: ZstdFile(filename, mode, ...). In this case, the encoding, + errors and newline parameters must not be provided. + + For text mode, an ZstdFile object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error handling + behavior, and line ending(s). + """ + + text_mode = 't' in mode + mode = mode.replace('t', '') + + if text_mode: + if 'b' in mode: + raise ValueError(f'Invalid mode: {mode!r}') + else: + if encoding is not None: + raise ValueError('Argument "encoding" not supported in binary mode') + if errors is not None: + raise ValueError('Argument "errors" not supported in binary mode') + if newline is not None: + raise ValueError('Argument "newline" not supported in binary mode') + + binary_file = ZstdFile(file, mode, level=level, options=options, + zstd_dict=zstd_dict) + + if text_mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file diff --git a/Lib/concurrent/futures/__init__.py b/Lib/concurrent/futures/__init__.py index d746aeac50a..72de617a5b6 100644 --- a/Lib/concurrent/futures/__init__.py +++ b/Lib/concurrent/futures/__init__.py @@ -23,6 +23,7 @@ 'ALL_COMPLETED', 'CancelledError', 'TimeoutError', + 'InvalidStateError', 'BrokenExecutor', 'Future', 'Executor', @@ -50,4 +51,4 @@ def __getattr__(name): ThreadPoolExecutor = te return te - raise AttributeError(f"module {__name__} has no attribute {name}") + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/Lib/concurrent/futures/_base.py b/Lib/concurrent/futures/_base.py index cf119ac6437..7d69a5baead 100644 --- a/Lib/concurrent/futures/_base.py +++ b/Lib/concurrent/futures/_base.py @@ -50,9 +50,7 @@ class CancelledError(Error): """The Future was cancelled.""" pass -class TimeoutError(Error): - """The operation exceeded the given deadline.""" - pass +TimeoutError = TimeoutError # make local alias for the standard exception class InvalidStateError(Error): """The operation is not allowed in this state.""" @@ -284,7 +282,7 @@ def wait(fs, timeout=None, return_when=ALL_COMPLETED): A named 2-tuple of sets. The first set, named 'done', contains the futures that completed (is finished or cancelled) before the wait completed. The second set, named 'not_done', contains uncompleted - futures. Duplicate futures given to *fs* are removed and will be + futures. Duplicate futures given to *fs* are removed and will be returned only once. """ fs = set(fs) @@ -312,6 +310,18 @@ def wait(fs, timeout=None, return_when=ALL_COMPLETED): done.update(waiter.finished_futures) return DoneAndNotDoneFutures(done, fs - done) + +def _result_or_cancel(fut, timeout=None): + try: + try: + return fut.result(timeout) + finally: + fut.cancel() + finally: + # Break a reference cycle with the exception in self._exception + del fut + + class Future(object): """Represents the result of an asynchronous computation.""" @@ -386,7 +396,7 @@ def done(self): return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED] def __get_result(self): - if self._exception: + if self._exception is not None: try: raise self._exception finally: @@ -606,9 +616,9 @@ def result_iterator(): while fs: # Careful not to keep a reference to the popped future if timeout is None: - yield fs.pop().result() + yield _result_or_cancel(fs.pop()) else: - yield fs.pop().result(end_time - time.monotonic()) + yield _result_or_cancel(fs.pop(), end_time - time.monotonic()) finally: for future in fs: future.cancel() diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py index 57941e485d8..0dee8303ba2 100644 --- a/Lib/concurrent/futures/process.py +++ b/Lib/concurrent/futures/process.py @@ -49,6 +49,8 @@ from concurrent.futures import _base import queue import multiprocessing as mp +# This import is required to load the multiprocessing.connection submodule +# so that it can be accessed later as `mp.connection` import multiprocessing.connection from multiprocessing.queues import Queue import threading @@ -56,7 +58,7 @@ from functools import partial import itertools import sys -import traceback +from traceback import format_exception _threads_wakeups = weakref.WeakKeyDictionary() @@ -66,22 +68,31 @@ class _ThreadWakeup: def __init__(self): self._closed = False + self._lock = threading.Lock() self._reader, self._writer = mp.Pipe(duplex=False) def close(self): - if not self._closed: - self._closed = True - self._writer.close() - self._reader.close() + # Please note that we do not take the self._lock when + # calling clear() (to avoid deadlocking) so this method can + # only be called safely from the same thread as all calls to + # clear() even if you hold the lock. Otherwise we + # might try to read from the closed pipe. + with self._lock: + if not self._closed: + self._closed = True + self._writer.close() + self._reader.close() def wakeup(self): - if not self._closed: - self._writer.send_bytes(b"") + with self._lock: + if not self._closed: + self._writer.send_bytes(b"") def clear(self): - if not self._closed: - while self._reader.poll(): - self._reader.recv_bytes() + if self._closed: + raise RuntimeError('operation on closed _ThreadWakeup') + while self._reader.poll(): + self._reader.recv_bytes() def _python_exit(): @@ -123,8 +134,7 @@ def __str__(self): class _ExceptionWithTraceback: def __init__(self, exc, tb): - tb = traceback.format_exception(type(exc), exc, tb) - tb = ''.join(tb) + tb = ''.join(format_exception(type(exc), exc, tb)) self.exc = exc # Traceback object needs to be garbage-collected as its frames # contain references to all the objects in the exception scope @@ -145,10 +155,11 @@ def __init__(self, future, fn, args, kwargs): self.kwargs = kwargs class _ResultItem(object): - def __init__(self, work_id, exception=None, result=None): + def __init__(self, work_id, exception=None, result=None, exit_pid=None): self.work_id = work_id self.exception = exception self.result = result + self.exit_pid = exit_pid class _CallItem(object): def __init__(self, work_id, fn, args, kwargs): @@ -160,20 +171,17 @@ def __init__(self, work_id, fn, args, kwargs): class _SafeQueue(Queue): """Safe Queue set exception to the future object linked to a job""" - def __init__(self, max_size=0, *, ctx, pending_work_items, shutdown_lock, - thread_wakeup): + def __init__(self, max_size=0, *, ctx, pending_work_items, thread_wakeup): self.pending_work_items = pending_work_items - self.shutdown_lock = shutdown_lock self.thread_wakeup = thread_wakeup super().__init__(max_size, ctx=ctx) def _on_queue_feeder_error(self, e, obj): if isinstance(obj, _CallItem): - tb = traceback.format_exception(type(e), e, e.__traceback__) + tb = format_exception(type(e), e, e.__traceback__) e.__cause__ = _RemoteTraceback('\n"""\n{}"""'.format(''.join(tb))) work_item = self.pending_work_items.pop(obj.work_id, None) - with self.shutdown_lock: - self.thread_wakeup.wakeup() + self.thread_wakeup.wakeup() # work_item can be None if another process terminated. In this # case, the executor_manager_thread fails all work_items # with BrokenProcessPool @@ -183,16 +191,6 @@ def _on_queue_feeder_error(self, e, obj): super()._on_queue_feeder_error(e, obj) -def _get_chunks(*iterables, chunksize): - """ Iterates over zip()ed iterables in chunks. """ - it = zip(*iterables) - while True: - chunk = tuple(itertools.islice(it, chunksize)) - if not chunk: - return - yield chunk - - def _process_chunk(fn, chunk): """ Processes a chunk of an iterable passed to map. @@ -205,17 +203,19 @@ def _process_chunk(fn, chunk): return [fn(*args) for args in chunk] -def _sendback_result(result_queue, work_id, result=None, exception=None): +def _sendback_result(result_queue, work_id, result=None, exception=None, + exit_pid=None): """Safely send back the given result or exception""" try: result_queue.put(_ResultItem(work_id, result=result, - exception=exception)) + exception=exception, exit_pid=exit_pid)) except BaseException as e: exc = _ExceptionWithTraceback(e, e.__traceback__) - result_queue.put(_ResultItem(work_id, exception=exc)) + result_queue.put(_ResultItem(work_id, exception=exc, + exit_pid=exit_pid)) -def _process_worker(call_queue, result_queue, initializer, initargs): +def _process_worker(call_queue, result_queue, initializer, initargs, max_tasks=None): """Evaluates calls from call_queue and places the results in result_queue. This worker is run in a separate process. @@ -236,25 +236,38 @@ def _process_worker(call_queue, result_queue, initializer, initargs): # The parent will notice that the process stopped and # mark the pool broken return + num_tasks = 0 + exit_pid = None while True: call_item = call_queue.get(block=True) if call_item is None: # Wake up queue management thread result_queue.put(os.getpid()) return + + if max_tasks is not None: + num_tasks += 1 + if num_tasks >= max_tasks: + exit_pid = os.getpid() + try: r = call_item.fn(*call_item.args, **call_item.kwargs) except BaseException as e: exc = _ExceptionWithTraceback(e, e.__traceback__) - _sendback_result(result_queue, call_item.work_id, exception=exc) + _sendback_result(result_queue, call_item.work_id, exception=exc, + exit_pid=exit_pid) else: - _sendback_result(result_queue, call_item.work_id, result=r) + _sendback_result(result_queue, call_item.work_id, result=r, + exit_pid=exit_pid) del r # Liberate the resource as soon as possible, to avoid holding onto # open files or shared memory that is not needed anymore del call_item + if exit_pid is not None: + return + class _ExecutorManagerThread(threading.Thread): """Manages the communication between this process and the worker processes. @@ -284,11 +297,10 @@ def __init__(self, executor): # if there is no pending work item. def weakref_cb(_, thread_wakeup=self.thread_wakeup, - shutdown_lock=self.shutdown_lock): - mp.util.debug('Executor collected: triggering callback for' + mp_util_debug=mp.util.debug): + mp_util_debug('Executor collected: triggering callback for' ' QueueManager wakeup') - with shutdown_lock: - thread_wakeup.wakeup() + thread_wakeup.wakeup() self.executor_reference = weakref.ref(executor, weakref_cb) @@ -305,6 +317,10 @@ def weakref_cb(_, # A queue.Queue of work ids e.g. Queue([5, 6, ...]). self.work_ids_queue = executor._work_ids + # Maximum number of tasks a worker process can execute before + # exiting safely + self.max_tasks_per_child = executor._max_tasks_per_child + # A dict mapping work ids to _WorkItems e.g. # {5: <_WorkItem...>, 6: <_WorkItem...>, ...} self.pending_work_items = executor._pending_work_items @@ -315,7 +331,14 @@ def run(self): # Main loop for the executor manager thread. while True: - self.add_call_item_to_queue() + # gh-109047: During Python finalization, self.call_queue.put() + # creation of a thread can fail with RuntimeError. + try: + self.add_call_item_to_queue() + except BaseException as exc: + cause = format_exception(exc) + self.terminate_broken(cause) + return result_item, is_broken, cause = self.wait_result_broken_or_wakeup() @@ -324,19 +347,32 @@ def run(self): return if result_item is not None: self.process_result_item(result_item) + + process_exited = result_item.exit_pid is not None + if process_exited: + p = self.processes.pop(result_item.exit_pid) + p.join() + # Delete reference to result_item to avoid keeping references # while waiting on new results. del result_item - # attempt to increment idle process count - executor = self.executor_reference() - if executor is not None: - executor._idle_worker_semaphore.release() - del executor + if executor := self.executor_reference(): + if process_exited: + with self.shutdown_lock: + executor._adjust_process_count() + else: + executor._idle_worker_semaphore.release() + del executor if self.is_shutting_down(): self.flag_executor_shutting_down() + # When only canceled futures remain in pending_work_items, our + # next call to wait_result_broken_or_wakeup would hang forever. + # This makes sure we have some running futures or none at all. + self.add_call_item_to_queue() + # Since no new work items can be added, it is safe to shutdown # this thread if there are no pending work items. if not self.pending_work_items: @@ -386,14 +422,13 @@ def wait_result_broken_or_wakeup(self): try: result_item = result_reader.recv() is_broken = False - except BaseException as e: - cause = traceback.format_exception(type(e), e, e.__traceback__) + except BaseException as exc: + cause = format_exception(exc) elif wakeup_reader in ready: is_broken = False - with self.shutdown_lock: - self.thread_wakeup.clear() + self.thread_wakeup.clear() return result_item, is_broken, cause @@ -401,24 +436,14 @@ def process_result_item(self, result_item): # Process the received a result_item. This can be either the PID of a # worker that exited gracefully or a _ResultItem - if isinstance(result_item, int): - # Clean shutdown of a worker using its PID - # (avoids marking the executor broken) - assert self.is_shutting_down() - p = self.processes.pop(result_item) - p.join() - if not self.processes: - self.join_executor_internals() - return - else: - # Received a _ResultItem so mark the future as completed. - work_item = self.pending_work_items.pop(result_item.work_id, None) - # work_item can be None if another process terminated (see above) - if work_item is not None: - if result_item.exception: - work_item.future.set_exception(result_item.exception) - else: - work_item.future.set_result(result_item.result) + # Received a _ResultItem so mark the future as completed. + work_item = self.pending_work_items.pop(result_item.work_id, None) + # work_item can be None if another process terminated (see above) + if work_item is not None: + if result_item.exception is not None: + work_item.future.set_exception(result_item.exception) + else: + work_item.future.set_result(result_item.result) def is_shutting_down(self): # Check whether we should start shutting down the executor. @@ -430,7 +455,7 @@ def is_shutting_down(self): return (_global_shutdown or executor is None or executor._shutdown_thread) - def terminate_broken(self, cause): + def _terminate_broken(self, cause): # Terminate the executor because it is in a broken state. The cause # argument can be used to display more information on the error that # lead the executor into becoming broken. @@ -455,7 +480,14 @@ def terminate_broken(self, cause): # Mark pending tasks as failed. for work_id, work_item in self.pending_work_items.items(): - work_item.future.set_exception(bpe) + try: + work_item.future.set_exception(bpe) + except _base.InvalidStateError: + # set_exception() fails if the future is cancelled: ignore it. + # Trying to check if the future is cancelled before calling + # set_exception() would leave a race condition if the future is + # cancelled between the check and set_exception(). + pass # Delete references to object. See issue16284 del work_item self.pending_work_items.clear() @@ -465,8 +497,14 @@ def terminate_broken(self, cause): for p in self.processes.values(): p.terminate() + self.call_queue._terminate_broken() + # clean up resources - self.join_executor_internals() + self._join_executor_internals(broken=True) + + def terminate_broken(self, cause): + with self.shutdown_lock: + self._terminate_broken(cause) def flag_executor_shutting_down(self): # Flag the executor as shutting down and cancel remaining tasks if @@ -509,15 +547,24 @@ def shutdown_workers(self): break def join_executor_internals(self): - self.shutdown_workers() + with self.shutdown_lock: + self._join_executor_internals() + + def _join_executor_internals(self, broken=False): + # If broken, call_queue was closed and so can no longer be used. + if not broken: + self.shutdown_workers() + # Release the queue's resources as soon as possible. self.call_queue.close() self.call_queue.join_thread() - with self.shutdown_lock: - self.thread_wakeup.close() + self.thread_wakeup.close() + # If .join() is not called on the created processes then # some ctx.Queue methods may deadlock on Mac OS X. for p in self.processes.values(): + if broken: + p.terminate() p.join() def get_n_children_alive(self): @@ -582,22 +629,29 @@ class BrokenProcessPool(_base.BrokenExecutor): class ProcessPoolExecutor(_base.Executor): def __init__(self, max_workers=None, mp_context=None, - initializer=None, initargs=()): + initializer=None, initargs=(), *, max_tasks_per_child=None): """Initializes a new ProcessPoolExecutor instance. Args: max_workers: The maximum number of processes that can be used to execute the given calls. If None or not given then as many worker processes will be created as the machine has processors. - mp_context: A multiprocessing context to launch the workers. This + mp_context: A multiprocessing context to launch the workers created + using the multiprocessing.get_context('start method') API. This object should provide SimpleQueue, Queue and Process. initializer: A callable used to initialize worker processes. initargs: A tuple of arguments to pass to the initializer. + max_tasks_per_child: The maximum number of tasks a worker process + can complete before it will exit and be replaced with a fresh + worker process. The default of None means worker process will + live as long as the executor. Requires a non-'fork' mp_context + start method. When given, we default to using 'spawn' if no + mp_context is supplied. """ _check_system_limits() if max_workers is None: - self._max_workers = os.cpu_count() or 1 + self._max_workers = os.process_cpu_count() or 1 if sys.platform == 'win32': self._max_workers = min(_MAX_WINDOWS_WORKERS, self._max_workers) @@ -612,7 +666,10 @@ def __init__(self, max_workers=None, mp_context=None, self._max_workers = max_workers if mp_context is None: - mp_context = mp.get_context() + if max_tasks_per_child is not None: + mp_context = mp.get_context("spawn") + else: + mp_context = mp.get_context() self._mp_context = mp_context # https://github.com/python/cpython/issues/90622 @@ -624,6 +681,18 @@ def __init__(self, max_workers=None, mp_context=None, self._initializer = initializer self._initargs = initargs + if max_tasks_per_child is not None: + if not isinstance(max_tasks_per_child, int): + raise TypeError("max_tasks_per_child must be an integer") + elif max_tasks_per_child <= 0: + raise ValueError("max_tasks_per_child must be >= 1") + if self._mp_context.get_start_method(allow_none=False) == "fork": + # https://github.com/python/cpython/issues/90622 + raise ValueError("max_tasks_per_child is incompatible with" + " the 'fork' multiprocessing start method;" + " supply a different mp_context.") + self._max_tasks_per_child = max_tasks_per_child + # Management thread self._executor_manager_thread = None @@ -646,7 +715,9 @@ def __init__(self, max_workers=None, mp_context=None, # as it could result in a deadlock if a worker process dies with the # _result_queue write lock still acquired. # - # _shutdown_lock must be locked to access _ThreadWakeup. + # Care must be taken to only call clear and close from the + # executor_manager_thread, since _ThreadWakeup.clear() is not protected + # by a lock. self._executor_manager_thread_wakeup = _ThreadWakeup() # Create communication channels for the executor @@ -657,7 +728,6 @@ def __init__(self, max_workers=None, mp_context=None, self._call_queue = _SafeQueue( max_size=queue_size, ctx=self._mp_context, pending_work_items=self._pending_work_items, - shutdown_lock=self._shutdown_lock, thread_wakeup=self._executor_manager_thread_wakeup) # Killed worker processes can produce spurious "broken pipe" # tracebacks in the queue's own worker thread. But we detect killed @@ -677,6 +747,11 @@ def _start_executor_manager_thread(self): self._executor_manager_thread_wakeup def _adjust_process_count(self): + # gh-132969: avoid error when state is reset and executor is still running, + # which will happen when shutdown(wait=False) is called. + if self._processes is None: + return + # if there's an idle process, we don't need to spawn a new one. if self._idle_worker_semaphore.acquire(blocking=False): return @@ -705,7 +780,8 @@ def _spawn_process(self): args=(self._call_queue, self._result_queue, self._initializer, - self._initargs)) + self._initargs, + self._max_tasks_per_child)) p.start() self._processes[p.pid] = p @@ -759,7 +835,7 @@ def map(self, fn, *iterables, timeout=None, chunksize=1): raise ValueError("chunksize must be >= 1.") results = super().map(partial(_process_chunk, fn), - _get_chunks(*iterables, chunksize=chunksize), + itertools.batched(zip(*iterables), chunksize), timeout=timeout) return _chain_from_iterable_of_lists(results) diff --git a/Lib/concurrent/futures/thread.py b/Lib/concurrent/futures/thread.py index 493861d314d..9021dde48ef 100644 --- a/Lib/concurrent/futures/thread.py +++ b/Lib/concurrent/futures/thread.py @@ -37,14 +37,14 @@ def _python_exit(): threading._register_atexit(_python_exit) # At fork, reinitialize the `_global_shutdown_lock` lock in the child process -# TODO RUSTPYTHON - _at_fork_reinit is not implemented yet -if hasattr(os, 'register_at_fork') and hasattr(_global_shutdown_lock, '_at_fork_reinit'): +if hasattr(os, 'register_at_fork'): os.register_at_fork(before=_global_shutdown_lock.acquire, after_in_child=_global_shutdown_lock._at_fork_reinit, after_in_parent=_global_shutdown_lock.release) + os.register_at_fork(after_in_child=_threads_queues.clear) -class _WorkItem(object): +class _WorkItem: def __init__(self, future, fn, args, kwargs): self.future = future self.fn = fn @@ -79,17 +79,20 @@ def _worker(executor_reference, work_queue, initializer, initargs): return try: while True: - work_item = work_queue.get(block=True) - if work_item is not None: - work_item.run() - # Delete references to object. See issue16284 - del work_item - - # attempt to increment idle count + try: + work_item = work_queue.get_nowait() + except queue.Empty: + # attempt to increment idle count if queue is empty executor = executor_reference() if executor is not None: executor._idle_semaphore.release() del executor + work_item = work_queue.get(block=True) + + if work_item is not None: + work_item.run() + # Delete references to object. See GH-60488 + del work_item continue executor = executor_reference() @@ -137,10 +140,10 @@ def __init__(self, max_workers=None, thread_name_prefix='', # * CPU bound task which releases GIL # * I/O bound task (which releases GIL, of course) # - # We use cpu_count + 4 for both types of tasks. + # We use process_cpu_count + 4 for both types of tasks. # But we limit it to 32 to avoid consuming surprisingly large resource # on many core machine. - max_workers = min(32, (os.cpu_count() or 1) + 4) + max_workers = min(32, (os.process_cpu_count() or 1) + 4) if max_workers <= 0: raise ValueError("max_workers must be greater than 0") diff --git a/Lib/configparser.py b/Lib/configparser.py index e8aae217949..a53ac872764 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py @@ -18,8 +18,8 @@ delimiters=('=', ':'), comment_prefixes=('#', ';'), inline_comment_prefixes=None, strict=True, empty_lines_in_values=True, default_section='DEFAULT', - interpolation=, converters=): - + interpolation=, converters=, + allow_unnamed_section=False): Create the parser. When `defaults` is given, it is initialized into the dictionary or intrinsic defaults. The keys must be strings, the values must be appropriate for %()s string interpolation. @@ -68,6 +68,10 @@ converter gets its corresponding get*() method on the parser object and section proxies. + When `allow_unnamed_section` is True (default: False), options + without section are accepted: the section for these is + ``configparser.UNNAMED_SECTION``. + sections() Return all the configuration section names, sans DEFAULT. @@ -139,24 +143,27 @@ between keys and values are surrounded by spaces. """ -from collections.abc import MutableMapping +# Do not import dataclasses; overhead is unacceptable (gh-117703) + +from collections.abc import Iterable, MutableMapping from collections import ChainMap as _ChainMap +import contextlib import functools import io import itertools import os import re import sys -import warnings __all__ = ("NoSectionError", "DuplicateOptionError", "DuplicateSectionError", "NoOptionError", "InterpolationError", "InterpolationDepthError", "InterpolationMissingOptionError", "InterpolationSyntaxError", "ParsingError", "MissingSectionHeaderError", - "ConfigParser", "RawConfigParser", + "MultilineContinuationError", "UnnamedSectionDisabledError", + "InvalidWriteError", "ConfigParser", "RawConfigParser", "Interpolation", "BasicInterpolation", "ExtendedInterpolation", - "LegacyInterpolation", "SectionProxy", "ConverterMapping", - "DEFAULTSECT", "MAX_INTERPOLATION_DEPTH") + "SectionProxy", "ConverterMapping", + "DEFAULTSECT", "MAX_INTERPOLATION_DEPTH", "UNNAMED_SECTION") _default_dict = dict DEFAULTSECT = "DEFAULT" @@ -298,15 +305,36 @@ def __init__(self, option, section, rawval): class ParsingError(Error): """Raised when a configuration file does not follow legal syntax.""" - def __init__(self, source): + def __init__(self, source, *args): super().__init__(f'Source contains parsing errors: {source!r}') self.source = source self.errors = [] self.args = (source, ) + if args: + self.append(*args) def append(self, lineno, line): self.errors.append((lineno, line)) - self.message += '\n\t[line %2d]: %s' % (lineno, line) + self.message += f'\n\t[line {lineno:2d}]: {line!r}' + + def combine(self, others): + messages = [self.message] + for other in others: + for lineno, line in other.errors: + self.errors.append((lineno, line)) + messages.append(f'\n\t[line {lineno:2d}]: {line!r}') + self.message = "".join(messages) + return self + + @staticmethod + def _raise_all(exceptions: Iterable['ParsingError']): + """ + Combine any number of ParsingErrors into one and raise it. + """ + exceptions = iter(exceptions) + with contextlib.suppress(StopIteration): + raise next(exceptions).combine(exceptions) + class MissingSectionHeaderError(ParsingError): @@ -323,6 +351,44 @@ def __init__(self, filename, lineno, line): self.args = (filename, lineno, line) +class MultilineContinuationError(ParsingError): + """Raised when a key without value is followed by continuation line""" + def __init__(self, filename, lineno, line): + Error.__init__( + self, + "Key without value continued with an indented line.\n" + "file: %r, line: %d\n%r" + %(filename, lineno, line)) + self.source = filename + self.lineno = lineno + self.line = line + self.args = (filename, lineno, line) + + +class UnnamedSectionDisabledError(Error): + """Raised when an attempt to use UNNAMED_SECTION is made with the + feature disabled.""" + def __init__(self): + Error.__init__(self, "Support for UNNAMED_SECTION is disabled.") + + +class _UnnamedSection: + + def __repr__(self): + return "" + +class InvalidWriteError(Error): + """Raised when attempting to write data that the parser would read back differently. + ex: writing a key which begins with the section header pattern would read back as a + new section """ + + def __init__(self, msg=''): + Error.__init__(self, msg) + + +UNNAMED_SECTION = _UnnamedSection() + + # Used in parser getters to indicate the default behaviour when a specific # option is not found it to raise an exception. Created to enable `None` as # a valid fallback value. @@ -478,6 +544,8 @@ def _interpolate_some(self, parser, option, accum, rest, section, map, except (KeyError, NoSectionError, NoOptionError): raise InterpolationMissingOptionError( option, section, rawval, ":".join(path)) from None + if v is None: + continue if "$" in v: self._interpolate_some(parser, opt, accum, v, sect, dict(parser.items(sect, raw=True)), @@ -491,51 +559,51 @@ def _interpolate_some(self, parser, option, accum, rest, section, map, "found: %r" % (rest,)) -class LegacyInterpolation(Interpolation): - """Deprecated interpolation used in old versions of ConfigParser. - Use BasicInterpolation or ExtendedInterpolation instead.""" +class _ReadState: + elements_added : set[str] + cursect : dict[str, str] | None = None + sectname : str | None = None + optname : str | None = None + lineno : int = 0 + indent_level : int = 0 + errors : list[ParsingError] - _KEYCRE = re.compile(r"%\(([^)]*)\)s|.") + def __init__(self): + self.elements_added = set() + self.errors = list() - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - warnings.warn( - "LegacyInterpolation has been deprecated since Python 3.2 " - "and will be removed from the configparser module in Python 3.13. " - "Use BasicInterpolation or ExtendedInterpolation instead.", - DeprecationWarning, stacklevel=2 - ) - def before_get(self, parser, section, option, value, vars): - rawval = value - depth = MAX_INTERPOLATION_DEPTH - while depth: # Loop through this until it's done - depth -= 1 - if value and "%(" in value: - replace = functools.partial(self._interpolation_replace, - parser=parser) - value = self._KEYCRE.sub(replace, value) - try: - value = value % vars - except KeyError as e: - raise InterpolationMissingOptionError( - option, section, rawval, e.args[0]) from None - else: - break - if value and "%(" in value: - raise InterpolationDepthError(option, section, rawval) - return value +class _Line(str): + __slots__ = 'clean', 'has_comments' - def before_set(self, parser, section, option, value): - return value + def __new__(cls, val, *args, **kwargs): + return super().__new__(cls, val) + + def __init__(self, val, comments): + trimmed = val.strip() + self.clean = comments.strip(trimmed) + self.has_comments = trimmed != self.clean - @staticmethod - def _interpolation_replace(match, parser): - s = match.group(1) - if s is None: - return match.group() - else: - return "%%(%s)s" % parser.optionxform(s) + +class _CommentSpec: + def __init__(self, full_prefixes, inline_prefixes): + full_patterns = ( + # prefix at the beginning of a line + fr'^({re.escape(prefix)}).*' + for prefix in full_prefixes + ) + inline_patterns = ( + # prefix at the beginning of the line or following a space + fr'(^|\s)({re.escape(prefix)}.*)' + for prefix in inline_prefixes + ) + self.pattern = re.compile('|'.join(itertools.chain(full_patterns, inline_patterns))) + + def strip(self, text): + return self.pattern.sub('', text).rstrip() + + def wrap(self, text): + return _Line(text, self) class RawConfigParser(MutableMapping): @@ -548,7 +616,9 @@ class RawConfigParser(MutableMapping): \] # ] """ _OPT_TMPL = r""" - (?P