Skip to content
Draft
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion misc/dump-ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def dump(fname: str, python_version: tuple[int, int], quiet: bool = False) -> No
options.python_version = python_version
with open(fname, "rb") as f:
s = f.read()
tree = parse(s, fname, None, errors=Errors(options), options=options, file_exists=True)
tree = parse(s, fname, None, errors=Errors(options), options=options)
if not quiet:
print(tree)

Expand Down
3 changes: 1 addition & 2 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -1287,13 +1287,12 @@ def parse_file(

Raise CompileError if there is a parse error.
"""
file_exists = self.fscache.exists(path, real_only=True)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can:

  • Remove the other two call sites of fscache.exists() in this file (and update the relevant code).
  • Remove the real_only parameter and its related logic from fscache. IIRC it is only needed for the native parser.

t0 = time.time()
if raw_data:
# If possible, deserialize from known binary data instead of parsing from scratch.
tree = load_from_raw(path, id, raw_data, self.errors, options)
else:
tree = parse(source, path, id, self.errors, options=options, file_exists=file_exists)
tree = parse(source, path, id, self.errors, options=options)
tree._fullname = id
if self.stats_enabled:
with self.stats_lock:
Expand Down
1 change: 0 additions & 1 deletion mypy/checkstrformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,6 @@ def apply_field_accessors(
module=None,
options=self.chk.options,
errors=temp_errors,
file_exists=False,
eager=True,
)
if temp_errors.is_errors():
Expand Down
13 changes: 10 additions & 3 deletions mypy/nativeparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,10 @@ def add_error(


def native_parse(
filename: str, options: Options, skip_function_bodies: bool = False
filename: str,
options: Options,
source: str | bytes | None = None,
skip_function_bodies: bool = False,
) -> tuple[MypyFile, list[ParseError], TypeIgnores]:
"""Parse a Python file using the native Rust-based parser.

Expand Down Expand Up @@ -211,7 +214,7 @@ def native_parse(
uses_template_strings,
source_hash,
mypy_comments,
) = parse_to_binary_ast(filename, options, skip_function_bodies)
) = parse_to_binary_ast(filename, options, source, skip_function_bodies)
node = MypyFile([], [])
node.path = filename
node.raw_data = FileRawData(
Expand Down Expand Up @@ -248,7 +251,10 @@ def read_statements(state: State, data: ReadBuffer, n: int) -> list[Statement]:


def parse_to_binary_ast(
filename: str, options: Options, skip_function_bodies: bool = False
filename: str,
options: Options,
source: str | bytes | None = None,
skip_function_bodies: bool = False,
) -> tuple[bytes, list[ParseError], TypeIgnores, bytes, bool, bool, str, list[tuple[int, str]]]:
# This is a horrible hack to work around a mypyc bug where imported
# module may be not ready in a thread sometimes.
Expand All @@ -259,6 +265,7 @@ def parse_to_binary_ast(
raise ImportError("Cannot import ast_serialize")
ast_bytes, errors, ignores, import_bytes, ast_data = ast_serialize.parse(
filename,
source,
skip_function_bodies=skip_function_bodies,
python_version=options.python_version,
platform=options.platform,
Expand Down
35 changes: 15 additions & 20 deletions mypy/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ def parse(
module: str | None,
errors: Errors,
options: Options,
file_exists: bool,
eager: bool = False,
) -> MypyFile:
"""Parse a source file, without doing any semantic analysis.
Expand All @@ -29,25 +28,21 @@ def parse(
the parse errors, use eager=True.
"""
if options.native_parser:
# Native parser only works with actual files on disk
# Fall back to fastparse for in-memory source or non-existent files
if file_exists:
import mypy.nativeparse

ignore_errors = options.ignore_errors or fnam in errors.ignored_files
# If errors are ignored, we can drop many function bodies to speed up type checking.
strip_function_bodies = ignore_errors and not options.preserve_asts
tree, _, _ = mypy.nativeparse.native_parse(
fnam, options, skip_function_bodies=strip_function_bodies
)
# Set is_stub based on file extension
tree.is_stub = fnam.endswith(".pyi")
# Note: tree.imports is populated directly by load_from_raw() with deserialized
# import metadata, so we don't need to collect imports via AST traversal
if eager and tree.raw_data is not None:
tree = load_from_raw(fnam, module, tree.raw_data, errors, options)
return tree
# Fall through to fastparse for non-existent files
import mypy.nativeparse

ignore_errors = options.ignore_errors or fnam in errors.ignored_files
# If errors are ignored, we can drop many function bodies to speed up type checking.
strip_function_bodies = ignore_errors and not options.preserve_asts
tree, _, _ = mypy.nativeparse.native_parse(
fnam, options, source, skip_function_bodies=strip_function_bodies
)
# Set is_stub based on file extension
tree.is_stub = fnam.endswith(".pyi")
# Note: tree.imports is populated directly by load_from_raw() with deserialized
# import metadata, so we don't need to collect imports via AST traversal
if eager and tree.raw_data is not None:
tree = load_from_raw(fnam, module, tree.raw_data, errors, options)
return tree

if options.transform_source is not None:
source = options.transform_source(source)
Expand Down
8 changes: 1 addition & 7 deletions mypy/stubgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1745,13 +1745,7 @@ def parse_source_file(mod: StubSource, mypy_options: MypyOptions) -> None:
source = mypy.util.decode_python_encoding(data)
errors = Errors(mypy_options)
mod.ast = mypy.parse.parse(
source,
fnam=mod.path,
module=mod.module,
errors=errors,
options=mypy_options,
file_exists=True,
eager=True,
source, fnam=mod.path, module=mod.module, errors=errors, options=mypy_options, eager=True
)
mod.ast._fullname = mod.module
if errors.is_blockers():
Expand Down
30 changes: 24 additions & 6 deletions mypy/test/test_nativeparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_parser(testcase: DataDrivenTestCase) -> None:

try:
with temp_source(source) as fnam:
node, errors, type_ignores = native_parse(fnam, options, skip_function_bodies)
node, errors, type_ignores = native_parse(fnam, options, None, skip_function_bodies)
errors += load_tree(node, options)
node.path = "main"
a = node.str_with_options(options).split("\n")
Expand Down Expand Up @@ -234,7 +234,7 @@ def format_reachable_imports(node: MypyFile) -> list[str]:

@unittest.skipUnless(has_nativeparse, "nativeparse not available")
class TestNativeParserBinaryFormat(unittest.TestCase):
def test_trivial_binary_data(self) -> None:
def _assert_trivial_binary_data(self, b: bytes, /) -> None:
# A quick sanity check to ensure the serialized data looks as expected. Only covers
# a few AST nodes.

Expand All @@ -250,9 +250,9 @@ def locs(start_line: int, start_column: int, end_line: int, end_column: int) ->
int_enc(end_column - start_column),
]

with temp_source("print('hello')") as fnam:
b, _, _, _, _, _, _, _ = parse_to_binary_ast(fnam, Options())
assert list(b) == (
self.assertEqual(
list(b),
(
[LITERAL_INT, 22, nodes.EXPR_STMT, nodes.CALL_EXPR]
+ [nodes.NAME_EXPR, LITERAL_STR]
+ [int_enc(5)]
Expand All @@ -269,7 +269,25 @@ def locs(start_line: int, start_column: int, end_line: int, end_column: int) ->
+ [LIST_GEN, 22, LITERAL_NONE]
+ locs(1, 0, 1, 14)
+ [END_TAG, END_TAG]
)
),
)

def test_trivial_binary_data_from_file(self) -> None:
with temp_source("print('hello')") as fnam:
b, _, _, _, _, _, _, _ = parse_to_binary_ast(fnam, Options())
self._assert_trivial_binary_data(b)

def test_trivial_binary_data_from_string_source(self) -> None:
b, _, _, _, _, _, _, _ = parse_to_binary_ast("", Options(), "print('hello')")
self._assert_trivial_binary_data(b)

def test_trivial_binary_data_from_bytes_source(self) -> None:
b, _, _, _, _, _, _, _ = parse_to_binary_ast("", Options(), b"print('hello')")
self._assert_trivial_binary_data(b)

def test_invalid_bytes_raises(self) -> None:
with self.assertRaises(UnicodeDecodeError):
parse_to_binary_ast("", Options(), b"\xff")


@contextlib.contextmanager
Expand Down
2 changes: 0 additions & 2 deletions mypy/test/testparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def test_parser(testcase: DataDrivenTestCase) -> None:
module="__main__",
errors=errors,
options=options,
file_exists=False,
eager=True,
)
if errors.is_errors():
Expand Down Expand Up @@ -108,7 +107,6 @@ def test_parse_error(testcase: DataDrivenTestCase) -> None:
"__main__",
errors=errors,
options=options,
file_exists=False,
eager=True,
)
if errors.is_errors():
Expand Down
Loading