python · bzoracler · Apr 17, 2026 · Apr 17, 2026 · Apr 28, 2026 · Apr 28, 2026
diff --git a/misc/dump-ast.py b/misc/dump-ast.py
@@ -19,7 +19,7 @@ def dump(fname: str, python_version: tuple[int, int], quiet: bool = False) -> No
     options.python_version = python_version
     with open(fname, "rb") as f:
         s = f.read()
-        tree = parse(s, fname, None, errors=Errors(options), options=options, file_exists=True)
+        tree = parse(s, fname, None, errors=Errors(options), options=options)
         if not quiet:
             print(tree)
 

diff --git a/mypy/build.py b/mypy/build.py
@@ -163,7 +163,7 @@
 from mypy.modules_state import modules_state
 from mypy.nodes import Expression
 from mypy.options import Options
-from mypy.parse import load_from_raw, parse
+from mypy.parse import load_from_raw, parse, parse_native
 from mypy.plugin import ChainedPlugin, Plugin, ReportConfigContext
 from mypy.plugins.default import DefaultPlugin
 from mypy.renaming import LimitedVariableRenameVisitor, VariableRenameVisitor
@@ -1024,35 +1024,31 @@ def parse_all(self, states: list[State], post_parse: bool = True) -> None:
                 self.post_parse_all(states)
             return
 
-        sequential_states = []
         parallel_states = []
         for state in states:
             if state.tree is not None:
                 # The file was already parsed.
-                continue
-            if not self.fscache.exists(state.xpath, real_only=True):
-                # New parser only supports parsing on-disk files.
-                sequential_states.append(state)
+                state.needs_parse = False
                 continue
             parallel_states.append(state)
         if len(parallel_states) > 1:
-            self.parse_parallel(sequential_states, parallel_states)
+            self.parse_parallel(parallel_states)
         else:
             # Avoid using executor when there is no parallelism.
             for state in states:
                 state.parse_file()
         if post_parse:
             self.post_parse_all(states)
 
-    def parse_parallel(self, sequential_states: list[State], parallel_states: list[State]) -> None:
+    def parse_parallel(self, parallel_states: list[State]) -> None:
         """Perform parallel parsing of states.
 
         Note: this duplicates a bit of logic from State.parse_file(). This is done
         as an optimization to parallelize only those parts of the code that can be
         parallelized efficiently.
         """
         parallel_parsed_states, parallel_parsed_states_set = self.parse_files_threaded_raw(
-            sequential_states, parallel_states
+            parallel_states
         )
 
         for state in parallel_parsed_states:
@@ -1097,12 +1089,9 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S
             state.check_blockers()
             state.setup_errors()
 
-    def parse_files_threaded_raw(
-        self, sequential_states: list[State], parallel_states: list[State]
-    ) -> tuple[list[State], set[State]]:
-        """Parse files using a thread pool.
+    def parse_files_threaded_raw(self, states: list[State]) -> tuple[list[State], set[State]]:
+        """Parse files in parallel using a thread pool.
 
-        Also parse sequential states while waiting for the parallel results.
         Trees from the new parser are left in raw (serialized) form.
 
         Return (list, set) of states that were actually parsed (not cached).
@@ -1118,25 +1107,23 @@ def parse_files_threaded_raw(
         # parse_file_inner() results in no visible improvement with more than 8 threads.
         # TODO: reuse thread pool and/or batch small files in single submit() call.
         with ThreadPoolExecutor(max_workers=min(available_threads, 8)) as executor:
-            for state in parallel_states:
+            for state in states:
                 state.needs_parse = False
                 if state.id not in self.ast_cache:
                     self.log(f"Parsing {state.xpath} ({state.id})")
                     ignore_errors = state.ignore_all or state.options.ignore_errors
                     if ignore_errors:
                         self.errors.ignored_files.add(state.xpath)
-                    futures.append(executor.submit(state.parse_file_inner, ""))
+                    futures.append(
+                        executor.submit(state.parse_file_inner, state.source, parallel=True)
+                    )
                     parallel_parsed_states.append(state)
                     parallel_parsed_states_set.add(state)
                 else:
                     self.log(f"Using cached AST for {state.xpath} ({state.id})")
                     state.tree, state.early_errors, source_hash = self.ast_cache[state.id]
                     state.source_hash = source_hash
 
-            # Parse sequential before waiting on parallel.
-            for state in sequential_states:
-                state.parse_file()
-
             for fut in wait(futures).done:
                 fut.result()
 
@@ -1279,21 +1266,32 @@ def parse_file(
         self,
         id: str,
         path: str,
-        source: str,
+        source: str | None,
         options: Options,
         raw_data: FileRawData | None = None,
+        parallel: bool = False,
     ) -> MypyFile:
         """Parse the source of a file with the given name.
 
         Raise CompileError if there is a parse error.
         """
-        file_exists = self.fscache.exists(path, real_only=True)
         t0 = time.time()
         if raw_data:
             # If possible, deserialize from known binary data instead of parsing from scratch.
             tree = load_from_raw(path, id, raw_data, self.errors, options)
         else:
-            tree = parse(source, path, id, self.errors, options=options, file_exists=file_exists)
+            if source is not None:
+                tree = parse(source, path, id, self.errors, options=options)
+            else:
+                assert parallel
+                if not os.path.exists(path):
+                    build_error(
+                        "Cannot read file '{}': {}".format(
+                            path.replace(os.getcwd() + os.sep, ""),
+                            os.strerror(2),  # `errno.ENOENT`
+                        )
+                    )
+                tree = parse_native(source, path, id, self.errors, options=options)
         tree._fullname = id
         if self.stats_enabled:
             with self.stats_lock:
@@ -3192,10 +3190,12 @@ def get_source(self) -> str:
         self.time_spent_us += time_spent_us(t0)
         return source
 
-    def parse_file_inner(self, source: str, raw_data: FileRawData | None = None) -> None:
+    def parse_file_inner(
+        self, source: str | None, raw_data: FileRawData | None = None, parallel: bool = False
+    ) -> None:
         t0 = time_ref()
         self.tree = self.manager.parse_file(
-            self.id, self.xpath, source, options=self.options, raw_data=raw_data
+            self.id, self.xpath, source, self.options, raw_data, parallel
         )
         self.time_spent_us += time_spent_us(t0)
 
@@ -3319,9 +3319,7 @@ def semantic_analysis_pass1(self) -> None:
         #
         # TODO: This should not be considered as a semantic analysis
         #     pass -- it's an independent pass.
-        if not options.native_parser or not self.manager.fscache.exists(
-            self.xpath, real_only=True
-        ):
+        if not options.native_parser:
             analyzer = SemanticAnalyzerPreAnalysis()
             with self.wrap_context():
                 analyzer.visit_file(self.tree, self.xpath, self.id, options)

diff --git a/mypy/checkstrformat.py b/mypy/checkstrformat.py
@@ -587,7 +587,6 @@ def apply_field_accessors(
             module=None,
             options=self.chk.options,
             errors=temp_errors,
-            file_exists=False,
             eager=True,
         )
         if temp_errors.is_errors():

diff --git a/mypy/fscache.py b/mypy/fscache.py
@@ -253,13 +253,10 @@ def isdir(self, path: str) -> bool:
             return False
         return stat.S_ISDIR(st.st_mode)
 
-    def exists(self, path: str, real_only: bool = False) -> bool:
+    def exists(self, path: str) -> bool:
         st = self.stat_or_none(path)
         if st is None:
             return False
-        if real_only:
-            dirname = os.path.dirname(path)
-            return dirname not in self.fake_package_cache
         return True
 
     def read(self, path: str) -> bytes:

diff --git a/mypy/nativeparse.py b/mypy/nativeparse.py
@@ -182,7 +182,10 @@ def add_error(
 
 
 def native_parse(
-    filename: str, options: Options, skip_function_bodies: bool = False
+    filename: str,
+    options: Options,
+    source: str | bytes | None = None,
+    skip_function_bodies: bool = False,
 ) -> tuple[MypyFile, list[ParseError], TypeIgnores]:
     """Parse a Python file using the native Rust-based parser.
 
@@ -211,7 +214,7 @@ def native_parse(
         uses_template_strings,
         source_hash,
         mypy_comments,
-    ) = parse_to_binary_ast(filename, options, skip_function_bodies)
+    ) = parse_to_binary_ast(filename, options, source, skip_function_bodies)
     node = MypyFile([], [])
     node.path = filename
     node.raw_data = FileRawData(
@@ -248,7 +251,10 @@ def read_statements(state: State, data: ReadBuffer, n: int) -> list[Statement]:
 
 
 def parse_to_binary_ast(
-    filename: str, options: Options, skip_function_bodies: bool = False
+    filename: str,
+    options: Options,
+    source: str | bytes | None = None,
+    skip_function_bodies: bool = False,
 ) -> tuple[bytes, list[ParseError], TypeIgnores, bytes, bool, bool, str, list[tuple[int, str]]]:
     # This is a horrible hack to work around a mypyc bug where imported
     # module may be not ready in a thread sometimes.
@@ -259,6 +265,7 @@ def parse_to_binary_ast(
             raise ImportError("Cannot import ast_serialize")
     ast_bytes, errors, ignores, import_bytes, ast_data = ast_serialize.parse(
         filename,
+        source,
         skip_function_bodies=skip_function_bodies,
         python_version=options.python_version,
         platform=options.platform,

diff --git a/mypy/parse.py b/mypy/parse.py
@@ -17,7 +17,6 @@ def parse(
     module: str | None,
     errors: Errors,
     options: Options,
-    file_exists: bool,
     eager: bool = False,
 ) -> MypyFile:
     """Parse a source file, without doing any semantic analysis.
@@ -29,25 +28,7 @@ def parse(
     the parse errors, use eager=True.
     """
     if options.native_parser:
-        # Native parser only works with actual files on disk
-        # Fall back to fastparse for in-memory source or non-existent files
-        if file_exists:
-            import mypy.nativeparse
-
-            ignore_errors = options.ignore_errors or fnam in errors.ignored_files
-            # If errors are ignored, we can drop many function bodies to speed up type checking.
-            strip_function_bodies = ignore_errors and not options.preserve_asts
-            tree, _, _ = mypy.nativeparse.native_parse(
-                fnam, options, skip_function_bodies=strip_function_bodies
-            )
-            # Set is_stub based on file extension
-            tree.is_stub = fnam.endswith(".pyi")
-            # Note: tree.imports is populated directly by load_from_raw() with deserialized
-            # import metadata, so we don't need to collect imports via AST traversal
-            if eager and tree.raw_data is not None:
-                tree = load_from_raw(fnam, module, tree.raw_data, errors, options)
-            return tree
-        # Fall through to fastparse for non-existent files
+        return parse_native(source, fnam, module, errors, options, eager)
 
     if options.transform_source is not None:
         source = options.transform_source(source)
@@ -102,6 +83,39 @@ def load_from_raw(
     return tree
 
 
+def parse_native(
+    source: str | bytes | None,
+    fnam: str,
+    module: str | None,
+    errors: Errors,
+    options: Options,
+    eager: bool = False,
+) -> MypyFile:
+    """Parse a file with the native parser and return the resulting MypyFile.
+
+    `source` is forwarded unchanged to mypy.nativeparse.native_parse(); it may be
+    None (NOTE(review): presumably the parser then reads `fnam` from disk -- confirm).
+    Function bodies are skipped when errors are ignored for this file and
+    options.preserve_asts is not set. With eager=True, available raw data is
+    deserialized immediately via load_from_raw().
+    """
+    import mypy.nativeparse
+
+    ignore_errors = options.ignore_errors or fnam in errors.ignored_files
+    # If errors are ignored, we can drop many function bodies to speed up type checking.
+    strip_function_bodies = ignore_errors and not options.preserve_asts
+    tree, _, _ = mypy.nativeparse.native_parse(
+        fnam, options, source, skip_function_bodies=strip_function_bodies
+    )
+    # Set is_stub based on file extension
+    tree.is_stub = fnam.endswith(".pyi")
+    # Note: tree.imports is populated directly by load_from_raw() with deserialized
+    # import metadata, so we don't need to collect imports via AST traversal
+    if eager and tree.raw_data is not None:
+        tree = load_from_raw(fnam, module, tree.raw_data, errors, options)
+    return tree
+
+
 def report_parse_error(error: ParseError, errors: Errors) -> None:
     message = error["message"]
     # Standardize error message by capitalizing the first word

diff --git a/mypy/stubgen.py b/mypy/stubgen.py
@@ -1745,13 +1745,7 @@ def parse_source_file(mod: StubSource, mypy_options: MypyOptions) -> None:
     source = mypy.util.decode_python_encoding(data)
     errors = Errors(mypy_options)
     mod.ast = mypy.parse.parse(
-        source,
-        fnam=mod.path,
-        module=mod.module,
-        errors=errors,
-        options=mypy_options,
-        file_exists=True,
-        eager=True,
+        source, fnam=mod.path, module=mod.module, errors=errors, options=mypy_options, eager=True
     )
     mod.ast._fullname = mod.module
     if errors.is_blockers():

diff --git a/mypy/test/test_nativeparse.py b/mypy/test/test_nativeparse.py
@@ -98,7 +98,7 @@ def test_parser(testcase: DataDrivenTestCase) -> None:
 
     try:
         with temp_source(source) as fnam:
-            node, errors, type_ignores = native_parse(fnam, options, skip_function_bodies)
+            node, errors, type_ignores = native_parse(fnam, options, None, skip_function_bodies)
             errors += load_tree(node, options)
             node.path = "main"
             a = node.str_with_options(options).split("\n")
@@ -234,7 +234,7 @@ def format_reachable_imports(node: MypyFile) -> list[str]:
 
 @unittest.skipUnless(has_nativeparse, "nativeparse not available")
 class TestNativeParserBinaryFormat(unittest.TestCase):
-    def test_trivial_binary_data(self) -> None:
+    def _assert_trivial_binary_data(self, b: bytes, /) -> None:
         # A quick sanity check to ensure the serialized data looks as expected. Only covers
         # a few AST nodes.
 
@@ -250,9 +250,9 @@ def locs(start_line: int, start_column: int, end_line: int, end_column: int) ->
                 int_enc(end_column - start_column),
             ]
 
-        with temp_source("print('hello')") as fnam:
-            b, _, _, _, _, _, _, _ = parse_to_binary_ast(fnam, Options())
-            assert list(b) == (
+        self.assertEqual(
+            list(b),
+            (
                 [LITERAL_INT, 22, nodes.EXPR_STMT, nodes.CALL_EXPR]
                 + [nodes.NAME_EXPR, LITERAL_STR]
                 + [int_enc(5)]
@@ -269,7 +269,25 @@ def locs(start_line: int, start_column: int, end_line: int, end_column: int) ->
                 + [LIST_GEN, 22, LITERAL_NONE]
                 + locs(1, 0, 1, 14)
                 + [END_TAG, END_TAG]
-            )
+            ),
+        )
+
+    def test_trivial_binary_data_from_file(self) -> None:
+        with temp_source("print('hello')") as fnam:
+            b, _, _, _, _, _, _, _ = parse_to_binary_ast(fnam, Options())
+            self._assert_trivial_binary_data(b)
+
+    def test_trivial_binary_data_from_string_source(self) -> None:
+        b, _, _, _, _, _, _, _ = parse_to_binary_ast("", Options(), "print('hello')")
+        self._assert_trivial_binary_data(b)
+
+    def test_trivial_binary_data_from_bytes_source(self) -> None:
+        b, _, _, _, _, _, _, _ = parse_to_binary_ast("", Options(), b"print('hello')")
+        self._assert_trivial_binary_data(b)
+
+    def test_invalid_bytes_raises(self) -> None:
+        with self.assertRaises(UnicodeDecodeError):
+            parse_to_binary_ast("", Options(), b"\xff")
 
 
 @contextlib.contextmanager

diff --git a/mypy/test/testparse.py b/mypy/test/testparse.py
@@ -66,7 +66,6 @@ def test_parser(testcase: DataDrivenTestCase) -> None:
             module="__main__",
             errors=errors,
             options=options,
-            file_exists=False,
             eager=True,
         )
         if errors.is_errors():
@@ -108,7 +107,6 @@ def test_parse_error(testcase: DataDrivenTestCase) -> None:
             "__main__",
             errors=errors,
             options=options,
-            file_exists=False,
             eager=True,
         )
         if errors.is_errors():

diff --git a/test-data/unit/cmdline.test b/test-data/unit/cmdline.test
@@ -593,6 +593,7 @@ import d
 
 [case testPackageRootMultipleParallel]
 # cmd: mypy --package-root=a/ --package-root=./ a/b/c.py d.py main.py --num-workers=2
+[file a/b/__init__.py]
 [file a/b/c.py]
 [file d.py]
 [file main.py]