infra: add -p to mkdir in lint steps (#17013)

Previously, the mypy lint step would not run if it did not find an existing mypy cache.

Adding -p to mkdir makes it always run.

Mypy ignore comments are also added for existing uncaught issues to unblock other PRs.
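
The ignores added below are scoped to a specific mypy error code rather than being blanket "# type: ignore" comments. A minimal, hypothetical sketch of the pattern (the names are illustrative, not taken from this diff):

    from queue import Queue

    # Each comment suppresses exactly one error code on its line; other error
    # codes reported for the same line would still surface.
    results = Queue()  # type: ignore[var-annotated]  # "Need type annotation for 'results'"

    def fetch(queue: Queue):  # type: ignore[no-untyped-def]  # missing return annotation
        queue.put(None)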

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Harrison Chase
2024-02-05 11:22:06 -08:00
committed by GitHub
parent db6af21395
commit 4eda647fdd
103 changed files with 378 additions and 369 deletions


@@ -123,7 +123,7 @@ class AssemblyAIAudioLoaderById(BaseLoader):
     """
-    def __init__(self, transcript_id, api_key, transcript_format):
+    def __init__(self, transcript_id, api_key, transcript_format):  # type: ignore[no-untyped-def]
         """
         Initializes the AssemblyAI AssemblyAIAudioLoaderById.


@@ -65,7 +65,7 @@ class AstraDBLoader(BaseLoader):
         return list(self.lazy_load())
     def lazy_load(self) -> Iterator[Document]:
-        queue = Queue(self.nb_prefetched)
+        queue = Queue(self.nb_prefetched)  # type: ignore[var-annotated]
         t = threading.Thread(target=self.fetch_results, args=(queue,))
         t.start()
         while True:
@@ -95,7 +95,7 @@ class AstraDBLoader(BaseLoader):
                 item = await run_in_executor(None, lambda it: next(it, done), iterator)
                 if item is done:
                     break
-                yield item
+                yield item  # type: ignore[misc]
             return
         async_collection = await self.astra_env.async_astra_db.collection(
             self.collection_name
@@ -116,13 +116,13 @@ class AstraDBLoader(BaseLoader):
                 },
             )
-    def fetch_results(self, queue: Queue):
+    def fetch_results(self, queue: Queue):  # type: ignore[no-untyped-def]
         self.fetch_page_result(queue)
         while self.find_options.get("pageState"):
             self.fetch_page_result(queue)
         queue.put(None)
-    def fetch_page_result(self, queue: Queue):
+    def fetch_page_result(self, queue: Queue):  # type: ignore[no-untyped-def]
         res = self.collection.find(
             filter=self.filter,
             options=self.find_options,


@@ -64,10 +64,10 @@ class BaseLoader(ABC):
         iterator = await run_in_executor(None, self.lazy_load)
         done = object()
         while True:
-            doc = await run_in_executor(None, next, iterator, done)
+            doc = await run_in_executor(None, next, iterator, done)  # type: ignore[call-arg, arg-type]
             if doc is done:
                 break
-            yield doc
+            yield doc  # type: ignore[misc]
 class BaseBlobParser(ABC):
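
The ignores in the base loader and Astra DB loader above come from the sentinel pattern used to drive a synchronous iterator from async code: next() runs in a worker thread with a sentinel default, and the sentinel-or-item result is what the ignore comments paper over. A self-contained sketch of the same idea, using the standard library executor directly instead of the run_in_executor helper seen in the diff:

    import asyncio
    from typing import AsyncIterator, Iterable, Iterator, TypeVar

    T = TypeVar("T")

    async def aiterate_in_thread(items: Iterable[T]) -> AsyncIterator[T]:
        """Yield items from a blocking iterator without blocking the event loop."""
        loop = asyncio.get_running_loop()
        iterator: Iterator[T] = iter(items)
        done = object()  # unique sentinel, never equal to a real item
        while True:
            # next(iterator, done) returns the sentinel on exhaustion instead of
            # letting StopIteration escape from the worker thread.
            item = await loop.run_in_executor(None, next, iterator, done)
            if item is done:
                break
            yield item  # type: ignore[misc]  # mirrors the ignore in the diff above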


@@ -33,14 +33,14 @@ class CassandraLoader(BaseLoader):
         page_content_mapper: Callable[[Any], str] = str,
         metadata_mapper: Callable[[Any], dict] = lambda _: {},
         *,
-        query_parameters: Union[dict, Sequence] = None,
-        query_timeout: Optional[float] = _NOT_SET,
+        query_parameters: Union[dict, Sequence] = None,  # type: ignore[assignment]
+        query_timeout: Optional[float] = _NOT_SET,  # type: ignore[assignment]
         query_trace: bool = False,
-        query_custom_payload: dict = None,
+        query_custom_payload: dict = None,  # type: ignore[assignment]
         query_execution_profile: Any = _NOT_SET,
         query_paging_state: Any = None,
         query_host: Host = None,
-        query_execute_as: str = None,
+        query_execute_as: str = None,  # type: ignore[assignment]
     ) -> None:
         """
         Document Loader for Apache Cassandra.
@@ -85,7 +85,7 @@ class CassandraLoader(BaseLoader):
             self.query = f"SELECT * FROM {_keyspace}.{table};"
             self.metadata = {"table": table, "keyspace": _keyspace}
         else:
-            self.query = query
+            self.query = query  # type: ignore[assignment]
             self.metadata = {}
         self.session = session or check_resolve_session(session)
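
The [assignment] ignores in the Cassandra loader come from defaults whose type is not covered by the parameter annotation, for example a None default on a parameter that is not declared Optional. A hypothetical sketch of the annotation-level alternative that would need no ignore:

    from typing import Optional, Sequence, Union

    def run_query(
        parameters: Optional[Union[dict, Sequence]] = None,
        timeout: Optional[float] = None,
    ) -> None:
        """Declaring None as part of the type lets the defaults pass mypy."""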


@@ -27,7 +27,7 @@ class UnstructuredCHMLoader(UnstructuredFileLoader):
     def _get_elements(self) -> List:
         from unstructured.partition.html import partition_html
-        with CHMParser(self.file_path) as f:
+        with CHMParser(self.file_path) as f:  # type: ignore[arg-type]
             return [
                 partition_html(text=item["content"], **self.unstructured_kwargs)
                 for item in f.load_all()
@@ -45,10 +45,10 @@ class CHMParser(object):
         self.file = chm.CHMFile()
         self.file.LoadCHM(path)
-    def __enter__(self):
+    def __enter__(self):  # type: ignore[no-untyped-def]
         return self
-    def __exit__(self, exc_type, exc_value, traceback):
+    def __exit__(self, exc_type, exc_value, traceback):  # type: ignore[no-untyped-def]
         if self.file:
             self.file.CloseCHM()


@@ -89,4 +89,4 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
             blob = Blob.from_path(self.file_path)
             yield from self.parser.parse(blob)
         else:
-            yield from self.parser.parse_url(self.url_path)
+            yield from self.parser.parse_url(self.url_path)  # type: ignore[arg-type]


@@ -60,7 +60,7 @@ class MWDumpLoader(BaseLoader):
         self.skip_redirects = skip_redirects
         self.stop_on_error = stop_on_error
-    def _load_dump_file(self):
+    def _load_dump_file(self):  # type: ignore[no-untyped-def]
         try:
             import mwxml
         except ImportError as e:
@@ -70,7 +70,7 @@ class MWDumpLoader(BaseLoader):
         return mwxml.Dump.from_file(open(self.file_path, encoding=self.encoding))
-    def _load_single_page_from_dump(self, page) -> Document:
+    def _load_single_page_from_dump(self, page) -> Document:  # type: ignore[no-untyped-def, return]
         """Parse a single page."""
         try:
             import mwparserfromhell


@@ -11,7 +11,7 @@ from langchain_community.document_loaders.blob_loaders import Blob
 class VsdxParser(BaseBlobParser, ABC):
-    def parse(self, blob: Blob) -> Iterator[Document]:
+    def parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[override]
         """Parse a vsdx file."""
         return self.lazy_parse(blob)
@@ -21,7 +21,7 @@ class VsdxParser(BaseBlobParser, ABC):
         with blob.as_bytes_io() as pdf_file_obj:
             with zipfile.ZipFile(pdf_file_obj, "r") as zfile:
-                pages = self.get_pages_content(zfile, blob.source)
+                pages = self.get_pages_content(zfile, blob.source)  # type: ignore[arg-type]
         yield from [
             Document(
@@ -60,13 +60,13 @@ class VsdxParser(BaseBlobParser, ABC):
         if "visio/pages/pages.xml" not in zfile.namelist():
             print("WARNING - No pages.xml file found in {}".format(source))
-            return
+            return  # type: ignore[return-value]
         if "visio/pages/_rels/pages.xml.rels" not in zfile.namelist():
             print("WARNING - No pages.xml.rels file found in {}".format(source))
-            return
+            return  # type: ignore[return-value]
         if "docProps/app.xml" not in zfile.namelist():
             print("WARNING - No app.xml file found in {}".format(source))
-            return
+            return  # type: ignore[return-value]
         pagesxml_content: dict = xmltodict.parse(zfile.read("visio/pages/pages.xml"))
         appxml_content: dict = xmltodict.parse(zfile.read("docProps/app.xml"))
@@ -79,7 +79,7 @@ class VsdxParser(BaseBlobParser, ABC):
                 rel["@Name"].strip() for rel in pagesxml_content["Pages"]["Page"]
             ]
         else:
-            disordered_names: List[str] = [
+            disordered_names: List[str] = [  # type: ignore[no-redef]
                 pagesxml_content["Pages"]["Page"]["@Name"].strip()
             ]
         if isinstance(pagesxmlrels_content["Relationships"]["Relationship"], list):
@@ -88,7 +88,7 @@ class VsdxParser(BaseBlobParser, ABC):
                 for rel in pagesxmlrels_content["Relationships"]["Relationship"]
             ]
         else:
-            disordered_paths: List[str] = [
+            disordered_paths: List[str] = [  # type: ignore[no-redef]
                 "visio/pages/"
                 + pagesxmlrels_content["Relationships"]["Relationship"]["@Target"]
             ]
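
The [no-redef] ignores in the vsdx parser are triggered by annotating the same variable in both the if and the else branch. A hypothetical sketch of the annotate-once style that avoids the error:

    from typing import List

    def page_names(pages: object) -> List[str]:
        names: List[str]  # one annotation covers both branches below
        if isinstance(pages, list):
            names = [str(p) for p in pages]
        else:
            names = [str(pages)]
        return names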