infra: add -p to mkdir in lint steps (#17013)

Previously, the mypy lint step would not run if it did not find an existing mypy cache.

Adding -p to mkdir makes it always run.

Mypy ignore comments are also added for existing uncaught issues to unblock other PRs.
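
The ignores added below are scoped to a specific mypy error code rather than being blanket "# type: ignore" comments. A minimal, hypothetical sketch of the pattern (the names are illustrative, not taken from this diff):

    from queue import Queue

    # Each comment suppresses exactly one error code on its line; other error
    # codes reported for the same line would still surface.
    results = Queue()  # type: ignore[var-annotated]  # "Need type annotation for 'results'"

    def fetch(queue: Queue):  # type: ignore[no-untyped-def]  # missing return annotation
        queue.put(None)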

---------

Co-authored-by: Erick Friis <erick@langchain.dev>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
Harrison Chase
2024-02-05 11:22:06 -08:00
committed by GitHub
parent db6af21395
commit 4eda647fdd
103 changed files with 378 additions and 369 deletions


@@ -123,7 +123,7 @@ class AssemblyAIAudioLoaderById(BaseLoader):
     """
-    def __init__(self, transcript_id, api_key, transcript_format):
+    def __init__(self, transcript_id, api_key, transcript_format):  # type: ignore[no-untyped-def]
         """
         Initializes the AssemblyAI AssemblyAIAudioLoaderById.


@@ -65,7 +65,7 @@ class AstraDBLoader(BaseLoader):
         return list(self.lazy_load())
     def lazy_load(self) -> Iterator[Document]:
-        queue = Queue(self.nb_prefetched)
+        queue = Queue(self.nb_prefetched)  # type: ignore[var-annotated]
         t = threading.Thread(target=self.fetch_results, args=(queue,))
         t.start()
         while True:
@@ -95,7 +95,7 @@ class AstraDBLoader(BaseLoader):
                 item = await run_in_executor(None, lambda it: next(it, done), iterator)
                 if item is done:
                     break
-                yield item
+                yield item  # type: ignore[misc]
             return
         async_collection = await self.astra_env.async_astra_db.collection(
             self.collection_name
@@ -116,13 +116,13 @@ class AstraDBLoader(BaseLoader):
                 },
             )
-    def fetch_results(self, queue: Queue):
+    def fetch_results(self, queue: Queue):  # type: ignore[no-untyped-def]
         self.fetch_page_result(queue)
         while self.find_options.get("pageState"):
             self.fetch_page_result(queue)
         queue.put(None)
-    def fetch_page_result(self, queue: Queue):
+    def fetch_page_result(self, queue: Queue):  # type: ignore[no-untyped-def]
         res = self.collection.find(
             filter=self.filter,
             options=self.find_options,


@@ -64,10 +64,10 @@ class BaseLoader(ABC):
         iterator = await run_in_executor(None, self.lazy_load)
         done = object()
         while True:
-            doc = await run_in_executor(None, next, iterator, done)
+            doc = await run_in_executor(None, next, iterator, done)  # type: ignore[call-arg, arg-type]
             if doc is done:
                 break
-            yield doc
+            yield doc  # type: ignore[misc]
 class BaseBlobParser(ABC):
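
The ignores in the base loader and Astra DB loader above come from the sentinel pattern used to drive a synchronous iterator from async code: next() runs in a worker thread with a sentinel default, and the sentinel-or-item result is what the ignore comments paper over. A self-contained sketch of the same idea, using the standard library executor directly instead of the run_in_executor helper seen in the diff:

    import asyncio
    from typing import AsyncIterator, Iterable, Iterator, TypeVar

    T = TypeVar("T")

    async def aiterate_in_thread(items: Iterable[T]) -> AsyncIterator[T]:
        """Yield items from a blocking iterator without blocking the event loop."""
        loop = asyncio.get_running_loop()
        iterator: Iterator[T] = iter(items)
        done = object()  # unique sentinel, never equal to a real item
        while True:
            # next(iterator, done) returns the sentinel on exhaustion instead of
            # letting StopIteration escape from the worker thread.
            item = await loop.run_in_executor(None, next, iterator, done)
            if item is done:
                break
            yield item  # type: ignore[misc]  # mirrors the ignore in the diff above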


@@ -33,14 +33,14 @@ class CassandraLoader(BaseLoader):
         page_content_mapper: Callable[[Any], str] = str,
         metadata_mapper: Callable[[Any], dict] = lambda _: {},
         *,
-        query_parameters: Union[dict, Sequence] = None,
-        query_timeout: Optional[float] = _NOT_SET,
+        query_parameters: Union[dict, Sequence] = None,  # type: ignore[assignment]
+        query_timeout: Optional[float] = _NOT_SET,  # type: ignore[assignment]
         query_trace: bool = False,
-        query_custom_payload: dict = None,
+        query_custom_payload: dict = None,  # type: ignore[assignment]
         query_execution_profile: Any = _NOT_SET,
         query_paging_state: Any = None,
         query_host: Host = None,
-        query_execute_as: str = None,
+        query_execute_as: str = None,  # type: ignore[assignment]
     ) -> None:
         """
         Document Loader for Apache Cassandra.
@@ -85,7 +85,7 @@ class CassandraLoader(BaseLoader):
             self.query = f"SELECT * FROM {_keyspace}.{table};"
             self.metadata = {"table": table, "keyspace": _keyspace}
         else:
-            self.query = query
+            self.query = query  # type: ignore[assignment]
             self.metadata = {}
         self.session = session or check_resolve_session(session)
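
The [assignment] ignores in the Cassandra loader come from defaults whose type is not covered by the parameter annotation, for example a None default on a parameter that is not declared Optional. A hypothetical sketch of the annotation-level alternative that would need no ignore:

    from typing import Optional, Sequence, Union

    def run_query(
        parameters: Optional[Union[dict, Sequence]] = None,
        timeout: Optional[float] = None,
    ) -> None:
        """Declaring None as part of the type lets the defaults pass mypy."""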


@@ -27,7 +27,7 @@ class UnstructuredCHMLoader(UnstructuredFileLoader):
     def _get_elements(self) -> List:
         from unstructured.partition.html import partition_html
-        with CHMParser(self.file_path) as f:
+        with CHMParser(self.file_path) as f:  # type: ignore[arg-type]
             return [
                 partition_html(text=item["content"], **self.unstructured_kwargs)
                 for item in f.load_all()
@@ -45,10 +45,10 @@ class CHMParser(object):
         self.file = chm.CHMFile()
         self.file.LoadCHM(path)
-    def __enter__(self):
+    def __enter__(self):  # type: ignore[no-untyped-def]
         return self
-    def __exit__(self, exc_type, exc_value, traceback):
+    def __exit__(self, exc_type, exc_value, traceback):  # type: ignore[no-untyped-def]
         if self.file:
             self.file.CloseCHM()


@@ -89,4 +89,4 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
             blob = Blob.from_path(self.file_path)
             yield from self.parser.parse(blob)
         else:
-            yield from self.parser.parse_url(self.url_path)
+            yield from self.parser.parse_url(self.url_path)  # type: ignore[arg-type]


@@ -60,7 +60,7 @@ class MWDumpLoader(BaseLoader):
         self.skip_redirects = skip_redirects
         self.stop_on_error = stop_on_error
-    def _load_dump_file(self):
+    def _load_dump_file(self):  # type: ignore[no-untyped-def]
         try:
             import mwxml
         except ImportError as e:
@@ -70,7 +70,7 @@ class MWDumpLoader(BaseLoader):
         return mwxml.Dump.from_file(open(self.file_path, encoding=self.encoding))
-    def _load_single_page_from_dump(self, page) -> Document:
+    def _load_single_page_from_dump(self, page) -> Document:  # type: ignore[no-untyped-def, return]
         """Parse a single page."""
         try:
             import mwparserfromhell


@@ -11,7 +11,7 @@ from langchain_community.document_loaders.blob_loaders import Blob
 class VsdxParser(BaseBlobParser, ABC):
-    def parse(self, blob: Blob) -> Iterator[Document]:
+    def parse(self, blob: Blob) -> Iterator[Document]:  # type: ignore[override]
         """Parse a vsdx file."""
         return self.lazy_parse(blob)
@@ -21,7 +21,7 @@ class VsdxParser(BaseBlobParser, ABC):
         with blob.as_bytes_io() as pdf_file_obj:
             with zipfile.ZipFile(pdf_file_obj, "r") as zfile:
-                pages = self.get_pages_content(zfile, blob.source)
+                pages = self.get_pages_content(zfile, blob.source)  # type: ignore[arg-type]
         yield from [
             Document(
@@ -60,13 +60,13 @@ class VsdxParser(BaseBlobParser, ABC):
         if "visio/pages/pages.xml" not in zfile.namelist():
             print("WARNING - No pages.xml file found in {}".format(source))
-            return
+            return  # type: ignore[return-value]
         if "visio/pages/_rels/pages.xml.rels" not in zfile.namelist():
             print("WARNING - No pages.xml.rels file found in {}".format(source))
-            return
+            return  # type: ignore[return-value]
         if "docProps/app.xml" not in zfile.namelist():
             print("WARNING - No app.xml file found in {}".format(source))
-            return
+            return  # type: ignore[return-value]
         pagesxml_content: dict = xmltodict.parse(zfile.read("visio/pages/pages.xml"))
         appxml_content: dict = xmltodict.parse(zfile.read("docProps/app.xml"))
@@ -79,7 +79,7 @@ class VsdxParser(BaseBlobParser, ABC):
                 rel["@Name"].strip() for rel in pagesxml_content["Pages"]["Page"]
             ]
         else:
-            disordered_names: List[str] = [
+            disordered_names: List[str] = [  # type: ignore[no-redef]
                 pagesxml_content["Pages"]["Page"]["@Name"].strip()
             ]
         if isinstance(pagesxmlrels_content["Relationships"]["Relationship"], list):
@@ -88,7 +88,7 @@ class VsdxParser(BaseBlobParser, ABC):
                 for rel in pagesxmlrels_content["Relationships"]["Relationship"]
             ]
         else:
-            disordered_paths: List[str] = [
+            disordered_paths: List[str] = [  # type: ignore[no-redef]
                 "visio/pages/"
                 + pagesxmlrels_content["Relationships"]["Relationship"]["@Target"]
             ]
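
The [no-redef] ignores in the vsdx parser are triggered by annotating the same variable in both the if and the else branch. A hypothetical sketch of the annotate-once style that avoids the error:

    from typing import List

    def page_names(pages: object) -> List[str]:
        names: List[str]  # one annotation covers both branches below
        if isinstance(pages, list):
            names = [str(p) for p in pages]
        else:
            names = [str(pages)]
        return names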