mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-16 23:13:31 +00:00
infra: add -p to mkdir in lint steps (#17013)
Previously, if this did not find a mypy cache, then it wouldn't run. This makes it always run. Adding mypy ignore comments for existing uncaught issues to unblock other PRs. --------- Co-authored-by: Erick Friis <erick@langchain.dev> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
This commit is contained in:
@@ -123,7 +123,7 @@ class AssemblyAIAudioLoaderById(BaseLoader):
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, transcript_id, api_key, transcript_format):
|
||||
def __init__(self, transcript_id, api_key, transcript_format): # type: ignore[no-untyped-def]
|
||||
"""
|
||||
Initializes the AssemblyAI AssemblyAIAudioLoaderById.
|
||||
|
||||
|
@@ -65,7 +65,7 @@ class AstraDBLoader(BaseLoader):
|
||||
return list(self.lazy_load())
|
||||
|
||||
def lazy_load(self) -> Iterator[Document]:
|
||||
queue = Queue(self.nb_prefetched)
|
||||
queue = Queue(self.nb_prefetched) # type: ignore[var-annotated]
|
||||
t = threading.Thread(target=self.fetch_results, args=(queue,))
|
||||
t.start()
|
||||
while True:
|
||||
@@ -95,7 +95,7 @@ class AstraDBLoader(BaseLoader):
|
||||
item = await run_in_executor(None, lambda it: next(it, done), iterator)
|
||||
if item is done:
|
||||
break
|
||||
yield item
|
||||
yield item # type: ignore[misc]
|
||||
return
|
||||
async_collection = await self.astra_env.async_astra_db.collection(
|
||||
self.collection_name
|
||||
@@ -116,13 +116,13 @@ class AstraDBLoader(BaseLoader):
|
||||
},
|
||||
)
|
||||
|
||||
def fetch_results(self, queue: Queue):
|
||||
def fetch_results(self, queue: Queue): # type: ignore[no-untyped-def]
|
||||
self.fetch_page_result(queue)
|
||||
while self.find_options.get("pageState"):
|
||||
self.fetch_page_result(queue)
|
||||
queue.put(None)
|
||||
|
||||
def fetch_page_result(self, queue: Queue):
|
||||
def fetch_page_result(self, queue: Queue): # type: ignore[no-untyped-def]
|
||||
res = self.collection.find(
|
||||
filter=self.filter,
|
||||
options=self.find_options,
|
||||
|
@@ -64,10 +64,10 @@ class BaseLoader(ABC):
|
||||
iterator = await run_in_executor(None, self.lazy_load)
|
||||
done = object()
|
||||
while True:
|
||||
doc = await run_in_executor(None, next, iterator, done)
|
||||
doc = await run_in_executor(None, next, iterator, done) # type: ignore[call-arg, arg-type]
|
||||
if doc is done:
|
||||
break
|
||||
yield doc
|
||||
yield doc # type: ignore[misc]
|
||||
|
||||
|
||||
class BaseBlobParser(ABC):
|
||||
|
@@ -33,14 +33,14 @@ class CassandraLoader(BaseLoader):
|
||||
page_content_mapper: Callable[[Any], str] = str,
|
||||
metadata_mapper: Callable[[Any], dict] = lambda _: {},
|
||||
*,
|
||||
query_parameters: Union[dict, Sequence] = None,
|
||||
query_timeout: Optional[float] = _NOT_SET,
|
||||
query_parameters: Union[dict, Sequence] = None, # type: ignore[assignment]
|
||||
query_timeout: Optional[float] = _NOT_SET, # type: ignore[assignment]
|
||||
query_trace: bool = False,
|
||||
query_custom_payload: dict = None,
|
||||
query_custom_payload: dict = None, # type: ignore[assignment]
|
||||
query_execution_profile: Any = _NOT_SET,
|
||||
query_paging_state: Any = None,
|
||||
query_host: Host = None,
|
||||
query_execute_as: str = None,
|
||||
query_execute_as: str = None, # type: ignore[assignment]
|
||||
) -> None:
|
||||
"""
|
||||
Document Loader for Apache Cassandra.
|
||||
@@ -85,7 +85,7 @@ class CassandraLoader(BaseLoader):
|
||||
self.query = f"SELECT * FROM {_keyspace}.{table};"
|
||||
self.metadata = {"table": table, "keyspace": _keyspace}
|
||||
else:
|
||||
self.query = query
|
||||
self.query = query # type: ignore[assignment]
|
||||
self.metadata = {}
|
||||
|
||||
self.session = session or check_resolve_session(session)
|
||||
|
@@ -27,7 +27,7 @@ class UnstructuredCHMLoader(UnstructuredFileLoader):
|
||||
def _get_elements(self) -> List:
|
||||
from unstructured.partition.html import partition_html
|
||||
|
||||
with CHMParser(self.file_path) as f:
|
||||
with CHMParser(self.file_path) as f: # type: ignore[arg-type]
|
||||
return [
|
||||
partition_html(text=item["content"], **self.unstructured_kwargs)
|
||||
for item in f.load_all()
|
||||
@@ -45,10 +45,10 @@ class CHMParser(object):
|
||||
self.file = chm.CHMFile()
|
||||
self.file.LoadCHM(path)
|
||||
|
||||
def __enter__(self):
|
||||
def __enter__(self): # type: ignore[no-untyped-def]
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_value, traceback):
|
||||
def __exit__(self, exc_type, exc_value, traceback): # type: ignore[no-untyped-def]
|
||||
if self.file:
|
||||
self.file.CloseCHM()
|
||||
|
||||
|
@@ -89,4 +89,4 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
|
||||
blob = Blob.from_path(self.file_path)
|
||||
yield from self.parser.parse(blob)
|
||||
else:
|
||||
yield from self.parser.parse_url(self.url_path)
|
||||
yield from self.parser.parse_url(self.url_path) # type: ignore[arg-type]
|
||||
|
@@ -60,7 +60,7 @@ class MWDumpLoader(BaseLoader):
|
||||
self.skip_redirects = skip_redirects
|
||||
self.stop_on_error = stop_on_error
|
||||
|
||||
def _load_dump_file(self):
|
||||
def _load_dump_file(self): # type: ignore[no-untyped-def]
|
||||
try:
|
||||
import mwxml
|
||||
except ImportError as e:
|
||||
@@ -70,7 +70,7 @@ class MWDumpLoader(BaseLoader):
|
||||
|
||||
return mwxml.Dump.from_file(open(self.file_path, encoding=self.encoding))
|
||||
|
||||
def _load_single_page_from_dump(self, page) -> Document:
|
||||
def _load_single_page_from_dump(self, page) -> Document: # type: ignore[no-untyped-def, return]
|
||||
"""Parse a single page."""
|
||||
try:
|
||||
import mwparserfromhell
|
||||
|
@@ -11,7 +11,7 @@ from langchain_community.document_loaders.blob_loaders import Blob
|
||||
|
||||
|
||||
class VsdxParser(BaseBlobParser, ABC):
|
||||
def parse(self, blob: Blob) -> Iterator[Document]:
|
||||
def parse(self, blob: Blob) -> Iterator[Document]: # type: ignore[override]
|
||||
"""Parse a vsdx file."""
|
||||
return self.lazy_parse(blob)
|
||||
|
||||
@@ -21,7 +21,7 @@ class VsdxParser(BaseBlobParser, ABC):
|
||||
|
||||
with blob.as_bytes_io() as pdf_file_obj:
|
||||
with zipfile.ZipFile(pdf_file_obj, "r") as zfile:
|
||||
pages = self.get_pages_content(zfile, blob.source)
|
||||
pages = self.get_pages_content(zfile, blob.source) # type: ignore[arg-type]
|
||||
|
||||
yield from [
|
||||
Document(
|
||||
@@ -60,13 +60,13 @@ class VsdxParser(BaseBlobParser, ABC):
|
||||
|
||||
if "visio/pages/pages.xml" not in zfile.namelist():
|
||||
print("WARNING - No pages.xml file found in {}".format(source))
|
||||
return
|
||||
return # type: ignore[return-value]
|
||||
if "visio/pages/_rels/pages.xml.rels" not in zfile.namelist():
|
||||
print("WARNING - No pages.xml.rels file found in {}".format(source))
|
||||
return
|
||||
return # type: ignore[return-value]
|
||||
if "docProps/app.xml" not in zfile.namelist():
|
||||
print("WARNING - No app.xml file found in {}".format(source))
|
||||
return
|
||||
return # type: ignore[return-value]
|
||||
|
||||
pagesxml_content: dict = xmltodict.parse(zfile.read("visio/pages/pages.xml"))
|
||||
appxml_content: dict = xmltodict.parse(zfile.read("docProps/app.xml"))
|
||||
@@ -79,7 +79,7 @@ class VsdxParser(BaseBlobParser, ABC):
|
||||
rel["@Name"].strip() for rel in pagesxml_content["Pages"]["Page"]
|
||||
]
|
||||
else:
|
||||
disordered_names: List[str] = [
|
||||
disordered_names: List[str] = [ # type: ignore[no-redef]
|
||||
pagesxml_content["Pages"]["Page"]["@Name"].strip()
|
||||
]
|
||||
if isinstance(pagesxmlrels_content["Relationships"]["Relationship"], list):
|
||||
@@ -88,7 +88,7 @@ class VsdxParser(BaseBlobParser, ABC):
|
||||
for rel in pagesxmlrels_content["Relationships"]["Relationship"]
|
||||
]
|
||||
else:
|
||||
disordered_paths: List[str] = [
|
||||
disordered_paths: List[str] = [ # type: ignore[no-redef]
|
||||
"visio/pages/"
|
||||
+ pagesxmlrels_content["Relationships"]["Relationship"]["@Target"]
|
||||
]
|
||||
|
Reference in New Issue
Block a user