1
0
mirror of https://github.com/hwchase17/langchain.git synced 2025-05-02 05:45:47 +00:00
langchain/libs/community/langchain_community/document_loaders
Bagatur a0c2281540
infra: update mypy 1.10, ruff 0.5 ()
```python
"""python scripts/update_mypy_ruff.py"""
import glob
import tomllib
from pathlib import Path

import toml
import subprocess
import re

# Directory one level above the folder that contains this script
# (`parents[0]` is the script's own directory, `parents[1]` is its parent).
# `main()` globs for `libs/**/pyproject.toml` underneath this directory.
ROOT_DIR = Path(__file__).parents[1]


# Matches one mypy error line, e.g.
#   pkg/mod.py:12: error: Incompatible types in assignment  [assignment]
# and captures (path, line number, error code).  An optional column number
# (`path:line:col:`) is skipped, and the error code must be the final
# bracketed token on the line so that brackets inside the message itself
# (e.g. `"List[int]"`) are never mistaken for the code.
_MYPY_ERROR_RE = re.compile(r"^(.+?):(\d+):(?:\d+:)? error:.*\[([^\[\]]+)\]\s*$")


def _collect_mypy_errors(output: str) -> dict[tuple[str, int], list[str]]:
    """Group the mypy error codes found in *output* by (path, line number).

    Lines that are not mypy errors are ignored.  Each error code is listed at
    most once per source line, in first-seen order.
    """
    errors: dict[tuple[str, int], list[str]] = {}
    for log_line in output.splitlines():
        match = _MYPY_ERROR_RE.match(log_line)
        if match is None:
            continue
        error_path, line_no, error_type = match.groups()
        codes = errors.setdefault((error_path, int(line_no)), [])
        if error_type not in codes:
            codes.append(error_type)
    return errors


def _add_type_ignores(
    package_dir: str, errors: dict[tuple[str, int], list[str]]
) -> None:
    """Append `# type: ignore[...]` to every source line listed in *errors*.

    Paths in *errors* are resolved relative to *package_dir*.  Files that do
    not exist and line numbers outside a file are skipped.  Each file is read
    and written once, regardless of how many of its lines are flagged.
    """
    by_file: dict[str, dict[int, list[str]]] = {}
    for (error_path, line_no), codes in errors.items():
        by_file.setdefault(error_path, {})[line_no] = codes

    for error_path, codes_by_line in by_file.items():
        full_path = Path(package_dir) / error_path
        try:
            with open(full_path, "r", encoding="utf-8") as f:
                file_lines = f.readlines()
        except FileNotFoundError:
            continue
        for line_no, codes in codes_by_line.items():
            index = line_no - 1
            if not 0 <= index < len(file_lines):
                # Stale or bogus line number: never touch an unrelated line.
                continue
            all_errors = ", ".join(codes)
            # Strip only the newline; the last line of a file may not have
            # one, and slicing off a fixed character would eat real code.
            code_text = file_lines[index].rstrip("\n")
            file_lines[index] = f"{code_text}  # type: ignore[{all_errors}]\n"
        with open(full_path, "w", encoding="utf-8") as f:
            f.writelines(file_lines)


def main() -> None:
    """Bump mypy/ruff in every `libs/**/pyproject.toml` and silence new errors.

    For each pyproject file under `ROOT_DIR / "libs"` that declares both a
    `typing` group with `mypy` and a `lint` group with `ruff`:

    1. pin mypy to `^1.10` and ruff to `^0.5` and rewrite the file;
    2. re-lock, install the typing group and run mypy in that package;
    3. append `# type: ignore[<codes>]` to every line mypy reported;
    4. run the ruff formatter and import sorter over the package.

    Projects missing either dependency entry are left untouched.
    """
    for path in glob.glob(str(ROOT_DIR / "libs/**/pyproject.toml"), recursive=True):
        print(path)
        with open(path, "rb") as f:
            pyproject = tomllib.load(f)
        try:
            pyproject["tool"]["poetry"]["group"]["typing"]["dependencies"]["mypy"] = (
                "^1.10"
            )
            pyproject["tool"]["poetry"]["group"]["lint"]["dependencies"]["ruff"] = (
                "^0.5"
            )
        except KeyError:
            # Not a project that pins both tools; nothing has been written yet.
            continue
        # NOTE(review): round-tripping through `toml.dump` presumably drops
        # comments/formatting from the original file - confirm this is fine.
        with open(path, "w") as f:
            toml.dump(pyproject, f)

        cwd = str(Path(path).parent)
        completed = subprocess.run(
            "poetry lock --no-update; poetry install --with typing; poetry run mypy . --no-color",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )

        to_ignore = _collect_mypy_errors(completed.stdout)
        print(len(to_ignore))
        _add_type_ignores(cwd, to_ignore)

        # `ruff check` is the explicit lint subcommand.  The bare
        # `ruff <path>` spelling was deprecated and, per the Ruff 0.5 release
        # notes, removed - so it would fail with the version pinned above.
        subprocess.run(
            "poetry run ruff format .; poetry run ruff check --select I --fix .",
            cwd=cwd,
            shell=True,
            capture_output=True,
            text=True,
        )


# Run the update only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

```
2024-07-03 10:33:27 -07:00
..
blob_loaders infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
parsers infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
__init__.py infra: rm unused # noqa violations () 2024-05-22 15:21:08 -07:00
acreom.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
airbyte_json.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
airbyte.py
airtable.py community[patch]: Airtable to allow for addtl params () 2024-06-03 13:05:56 -07:00
apify_dataset.py community[patch]: Update root_validators to use explicit pre=True or pre=False () 2024-07-01 17:13:23 -04:00
arcgis_loader.py
arxiv.py
assemblyai.py community[patch]: docstrings update () 2024-04-11 16:23:27 -04:00
astradb.py (all): update removal in deprecation warnings from 0.2 to 0.3 () 2024-05-03 14:29:36 -04:00
async_html.py community: add **request_kwargs and expect TimeError AsyncHtmlLoader () 2024-06-18 20:02:46 -07:00
athena.py community[minor]: import fix () 2024-04-29 10:32:50 -04:00
azlyrics.py
azure_ai_data.py
azure_blob_storage_container.py
azure_blob_storage_file.py
baiducloud_bos_directory.py
baiducloud_bos_file.py
base_o365.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
base.py core: Move document loader interfaces to core () 2024-03-06 13:59:00 -05:00
bibtex.py
bigquery.py (all): update removal in deprecation warnings from 0.2 to 0.3 () 2024-05-03 14:29:36 -04:00
bilibili.py community[patch]: docstrings update () 2024-04-11 16:23:27 -04:00
blackboard.py infra: rm unused # noqa violations () 2024-05-22 15:21:08 -07:00
blockchain.py
brave_search.py
browserbase.py community: updated Browserbase loader () 2024-05-16 08:21:23 -07:00
browserless.py
cassandra.py community[minor]: Add Cassandra ByteStore () 2024-05-23 10:46:23 -04:00
chatgpt.py
chm.py
chromium.py community[minor]: add user agent for web scraping loaders () 2024-06-05 15:20:34 +00:00
college_confidential.py
concurrent.py community[patch]: import flattening fix () 2024-04-10 13:01:19 -04:00
confluence.py docs: Fix wrongly referenced class name in confluence.py () 2024-06-14 14:00:48 -07:00
conllu.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
couchbase.py
csv_loader.py docs: Standardize DocumentLoader docstrings () 2024-06-18 03:26:36 +00:00
cube_semantic.py
datadog_logs.py
dataframe.py community[patch]: support modin document loader () 2024-03-10 18:40:04 -07:00
diffbot.py
directory.py community: glob multiple patterns when using DirectoryLoader () 2024-06-18 09:24:50 -07:00
discord.py
doc_intelligence.py docs: community docstring updates () 2024-04-29 17:40:23 -04:00
docugami.py (all): update removal in deprecation warnings from 0.2 to 0.3 () 2024-05-03 14:29:36 -04:00
docusaurus.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
dropbox.py
duckdb_loader.py
email.py community[patch]: Small Fix in OutlookMessageLoader (Close the Message once Open) () 2024-06-10 13:08:39 -07:00
epub.py
etherscan.py
evernote.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
excel.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
facebook_chat.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
fauna.py
figma.py
firecrawl.py community[patch]: Update firecrawl api key name () 2024-05-27 21:39:29 +00:00
gcs_directory.py (all): update removal in deprecation warnings from 0.2 to 0.3 () 2024-05-03 14:29:36 -04:00
gcs_file.py (all): update removal in deprecation warnings from 0.2 to 0.3 () 2024-05-03 14:29:36 -04:00
generic.py community[patch]: import flattening fix () 2024-04-10 13:01:19 -04:00
geodataframe.py
git.py Merge pull request 2024-03-06 13:25:14 -05:00
gitbook.py
github.py
glue_catalog.py community[minor]: Add glue catalog loader () 2024-04-16 11:39:23 -04:00
google_speech_to_text.py (all): update removal in deprecation warnings from 0.2 to 0.3 () 2024-05-03 14:29:36 -04:00
googledrive.py (all): update removal in deprecation warnings from 0.2 to 0.3 () 2024-05-03 14:29:36 -04:00
gutenberg.py
helpers.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
hn.py
html_bs.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
html.py
hugging_face_dataset.py
hugging_face_model.py
ifixit.py
image_captions.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
image.py
imsdb.py
iugu.py
joplin.py
json_loader.py docs: Standardize DocumentLoader docstrings () 2024-06-18 03:26:36 +00:00
kinetica_loader.py community[patch]: Kinetica Integrations handled error in querying; quotes in table names; updated gpudb API () 2024-06-11 10:01:26 -04:00
lakefs.py
larksuite.py community[minor]: Add LarkSuite wiki document loader. () 2024-04-29 10:37:50 -04:00
llmsherpa.py community[minor]: add support for llmsherpa () 2024-03-29 16:04:57 -07:00
markdown.py
mastodon.py Merge pull request 2024-03-06 13:23:14 -05:00
max_compute.py
mediawikidump.py
merge.py
mhtml.py community[patch]: upgrade to recent version of mypy () 2024-05-13 14:55:07 -04:00
mintbase.py community[minor]: add mintbase loader to langchain () 2024-04-30 04:11:56 +00:00
modern_treasury.py
mongodb.py community: fix issue with nested field extraction in MongodbLoader () 2024-06-24 19:29:11 +00:00
news.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
notebook.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
notion.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
notiondb.py community[patch]: Fix NotionDBLoader 400 Error by conditionally adding filter parameter () 2024-03-14 13:56:57 +00:00
nuclia.py
obs_directory.py
obs_file.py
obsidian.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
odt.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
onedrive_file.py
onedrive.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
onenote.py community[patch]: upgrade to recent version of mypy () 2024-05-13 14:55:07 -04:00
open_city_data.py
oracleadb_loader.py community[minor]: add oracle autonomous database doc loader integration () 2024-03-26 17:02:18 -07:00
oracleai.py community[minor]: Oraclevs integration () 2024-05-04 03:15:35 +00:00
org_mode.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
pdf.py community[patch]: upgrade to recent version of mypy () 2024-05-13 14:55:07 -04:00
pebblo.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
polars_dataframe.py
powerpoint.py
psychic.py multiple: Remove unnecessary Ruff suppression comments () 2024-04-30 17:13:48 +00:00
pubmed.py community[patch]: upgrade to recent version of mypy () 2024-05-13 14:55:07 -04:00
pyspark_dataframe.py
python.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
quip.py community[major]: lint for usage of xml library () 2024-05-24 15:23:53 +00:00
readthedocs.py
recursive_url_loader.py docs, cli[patch]: document loaders doc template () 2024-06-13 19:28:57 -07:00
reddit.py
roam.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
rocksetdb.py
rspace.py
rss.py multiple: Remove unnecessary Ruff suppression comments () 2024-04-30 17:13:48 +00:00
rst.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
rtf.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
s3_directory.py community[patch]: Skip nested directories when using S3DirectoryLoader () 2024-03-08 16:50:58 -08:00
s3_file.py community[patch]: support unstructured_kwargs for s3 loader () 2024-03-27 22:03:48 +00:00
scrapfly.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
sharepoint.py Enhance metadata of sharepointLoader. () 2024-06-21 17:03:38 -07:00
sitemap.py community[patch]: SitemapLoader restrict depth of parsing sitemap (CVE-2024-2965) () 2024-06-14 13:04:40 -04:00
slack_directory.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
snowflake_loader.py community[patch]: upgrade to recent version of mypy () 2024-05-13 14:55:07 -04:00
spider.py doc list not empty () 2024-05-20 08:24:06 -07:00
spreedly.py
sql_database.py community[patch]: restore compatibility with SQLAlchemy 1.x () 2024-06-19 17:58:57 +00:00
srt.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
stripe.py
surrealdb.py
telegram.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
tencent_cos_directory.py
tencent_cos_file.py
tensorflow_datasets.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
text.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
tidb.py
tomarkdown.py
toml.py
trello.py community: Implement lazy_load() for TrelloLoader () 2024-03-06 13:04:36 -05:00
tsv.py community: better support of pathlib paths in document loaders () 2024-03-26 11:51:52 -04:00
twitter.py
unstructured.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
url_playwright.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
url_selenium.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
url.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
vsdx.py community[patch]: import flattening fix () 2024-04-10 13:01:19 -04:00
weather.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
web_base.py community[minor]: add user agent for web scraping loaders () 2024-06-05 15:20:34 +00:00
whatsapp_chat.py
wikipedia.py community[patch]: upgrade to recent version of mypy () 2024-05-13 14:55:07 -04:00
word_document.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
xml.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
xorbits.py
youtube.py infra: update mypy 1.10, ruff 0.5 () 2024-07-03 10:33:27 -07:00
yuque.py