community: Bump ruff version to 0.9 (#29206)

Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Christophe Bornet
2025-02-08 02:21:10 +01:00
committed by GitHub
parent 30f6c9f5c8
commit 723031d548
166 changed files with 5415 additions and 530 deletions

View File

@@ -21,8 +21,7 @@ class YoutubeAudioLoader(BlobLoader):
import yt_dlp
except ImportError:
raise ImportError(
"yt_dlp package not found, please install it with "
"`pip install yt_dlp`"
"yt_dlp package not found, please install it with `pip install yt_dlp`"
)
# Use yt_dlp to download audio given a YouTube url

View File

@@ -117,6 +117,10 @@ class CHMParser(object):
for item in index:
content = self.load(item["local"])
res.append(
{"name": item["name"], "local": item["local"], "content": content}
{
"name": item["name"],
"local": item["local"],
"content": content,
}
)
return res

View File

@@ -652,7 +652,7 @@ class ConfluenceLoader(BaseLoader):
from PIL import Image # noqa: F401
except ImportError:
raise ImportError(
"`Pillow` package not found, " "please run `pip install Pillow`"
"`Pillow` package not found, please run `pip install Pillow`"
)
# depending on setup you may also need to set the correct path for

View File

@@ -164,9 +164,13 @@ class CSVLoader(BaseLoader):
f"Source column '{self.source_column}' not found in CSV file."
)
content = "\n".join(
f"""{k.strip() if k is not None else k}: {v.strip()
if isinstance(v, str) else ','.join(map(str.strip, v))
if isinstance(v, list) else v}"""
f"""{k.strip() if k is not None else k}: {
v.strip()
if isinstance(v, str)
else ",".join(map(str.strip, v))
if isinstance(v, list)
else v
}"""
for k, v in row.items()
if (
k in self.content_columns

View File

@@ -89,13 +89,13 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
file_path is not None or url_path is not None or bytes_source is not None
), "file_path, url_path or bytes_source must be provided"
assert (
api_key is not None or azure_credential is not None
), "Either api_key or azure_credential must be provided."
assert api_key is not None or azure_credential is not None, (
"Either api_key or azure_credential must be provided."
)
assert (
api_key is None or azure_credential is None
), "Only one of api_key or azure_credential should be provided."
assert api_key is None or azure_credential is None, (
"Only one of api_key or azure_credential should be provided."
)
self.file_path = file_path
self.url_path = url_path

View File

@@ -54,7 +54,7 @@ class DropboxLoader(BaseLoader, BaseModel):
try:
from dropbox import Dropbox, exceptions
except ImportError:
raise ImportError("You must run " "`pip install dropbox")
raise ImportError("You must run `pip install dropbox")
try:
dbx = Dropbox(self.dropbox_access_token)
@@ -73,7 +73,7 @@ class DropboxLoader(BaseLoader, BaseModel):
from dropbox import exceptions
from dropbox.files import FileMetadata
except ImportError:
raise ImportError("You must run " "`pip install dropbox")
raise ImportError("You must run `pip install dropbox")
try:
results = dbx.files_list_folder(folder_path, recursive=self.recursive)
@@ -98,7 +98,7 @@ class DropboxLoader(BaseLoader, BaseModel):
try:
from dropbox import exceptions
except ImportError:
raise ImportError("You must run " "`pip install dropbox")
raise ImportError("You must run `pip install dropbox")
try:
file_metadata = dbx.files_get_metadata(file_path)

View File

@@ -65,7 +65,7 @@ class MWDumpLoader(BaseLoader):
import mwxml
except ImportError as e:
raise ImportError(
"Unable to import 'mwxml'. Please install with" " `pip install mwxml`."
"Unable to import 'mwxml'. Please install with `pip install mwxml`."
) from e
return mwxml.Dump.from_file(open(self.file_path, encoding=self.encoding))

View File

@@ -98,7 +98,10 @@ class MongodbLoader(BaseLoader):
# Optionally add database and collection names to metadata
if self.include_db_collection_in_metadata:
metadata.update(
{"database": self.db_name, "collection": self.collection_name}
{
"database": self.db_name,
"collection": self.collection_name,
}
)
# Extract text content from filtered fields or use the entire document

View File

@@ -126,7 +126,7 @@ class NotionDBLoader(BaseLoader):
value = prop_data["url"]
elif prop_type == "unique_id":
value = (
f'{prop_data["unique_id"]["prefix"]}-{prop_data["unique_id"]["number"]}'
f"{prop_data['unique_id']['prefix']}-{prop_data['unique_id']['number']}"
if prop_data["unique_id"]
else None
)

View File

@@ -19,7 +19,12 @@ class NucliaLoader(BaseLoader):
def load(self) -> List[Document]:
"""Load documents."""
data = self.nua.run(
{"action": "pull", "id": self.id, "path": None, "text": None}
{
"action": "pull",
"id": self.id,
"path": None,
"text": None,
}
)
if not data:
return []

View File

@@ -82,8 +82,7 @@ class OracleAutonomousDatabaseLoader(BaseLoader):
import oracledb
except ImportError as e:
raise ImportError(
"Could not import oracledb, "
"please install with 'pip install oracledb'"
"Could not import oracledb, please install with 'pip install oracledb'"
) from e
connect_param = {"user": self.user, "password": self.password, "dsn": self.dsn}
if self.dsn == self.tns_name:

View File

@@ -148,8 +148,7 @@ class AzureOpenAIWhisperParser(BaseBlobParser):
import openai
except ImportError:
raise ImportError(
"openai package not found, please install it with "
"`pip install openai`"
"openai package not found, please install it with `pip install openai`"
)
if is_openai_v1():
@@ -278,14 +277,13 @@ class OpenAIWhisperParser(BaseBlobParser):
import openai
except ImportError:
raise ImportError(
"openai package not found, please install it with "
"`pip install openai`"
"openai package not found, please install it with `pip install openai`"
)
try:
from pydub import AudioSegment
except ImportError:
raise ImportError(
"pydub package not found, please install it with " "`pip install pydub`"
"pydub package not found, please install it with `pip install pydub`"
)
if is_openai_v1():
@@ -402,7 +400,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
import torch
except ImportError:
raise ImportError(
"torch package not found, please install it with " "`pip install torch`"
"torch package not found, please install it with `pip install torch`"
)
# Determine the device to use
@@ -533,7 +531,7 @@ class YandexSTTParser(BaseBlobParser):
from pydub import AudioSegment
except ImportError:
raise ImportError(
"pydub package not found, please install it with " "`pip install pydub`"
"pydub package not found, please install it with `pip install pydub`"
)
if self.api_key:

View File

@@ -230,7 +230,7 @@ class DocAIParser(BaseBlobParser):
time_elapsed += check_in_interval_sec
if time_elapsed > timeout_sec:
raise TimeoutError(
"Timeout exceeded! Check operations " f"{operation_names} later!"
f"Timeout exceeded! Check operations {operation_names} later!"
)
logger.debug(".")

View File

@@ -44,7 +44,7 @@ class GrobidParser(BaseBlobParser):
from bs4 import BeautifulSoup
except ImportError:
raise ImportError(
"`bs4` package not found, please install it with " "`pip install bs4`"
"`bs4` package not found, please install it with `pip install bs4`"
)
soup = BeautifulSoup(xml_data, "xml")
sections = soup.find_all("div")

View File

@@ -120,9 +120,7 @@ def _format_inner_image(blob: Blob, content: str, format: str) -> str:
content = content.replace("]", r"\\]")
content = f"![{content}]({source})"
elif format == "html-img":
content = (
f'<img alt="{html.escape(content, quote=True)} ' f'src="{source}" />'
)
content = f'<img alt="{html.escape(content, quote=True)} src="{source}" />'
return content
@@ -361,7 +359,7 @@ class PyPDFParser(BaseBlobParser):
import pypdf
except ImportError:
raise ImportError(
"pypdf package not found, please install it with `pip install pypdf`"
"`pypdf` package not found, please install it with `pip install pypdf`"
)
def _extract_text_from_page(page: pypdf.PageObject) -> str:
@@ -1225,8 +1223,7 @@ class PDFPlumberParser(BaseBlobParser):
import PIL # noqa:F401
except ImportError:
raise ImportError(
"pillow package not found, please install it with"
" `pip install pillow`"
"pillow package not found, please install it with `pip install pillow`"
)
self.text_kwargs = text_kwargs or {}
self.dedupe = dedupe

View File

@@ -36,8 +36,7 @@ class PySparkDataFrameLoader(BaseLoader):
from pyspark.sql import DataFrame, SparkSession
except ImportError:
raise ImportError(
"pyspark is not installed. "
"Please install it with `pip install pyspark`"
"pyspark is not installed. Please install it with `pip install pyspark`"
)
self.spark = (

View File

@@ -40,7 +40,7 @@ class QuipLoader(BaseLoader):
from quip_api.quip import QuipClient
except ImportError:
raise ImportError(
"`quip_api` package not found, please run " "`pip install quip_api`"
"`quip_api` package not found, please run `pip install quip_api`"
)
self.quip_client = QuipClient(

View File

@@ -58,7 +58,7 @@ class RSpaceLoader(BaseLoader):
from rspace_client.eln import eln, field_content
except ImportError:
raise ImportError("You must run " "`pip install rspace_client`")
raise ImportError("You must run `pip install rspace_client`")
try:
eln = eln.ELNClient(self.url, self.api_key)
@@ -66,8 +66,7 @@ class RSpaceLoader(BaseLoader):
except Exception:
raise Exception(
f"Unable to initialize client - is url {self.url} or "
f"api key correct?"
f"Unable to initialize client - is url {self.url} or api key correct?"
)
return eln, field_content.FieldContent