mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-21 18:39:57 +00:00
community: Bump ruff version to 0.9 (#29206)
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
committed by
GitHub
parent
30f6c9f5c8
commit
723031d548
@@ -21,8 +21,7 @@ class YoutubeAudioLoader(BlobLoader):
|
||||
import yt_dlp
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"yt_dlp package not found, please install it with "
|
||||
"`pip install yt_dlp`"
|
||||
"yt_dlp package not found, please install it with `pip install yt_dlp`"
|
||||
)
|
||||
|
||||
# Use yt_dlp to download audio given a YouTube url
|
||||
|
@@ -117,6 +117,10 @@ class CHMParser(object):
|
||||
for item in index:
|
||||
content = self.load(item["local"])
|
||||
res.append(
|
||||
{"name": item["name"], "local": item["local"], "content": content}
|
||||
{
|
||||
"name": item["name"],
|
||||
"local": item["local"],
|
||||
"content": content,
|
||||
}
|
||||
)
|
||||
return res
|
||||
|
@@ -652,7 +652,7 @@ class ConfluenceLoader(BaseLoader):
|
||||
from PIL import Image # noqa: F401
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"`Pillow` package not found, " "please run `pip install Pillow`"
|
||||
"`Pillow` package not found, please run `pip install Pillow`"
|
||||
)
|
||||
|
||||
# depending on setup you may also need to set the correct path for
|
||||
|
@@ -164,9 +164,13 @@ class CSVLoader(BaseLoader):
|
||||
f"Source column '{self.source_column}' not found in CSV file."
|
||||
)
|
||||
content = "\n".join(
|
||||
f"""{k.strip() if k is not None else k}: {v.strip()
|
||||
if isinstance(v, str) else ','.join(map(str.strip, v))
|
||||
if isinstance(v, list) else v}"""
|
||||
f"""{k.strip() if k is not None else k}: {
|
||||
v.strip()
|
||||
if isinstance(v, str)
|
||||
else ",".join(map(str.strip, v))
|
||||
if isinstance(v, list)
|
||||
else v
|
||||
}"""
|
||||
for k, v in row.items()
|
||||
if (
|
||||
k in self.content_columns
|
||||
|
@@ -89,13 +89,13 @@ class AzureAIDocumentIntelligenceLoader(BaseLoader):
|
||||
file_path is not None or url_path is not None or bytes_source is not None
|
||||
), "file_path, url_path or bytes_source must be provided"
|
||||
|
||||
assert (
|
||||
api_key is not None or azure_credential is not None
|
||||
), "Either api_key or azure_credential must be provided."
|
||||
assert api_key is not None or azure_credential is not None, (
|
||||
"Either api_key or azure_credential must be provided."
|
||||
)
|
||||
|
||||
assert (
|
||||
api_key is None or azure_credential is None
|
||||
), "Only one of api_key or azure_credential should be provided."
|
||||
assert api_key is None or azure_credential is None, (
|
||||
"Only one of api_key or azure_credential should be provided."
|
||||
)
|
||||
|
||||
self.file_path = file_path
|
||||
self.url_path = url_path
|
||||
|
@@ -54,7 +54,7 @@ class DropboxLoader(BaseLoader, BaseModel):
|
||||
try:
|
||||
from dropbox import Dropbox, exceptions
|
||||
except ImportError:
|
||||
raise ImportError("You must run " "`pip install dropbox")
|
||||
raise ImportError("You must run `pip install dropbox")
|
||||
|
||||
try:
|
||||
dbx = Dropbox(self.dropbox_access_token)
|
||||
@@ -73,7 +73,7 @@ class DropboxLoader(BaseLoader, BaseModel):
|
||||
from dropbox import exceptions
|
||||
from dropbox.files import FileMetadata
|
||||
except ImportError:
|
||||
raise ImportError("You must run " "`pip install dropbox")
|
||||
raise ImportError("You must run `pip install dropbox")
|
||||
|
||||
try:
|
||||
results = dbx.files_list_folder(folder_path, recursive=self.recursive)
|
||||
@@ -98,7 +98,7 @@ class DropboxLoader(BaseLoader, BaseModel):
|
||||
try:
|
||||
from dropbox import exceptions
|
||||
except ImportError:
|
||||
raise ImportError("You must run " "`pip install dropbox")
|
||||
raise ImportError("You must run `pip install dropbox")
|
||||
|
||||
try:
|
||||
file_metadata = dbx.files_get_metadata(file_path)
|
||||
|
@@ -65,7 +65,7 @@ class MWDumpLoader(BaseLoader):
|
||||
import mwxml
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Unable to import 'mwxml'. Please install with" " `pip install mwxml`."
|
||||
"Unable to import 'mwxml'. Please install with `pip install mwxml`."
|
||||
) from e
|
||||
|
||||
return mwxml.Dump.from_file(open(self.file_path, encoding=self.encoding))
|
||||
|
@@ -98,7 +98,10 @@ class MongodbLoader(BaseLoader):
|
||||
# Optionally add database and collection names to metadata
|
||||
if self.include_db_collection_in_metadata:
|
||||
metadata.update(
|
||||
{"database": self.db_name, "collection": self.collection_name}
|
||||
{
|
||||
"database": self.db_name,
|
||||
"collection": self.collection_name,
|
||||
}
|
||||
)
|
||||
|
||||
# Extract text content from filtered fields or use the entire document
|
||||
|
@@ -126,7 +126,7 @@ class NotionDBLoader(BaseLoader):
|
||||
value = prop_data["url"]
|
||||
elif prop_type == "unique_id":
|
||||
value = (
|
||||
f'{prop_data["unique_id"]["prefix"]}-{prop_data["unique_id"]["number"]}'
|
||||
f"{prop_data['unique_id']['prefix']}-{prop_data['unique_id']['number']}"
|
||||
if prop_data["unique_id"]
|
||||
else None
|
||||
)
|
||||
|
@@ -19,7 +19,12 @@ class NucliaLoader(BaseLoader):
|
||||
def load(self) -> List[Document]:
|
||||
"""Load documents."""
|
||||
data = self.nua.run(
|
||||
{"action": "pull", "id": self.id, "path": None, "text": None}
|
||||
{
|
||||
"action": "pull",
|
||||
"id": self.id,
|
||||
"path": None,
|
||||
"text": None,
|
||||
}
|
||||
)
|
||||
if not data:
|
||||
return []
|
||||
|
@@ -82,8 +82,7 @@ class OracleAutonomousDatabaseLoader(BaseLoader):
|
||||
import oracledb
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Could not import oracledb, "
|
||||
"please install with 'pip install oracledb'"
|
||||
"Could not import oracledb, please install with 'pip install oracledb'"
|
||||
) from e
|
||||
connect_param = {"user": self.user, "password": self.password, "dsn": self.dsn}
|
||||
if self.dsn == self.tns_name:
|
||||
|
@@ -148,8 +148,7 @@ class AzureOpenAIWhisperParser(BaseBlobParser):
|
||||
import openai
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"openai package not found, please install it with "
|
||||
"`pip install openai`"
|
||||
"openai package not found, please install it with `pip install openai`"
|
||||
)
|
||||
|
||||
if is_openai_v1():
|
||||
@@ -278,14 +277,13 @@ class OpenAIWhisperParser(BaseBlobParser):
|
||||
import openai
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"openai package not found, please install it with "
|
||||
"`pip install openai`"
|
||||
"openai package not found, please install it with `pip install openai`"
|
||||
)
|
||||
try:
|
||||
from pydub import AudioSegment
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"pydub package not found, please install it with " "`pip install pydub`"
|
||||
"pydub package not found, please install it with `pip install pydub`"
|
||||
)
|
||||
|
||||
if is_openai_v1():
|
||||
@@ -402,7 +400,7 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
|
||||
import torch
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"torch package not found, please install it with " "`pip install torch`"
|
||||
"torch package not found, please install it with `pip install torch`"
|
||||
)
|
||||
|
||||
# Determine the device to use
|
||||
@@ -533,7 +531,7 @@ class YandexSTTParser(BaseBlobParser):
|
||||
from pydub import AudioSegment
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"pydub package not found, please install it with " "`pip install pydub`"
|
||||
"pydub package not found, please install it with `pip install pydub`"
|
||||
)
|
||||
|
||||
if self.api_key:
|
||||
|
@@ -230,7 +230,7 @@ class DocAIParser(BaseBlobParser):
|
||||
time_elapsed += check_in_interval_sec
|
||||
if time_elapsed > timeout_sec:
|
||||
raise TimeoutError(
|
||||
"Timeout exceeded! Check operations " f"{operation_names} later!"
|
||||
f"Timeout exceeded! Check operations {operation_names} later!"
|
||||
)
|
||||
logger.debug(".")
|
||||
|
||||
|
@@ -44,7 +44,7 @@ class GrobidParser(BaseBlobParser):
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"`bs4` package not found, please install it with " "`pip install bs4`"
|
||||
"`bs4` package not found, please install it with `pip install bs4`"
|
||||
)
|
||||
soup = BeautifulSoup(xml_data, "xml")
|
||||
sections = soup.find_all("div")
|
||||
|
@@ -120,9 +120,7 @@ def _format_inner_image(blob: Blob, content: str, format: str) -> str:
|
||||
content = content.replace("]", r"\\]")
|
||||
content = f""
|
||||
elif format == "html-img":
|
||||
content = (
|
||||
f'<img alt="{html.escape(content, quote=True)} ' f'src="{source}" />'
|
||||
)
|
||||
content = f'<img alt="{html.escape(content, quote=True)} src="{source}" />'
|
||||
return content
|
||||
|
||||
|
||||
@@ -361,7 +359,7 @@ class PyPDFParser(BaseBlobParser):
|
||||
import pypdf
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"pypdf package not found, please install it with `pip install pypdf`"
|
||||
"`pypdf` package not found, please install it with `pip install pypdf`"
|
||||
)
|
||||
|
||||
def _extract_text_from_page(page: pypdf.PageObject) -> str:
|
||||
@@ -1225,8 +1223,7 @@ class PDFPlumberParser(BaseBlobParser):
|
||||
import PIL # noqa:F401
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"pillow package not found, please install it with"
|
||||
" `pip install pillow`"
|
||||
"pillow package not found, please install it with `pip install pillow`"
|
||||
)
|
||||
self.text_kwargs = text_kwargs or {}
|
||||
self.dedupe = dedupe
|
||||
|
@@ -36,8 +36,7 @@ class PySparkDataFrameLoader(BaseLoader):
|
||||
from pyspark.sql import DataFrame, SparkSession
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"pyspark is not installed. "
|
||||
"Please install it with `pip install pyspark`"
|
||||
"pyspark is not installed. Please install it with `pip install pyspark`"
|
||||
)
|
||||
|
||||
self.spark = (
|
||||
|
@@ -40,7 +40,7 @@ class QuipLoader(BaseLoader):
|
||||
from quip_api.quip import QuipClient
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"`quip_api` package not found, please run " "`pip install quip_api`"
|
||||
"`quip_api` package not found, please run `pip install quip_api`"
|
||||
)
|
||||
|
||||
self.quip_client = QuipClient(
|
||||
|
@@ -58,7 +58,7 @@ class RSpaceLoader(BaseLoader):
|
||||
from rspace_client.eln import eln, field_content
|
||||
|
||||
except ImportError:
|
||||
raise ImportError("You must run " "`pip install rspace_client`")
|
||||
raise ImportError("You must run `pip install rspace_client`")
|
||||
|
||||
try:
|
||||
eln = eln.ELNClient(self.url, self.api_key)
|
||||
@@ -66,8 +66,7 @@ class RSpaceLoader(BaseLoader):
|
||||
|
||||
except Exception:
|
||||
raise Exception(
|
||||
f"Unable to initialize client - is url {self.url} or "
|
||||
f"api key correct?"
|
||||
f"Unable to initialize client - is url {self.url} or api key correct?"
|
||||
)
|
||||
|
||||
return eln, field_content.FieldContent
|
||||
|
Reference in New Issue
Block a user