mirror of
https://github.com/hwchase17/langchain.git
synced 2025-07-16 17:53:37 +00:00
community[patch]: support modin document loader (#18866)
The LangChain community document loaders support `pyspark`, `polars`, and `pandas` DataFrames, but not `modin` DataFrames. This PR adds `modin` support to `DataFrameLoader` through a new `engine` parameter.
This commit is contained in:
parent
dee256ef5a
commit
8113d612bb
@ -1,4 +1,4 @@
|
|||||||
from typing import Any, Iterator
|
from typing import Any, Iterator, Literal
|
||||||
|
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
|
|
||||||
@ -30,7 +30,12 @@ class BaseDataFrameLoader(BaseLoader):
|
|||||||
class DataFrameLoader(BaseDataFrameLoader):
|
class DataFrameLoader(BaseDataFrameLoader):
|
||||||
"""Load `Pandas` DataFrame."""
|
"""Load `Pandas` DataFrame."""
|
||||||
|
|
||||||
def __init__(self, data_frame: Any, page_content_column: str = "text"):
|
def __init__(
|
||||||
|
self,
|
||||||
|
data_frame: Any,
|
||||||
|
page_content_column: str = "text",
|
||||||
|
engine: Literal["pandas", "modin"] = "pandas",
|
||||||
|
):
|
||||||
"""Initialize with dataframe object.
|
"""Initialize with dataframe object.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
@ -39,7 +44,14 @@ class DataFrameLoader(BaseDataFrameLoader):
|
|||||||
Defaults to "text".
|
Defaults to "text".
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
import pandas as pd
|
if engine == "pandas":
|
||||||
|
import pandas as pd
|
||||||
|
elif engine == "modin":
|
||||||
|
import modin.pandas as pd
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
f"Unsupported engine {engine}. Must be one of 'pandas', or 'modin'."
|
||||||
|
)
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"Unable to import pandas, please install with `pip install pandas`."
|
"Unable to import pandas, please install with `pip install pandas`."
|
||||||
|
Loading…
Reference in New Issue
Block a user