langchain/libs/community/langchain_community/document_loaders/tsv.py
Erick Friis 600b7bdd61
all: test 3.13 ci (#27197)
Co-authored-by: Bagatur <baskaryan@gmail.com>
Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
2024-10-25 12:56:58 -07:00

42 lines
1.4 KiB
Python

from pathlib import Path
from typing import Any, List, Union
from langchain_community.document_loaders.unstructured import (
UnstructuredFileLoader,
validate_unstructured_version,
)
class UnstructuredTSVLoader(UnstructuredFileLoader):
    """Loader for `TSV` files backed by `Unstructured`.

    As with the other Unstructured loaders, two modes are available:
    "single" and "elements". In "elements" mode the whole TSV file is
    returned as one Unstructured Table element, and the document metadata
    carries an HTML representation of the table under the "text_as_html"
    key.

    Examples
    --------
    from langchain_community.document_loaders.tsv import UnstructuredTSVLoader

    loader = UnstructuredTSVLoader("stanley-cups.tsv", mode="elements")
    docs = loader.load()
    """

    def __init__(
        self,
        file_path: Union[str, Path],
        mode: str = "single",
        **unstructured_kwargs: Any,
    ):
        """Set up the loader for a single TSV file.

        Args:
            file_path: Location of the TSV file to load.
            mode: Loading mode handed to the parent loader. Defaults to
                "single".
            **unstructured_kwargs: Extra keyword arguments handed to the
                parent loader; `_get_elements` later passes
                `self.unstructured_kwargs` on to `partition_tsv`.
        """
        # The version check runs before the parent constructor, using
        # "0.7.6" as the minimum `unstructured` version.
        validate_unstructured_version(min_unstructured_version="0.7.6")
        super().__init__(file_path=file_path, mode=mode, **unstructured_kwargs)

    def _get_elements(self) -> List:
        """Partition the TSV file and return the result of `partition_tsv`."""
        # Imported lazily so `unstructured` is only needed when this
        # method actually runs.
        from unstructured.partition.tsv import partition_tsv

        elements = partition_tsv(
            filename=self.file_path,  # type: ignore[arg-type]
            **self.unstructured_kwargs,
        )
        return elements