langchain/libs/community/tests/unit_tests/document_loaders/test_pebblo.py
Rahul Tripathi 820b713086
community[minor]: Add support for Pebblo cloud_api_key in PebbloSafeLoader (#19855)
**Description**:
_PebbloSafeLoader_: Add support for Pebblo's cloud API key in
PebbloSafeLoader

- This pull request enables PebbloSafeLoader to accept a Pebblo cloud
API key and to send the semantic classification data to Pebblo cloud.

**Documentation**: Updated 
**Unit test**: Added
**Issue**: NA
**Dependencies**: - None
**Twitter handle**: @rahul_tripathi2

Signed-off-by: Rahul Tripathi <rauhl.psit.ec@gmail.com>
Co-authored-by: Rahul Tripathi <rauhl.psit.ec@gmail.com>
2024-04-08 11:10:04 -04:00

135 lines
3.6 KiB
Python

import os
from pathlib import Path
from typing import Dict
import pytest
from langchain_core.documents import Document
from pytest_mock import MockerFixture
from langchain_community.document_loaders import CSVLoader, PyPDFLoader
# String path of the ``examples`` directory, located by walking three
# ``.parent`` hops up from this file; the tests join sample file names onto it.
EXAMPLE_DOCS_DIRECTORY = str(Path(__file__).parent.parent.parent / "examples/")
class MockResponse:
    """Minimal stand-in for an HTTP response object used by these tests.

    It only stores a JSON payload and a status code, and hands the payload
    back unchanged from :meth:`json`.
    """

    def __init__(self, json_data: Dict, status_code: int) -> None:
        """Store the canned payload and status code.

        Args:
            json_data: Payload returned verbatim by ``json()``.
            status_code: Value exposed as the ``status_code`` attribute.
        """
        self.json_data = json_data
        self.status_code = status_code

    def json(self) -> Dict:
        """Return the payload supplied at construction time."""
        return self.json_data
def test_pebblo_import() -> None:
    """Test that the Pebblo safe loader can be imported."""
    # The import itself is the assertion: the test fails if it raises.
    from langchain_community.document_loaders import PebbloSafeLoader  # noqa: F401
def test_empty_filebased_loader(mocker: MockerFixture) -> None:
    """Test that wrapping a CSVLoader over ``test_empty.csv`` loads no documents."""
    # Setup
    from langchain_community.document_loaders import PebbloSafeLoader

    # NOTE(review): ``requests.get``/``requests.post`` are replaced with
    # MockResponse *instances* (the class defines no ``__call__``), not with
    # callables returning them -- confirm the loader tolerates that when it
    # invokes them.
    mocker.patch.multiple(
        "requests",
        get=MockResponse(json_data={"data": ""}, status_code=200),
        post=MockResponse(json_data={"data": ""}, status_code=200),
    )
    file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "test_empty.csv")
    expected_docs: list = []

    # Exercise
    loader = PebbloSafeLoader(
        CSVLoader(file_path=file_path),
        "dummy_app_name",
        "dummy_owner",
        "dummy_description",
    )
    result = loader.load()

    # Assert
    assert result == expected_docs
def test_csv_loader_load_valid_data(mocker: MockerFixture) -> None:
    """Test that ``test_nominal.csv`` is loaded as one Document per row.

    Each expected Document carries the row's ``column: value`` lines as its
    content and the source path plus row index as its metadata.
    """
    # Setup
    from langchain_community.document_loaders import PebbloSafeLoader

    # Swap ``requests.get``/``requests.post`` for canned MockResponse objects
    # (presumably to keep the loader away from a real Pebblo server).
    mocker.patch.multiple(
        "requests",
        get=MockResponse(json_data={"data": ""}, status_code=200),
        post=MockResponse(json_data={"data": ""}, status_code=200),
    )
    file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "test_nominal.csv")
    expected_docs = [
        Document(
            page_content="column1: value1\ncolumn2: value2\ncolumn3: value3",
            metadata={"source": file_path, "row": 0},
        ),
        Document(
            page_content="column1: value4\ncolumn2: value5\ncolumn3: value6",
            metadata={"source": file_path, "row": 1},
        ),
    ]

    # Exercise
    loader = PebbloSafeLoader(
        CSVLoader(file_path=file_path),
        "dummy_app_name",
        "dummy_owner",
        "dummy_description",
    )
    result = loader.load()

    # Assert
    assert result == expected_docs
@pytest.mark.requires("pypdf")
def test_pdf_lazy_load(mocker: MockerFixture) -> None:
    """Test that lazily loading the two-page sample PDF yields two documents."""
    # Setup
    from langchain_community.document_loaders import PebbloSafeLoader

    # Swap ``requests.get``/``requests.post`` for canned MockResponse objects
    # (presumably to keep the loader away from a real Pebblo server).
    mocker.patch.multiple(
        "requests",
        get=MockResponse(json_data={"data": ""}, status_code=200),
        post=MockResponse(json_data={"data": ""}, status_code=200),
    )
    file_path = os.path.join(
        EXAMPLE_DOCS_DIRECTORY, "multi-page-forms-sample-2-page.pdf"
    )

    # Exercise
    loader = PebbloSafeLoader(
        PyPDFLoader(file_path=file_path),
        "dummy_app_name",
        "dummy_owner",
        "dummy_description",
    )
    # Drain the lazy iterator so the number of yielded items can be counted.
    result = list(loader.lazy_load())

    # Assert
    assert len(result) == 2
def test_pebblo_safe_loader_api_key(mocker: MockerFixture) -> None:
    """Test that an ``api_key`` passed to PebbloSafeLoader is kept on the loader."""
    # Setup
    from langchain_community.document_loaders import PebbloSafeLoader

    # Patch ``requests`` exactly as the other loader tests in this module do,
    # so building a loader with an API key is exercised under the same
    # conditions (presumably keeping any cloud call off the real network).
    mocker.patch.multiple(
        "requests",
        get=MockResponse(json_data={"data": ""}, status_code=200),
        post=MockResponse(json_data={"data": ""}, status_code=200),
    )
    file_path = os.path.join(EXAMPLE_DOCS_DIRECTORY, "test_empty.csv")
    api_key = "dummy_api_key"

    # Exercise
    loader = PebbloSafeLoader(
        CSVLoader(file_path=file_path),
        "dummy_app_name",
        "dummy_owner",
        "dummy_description",
        api_key=api_key,
    )

    # Assert
    assert loader.api_key == api_key