# Patch summary (commentary only; text before the first "diff --git" header is
# not part of any hunk).
#
# Files touched:
#   1. libs/partners/unstructured/langchain_unstructured/document_loaders.py
#      - UnstructuredLoader.__init__: the keyword parameter `server_url` is
#        renamed to `url`. The "client given together with other SDK params"
#        check now reports the name "url" instead of "server_url".
#      - The API key now resolves as
#        `api_key or os.getenv("UNSTRUCTURED_API_KEY") or ""`, so an empty
#        string (rather than None) is passed as `api_key_auth` when neither the
#        argument nor the environment variable is set.
#      - The URL resolves as `url or os.getenv("UNSTRUCTURED_URL") or _DEFAULT_URL`
#        and is still passed to UnstructuredClient as `server_url=`.
#      - _SingleDocumentLoader.__init__: a stray `# SDK parameters` comment is
#        removed; no code change in that hunk.
#   2. libs/partners/unstructured/pyproject.toml
#      - Package version bumped from 0.1.0 to 0.1.1.
#   3. libs/partners/unstructured/tests/unit_tests/test_document_loaders.py
#      - Imports UnstructuredLoader and adds two tests of its constructor:
#        test_it_initializes_with_file_path (UNSTRUCTURED_API_KEY removed from
#        the environment; expects api_key_auth == "") and
#        test_it_initializes_with_env_api_key (UNSTRUCTURED_API_KEY set to
#        "FAKE_API_KEY"; expects that value as api_key_auth). Both expect
#        server_url == "https://api.unstructuredapp.io".
#
# NOTE(review): renaming `server_url` to `url` changes the keyword interface.
#   Because the signature also accepts **kwargs, a caller still passing
#   `server_url=...` would be captured by **kwargs rather than rejected by the
#   signature. How those kwargs are used is not visible in these hunks, so
#   confirm that such calls fail loudly or are otherwise handled.
# NOTE(review): both new tests control only UNSTRUCTURED_API_KEY. The loader
#   also reads UNSTRUCTURED_URL, so the server_url assertions presumably depend
#   on that variable being unset in the test environment; verify.
# NOTE(review): the line structure of this patch appears to have been collapsed
#   (hunk headers and +/- lines share physical lines). It likely needs its
#   original line breaks restored before `git apply` / `patch` can consume it.
diff --git a/libs/partners/unstructured/langchain_unstructured/document_loaders.py b/libs/partners/unstructured/langchain_unstructured/document_loaders.py index ebe8e5d3b76..39452aa2c6b 100644 --- a/libs/partners/unstructured/langchain_unstructured/document_loaders.py +++ b/libs/partners/unstructured/langchain_unstructured/document_loaders.py @@ -88,14 +88,14 @@ class UnstructuredLoader(BaseLoader): # SDK parameters api_key: Optional[str] = None, client: Optional[UnstructuredClient] = None, - server_url: Optional[str] = None, + url: Optional[str] = None, **kwargs: Any, ): """Initialize loader.""" if file_path is not None and file is not None: raise ValueError("file_path and file cannot be defined simultaneously.") if client is not None: - disallowed_params = [("api_key", api_key), ("server_url", server_url)] + disallowed_params = [("api_key", api_key), ("url", url)] bad_params = [ param for param, value in disallowed_params if value is not None ] @@ -106,8 +106,8 @@ class UnstructuredLoader(BaseLoader): f"params: {', '.join(bad_params)}." 
) - unstructured_api_key = api_key or os.getenv("UNSTRUCTURED_API_KEY") - unstructured_url = server_url or os.getenv("UNSTRUCTURED_URL") or _DEFAULT_URL + unstructured_api_key = api_key or os.getenv("UNSTRUCTURED_API_KEY") or "" + unstructured_url = url or os.getenv("UNSTRUCTURED_URL") or _DEFAULT_URL self.client = client or UnstructuredClient( api_key_auth=unstructured_api_key, server_url=unstructured_url @@ -165,7 +165,6 @@ class _SingleDocumentLoader(BaseLoader): file: Optional[IO[bytes]] = None, partition_via_api: bool = False, post_processors: Optional[list[Callable[[str], str]]] = None, - # SDK parameters **kwargs: Any, ): """Initialize loader.""" diff --git a/libs/partners/unstructured/pyproject.toml b/libs/partners/unstructured/pyproject.toml index a6a41998ad4..2e8c42a23b3 100644 --- a/libs/partners/unstructured/pyproject.toml +++ b/libs/partners/unstructured/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain-unstructured" -version = "0.1.0" +version = "0.1.1" description = "An integration package connecting Unstructured and LangChain" authors = [] readme = "README.md" diff --git a/libs/partners/unstructured/tests/unit_tests/test_document_loaders.py b/libs/partners/unstructured/tests/unit_tests/test_document_loaders.py index 4ae5840baec..45f798e751d 100644 --- a/libs/partners/unstructured/tests/unit_tests/test_document_loaders.py +++ b/libs/partners/unstructured/tests/unit_tests/test_document_loaders.py @@ -7,6 +7,7 @@ import pytest from unstructured.documents.elements import Text # type: ignore from langchain_unstructured.document_loaders import ( + UnstructuredLoader, _SingleDocumentLoader, # type: ignore ) @@ -16,6 +17,45 @@ EXAMPLE_DOCS_DIRECTORY = str( ) +# --- UnstructuredLoader.__init__() --- + + +def test_it_initializes_with_file_path(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("UNSTRUCTURED_API_KEY", raising=False) + + loader = UnstructuredLoader(file_path="dummy_path") + + assert loader.file_path == "dummy_path" + 
assert loader.file is None + assert loader.partition_via_api is False + assert loader.post_processors is None + assert loader.unstructured_kwargs == {} + # A client is always created and passed to _SingleDocumentLoader, but it's not + # used unless partition_via_api=True + assert loader.client is not None + assert loader.client.sdk_configuration.security.api_key_auth == "" # type: ignore + assert ( + loader.client.sdk_configuration.server_url == "https://api.unstructuredapp.io" + ) + + +def test_it_initializes_with_env_api_key(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("UNSTRUCTURED_API_KEY", "FAKE_API_KEY") + + loader = UnstructuredLoader(file_path="dummy_path") + + assert loader.file_path == "dummy_path" + assert loader.file is None + assert loader.partition_via_api is False + assert loader.post_processors is None + assert loader.unstructured_kwargs == {} + assert loader.client is not None + assert loader.client.sdk_configuration.security.api_key_auth == "FAKE_API_KEY" # type: ignore + assert ( + loader.client.sdk_configuration.server_url == "https://api.unstructuredapp.io" + ) + + # --- _SingleDocumentLoader._get_content() ---