partners/unstructured: fix client api_url (#24680)

**Description:** Add empty string default for api_key and change
`server_url` to `url` to match existing loaders.

- [x] **Lint and test**: Run `make format`, `make lint` and `make test`
from the root of the package(s) you've modified. See contribution
guidelines for more: https://python.langchain.com/docs/contributing/

Additional guidelines:
- Make sure optional dependencies are imported within a function.
- Please do not add dependencies to pyproject.toml files (even optional
ones) unless they are required for unit tests.
- Most PRs should not touch more than one package.
- Changes should be backwards compatible.
- If you are adding something to community, do not re-import it in
langchain.

If no one reviews your PR within a few days, please @-mention one of
baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.
This commit is contained in:
John 2024-07-29 14:16:41 -04:00 committed by GitHub
parent bf685c242f
commit 0a2ff40fcc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 45 additions and 6 deletions

View File

@ -88,14 +88,14 @@ class UnstructuredLoader(BaseLoader):
# SDK parameters
api_key: Optional[str] = None,
client: Optional[UnstructuredClient] = None,
server_url: Optional[str] = None,
url: Optional[str] = None,
**kwargs: Any,
):
"""Initialize loader."""
if file_path is not None and file is not None:
raise ValueError("file_path and file cannot be defined simultaneously.")
if client is not None:
disallowed_params = [("api_key", api_key), ("server_url", server_url)]
disallowed_params = [("api_key", api_key), ("url", url)]
bad_params = [
param for param, value in disallowed_params if value is not None
]
@ -106,8 +106,8 @@ class UnstructuredLoader(BaseLoader):
f"params: {', '.join(bad_params)}."
)
unstructured_api_key = api_key or os.getenv("UNSTRUCTURED_API_KEY")
unstructured_url = server_url or os.getenv("UNSTRUCTURED_URL") or _DEFAULT_URL
unstructured_api_key = api_key or os.getenv("UNSTRUCTURED_API_KEY") or ""
unstructured_url = url or os.getenv("UNSTRUCTURED_URL") or _DEFAULT_URL
self.client = client or UnstructuredClient(
api_key_auth=unstructured_api_key, server_url=unstructured_url
@ -165,7 +165,6 @@ class _SingleDocumentLoader(BaseLoader):
file: Optional[IO[bytes]] = None,
partition_via_api: bool = False,
post_processors: Optional[list[Callable[[str], str]]] = None,
# SDK parameters
**kwargs: Any,
):
"""Initialize loader."""

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-unstructured"
version = "0.1.0"
version = "0.1.1"
description = "An integration package connecting Unstructured and LangChain"
authors = []
readme = "README.md"

View File

@ -7,6 +7,7 @@ import pytest
from unstructured.documents.elements import Text # type: ignore
from langchain_unstructured.document_loaders import (
UnstructuredLoader,
_SingleDocumentLoader, # type: ignore
)
@ -16,6 +17,45 @@ EXAMPLE_DOCS_DIRECTORY = str(
)
# --- UnstructuredLoader.__init__() ---
def test_it_initializes_with_file_path(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check the loader's defaults when only ``file_path`` is supplied.

    With no API key or URL passed in and none present in the environment,
    the loader is expected to build a client with an empty-string API key
    and the default server URL.
    """
    # The loader falls back to these environment variables when the matching
    # argument is omitted, so both must be cleared for the default-value
    # assertions below to be independent of the developer/CI environment.
    monkeypatch.delenv("UNSTRUCTURED_API_KEY", raising=False)
    monkeypatch.delenv("UNSTRUCTURED_URL", raising=False)

    loader = UnstructuredLoader(file_path="dummy_path")

    assert loader.file_path == "dummy_path"
    assert loader.file is None
    assert loader.partition_via_api is False
    assert loader.post_processors is None
    assert loader.unstructured_kwargs == {}
    # A client is always created and passed to _SingleDocumentLoader, but it's not
    # used unless partition_via_api=True
    assert loader.client is not None
    assert loader.client.sdk_configuration.security.api_key_auth == ""  # type: ignore
    assert (
        loader.client.sdk_configuration.server_url == "https://api.unstructuredapp.io"
    )
def test_it_initializes_with_env_api_key(monkeypatch: pytest.MonkeyPatch) -> None:
    """Check that the API key is read from ``UNSTRUCTURED_API_KEY``.

    No ``api_key`` argument is passed, so the client is expected to carry the
    value taken from the environment while still using the default server URL.
    """
    monkeypatch.setenv("UNSTRUCTURED_API_KEY", "FAKE_API_KEY")
    # The loader also consults UNSTRUCTURED_URL when no url argument is given;
    # clear it so the default-URL assertion does not depend on the caller's
    # environment.
    monkeypatch.delenv("UNSTRUCTURED_URL", raising=False)

    loader = UnstructuredLoader(file_path="dummy_path")

    assert loader.file_path == "dummy_path"
    assert loader.file is None
    assert loader.partition_via_api is False
    assert loader.post_processors is None
    assert loader.unstructured_kwargs == {}
    assert loader.client is not None
    assert loader.client.sdk_configuration.security.api_key_auth == "FAKE_API_KEY"  # type: ignore
    assert (
        loader.client.sdk_configuration.server_url == "https://api.unstructuredapp.io"
    )
# --- _SingleDocumentLoader._get_content() ---