mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-19 13:23:35 +00:00
box: Add searchoptions for BoxRetriever, documentation for BoxRetriever as agent tool (#26181)
Thank you for contributing to LangChain! - [x] **PR title**: "package: description" - Where "package" is whichever of langchain, community, core, experimental, etc. is being modified. Use "docs: ..." for purely docs changes, "templates: ..." for template changes, "infra: ..." for CI changes. - Example: "community: add foobar LLM" Added search options for BoxRetriever and added documentation to demonstrate how to use BoxRetriever as an agent tool - @BoxPlatform - [x] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17.
This commit is contained in:
parent
e0c36afc3e
commit
acbb4e4701
File diff suppressed because one or more lines are too long
@ -2,7 +2,14 @@ from importlib import metadata
|
||||
|
||||
from langchain_box.document_loaders import BoxLoader
|
||||
from langchain_box.retrievers import BoxRetriever
|
||||
from langchain_box.utilities import BoxAuth, BoxAuthType, _BoxAPIWrapper
|
||||
from langchain_box.utilities.box import (
|
||||
BoxAuth,
|
||||
BoxAuthType,
|
||||
BoxSearchOptions,
|
||||
DocumentFiles,
|
||||
SearchTypeFilter,
|
||||
_BoxAPIWrapper,
|
||||
)
|
||||
|
||||
try:
|
||||
__version__ = metadata.version(__package__)
|
||||
@ -16,6 +23,9 @@ __all__ = [
|
||||
"BoxRetriever",
|
||||
"BoxAuth",
|
||||
"BoxAuthType",
|
||||
"BoxSearchOptions",
|
||||
"DocumentFiles",
|
||||
"SearchTypeFilter",
|
||||
"_BoxAPIWrapper",
|
||||
"__version__",
|
||||
]
|
||||
|
@ -6,7 +6,7 @@ from langchain_core.retrievers import BaseRetriever
|
||||
from pydantic import ConfigDict, model_validator
|
||||
from typing_extensions import Self
|
||||
|
||||
from langchain_box.utilities import BoxAuth, _BoxAPIWrapper
|
||||
from langchain_box.utilities import BoxAuth, BoxSearchOptions, _BoxAPIWrapper
|
||||
|
||||
|
||||
class BoxRetriever(BaseRetriever):
|
||||
@ -128,7 +128,10 @@ class BoxRetriever(BaseRetriever):
|
||||
"""character_limit is an int that caps the number of characters to
|
||||
return per document."""
|
||||
|
||||
_box: Optional[_BoxAPIWrapper] = None
|
||||
box_search_options: Optional[BoxSearchOptions] = None
|
||||
"""Search options to configure BoxRetriever to narrow search results."""
|
||||
|
||||
_box: Optional[_BoxAPIWrapper]
|
||||
|
||||
model_config = ConfigDict(
|
||||
arbitrary_types_allowed=True,
|
||||
@ -150,6 +153,7 @@ class BoxRetriever(BaseRetriever):
|
||||
box_developer_token=self.box_developer_token,
|
||||
box_auth=self.box_auth,
|
||||
character_limit=self.character_limit,
|
||||
box_search_options=self.box_search_options,
|
||||
)
|
||||
|
||||
self._box = _box
|
||||
|
@ -1,5 +1,19 @@
|
||||
"""Box API Utilities."""
|
||||
|
||||
from langchain_box.utilities.box import BoxAuth, BoxAuthType, _BoxAPIWrapper
|
||||
from langchain_box.utilities.box import (
|
||||
BoxAuth,
|
||||
BoxAuthType,
|
||||
BoxSearchOptions,
|
||||
DocumentFiles,
|
||||
SearchTypeFilter,
|
||||
_BoxAPIWrapper,
|
||||
)
|
||||
|
||||
__all__ = ["BoxAuth", "BoxAuthType", "_BoxAPIWrapper"]
|
||||
__all__ = [
|
||||
"BoxAuth",
|
||||
"BoxAuthType",
|
||||
"BoxSearchOptions",
|
||||
"DocumentFiles",
|
||||
"SearchTypeFilter",
|
||||
"_BoxAPIWrapper",
|
||||
]
|
||||
|
@ -470,6 +470,128 @@ class BoxAuth(BaseModel):
|
||||
return self._box_client
|
||||
|
||||
|
||||
class SearchTypeFilter(Enum):
    """Parts of a Box item that a search query may be matched against.

    Use one or more members to restrict which part of an item is searched.
    """

    NAME = "name"
    """Match against the item's name (its ``name`` field)."""

    DESCRIPTION = "description"
    """Match against the item's description (its ``description`` field)."""

    FILE_CONTENT = "file_content"
    """Match against the actual content of the file."""

    COMMENTS = "comments"
    """Match against the content of comments on a file or folder."""

    TAGS = "tags"
    """Match against tags applied to the item (its ``tags`` field)."""
|
||||
|
||||
|
||||
def _date_range_is_reversed(date_range: Optional[List[str]]) -> bool:
    """Report whether a ``[start, end]`` date range has its start after its end.

    An open range (no list, fewer than two entries, or an empty/``None``
    bound) is never considered reversed. When both bounds parse as ISO 8601 /
    RFC 3339 timestamps they are compared as datetimes, so differing UTC
    offsets are honoured; otherwise the check falls back to plain string
    ordering.
    """
    # Imported locally so the helper does not rely on the module's import block.
    from datetime import datetime

    if not date_range or len(date_range) < 2:
        return False

    start, end = date_range[0], date_range[1]
    if not start or not end:
        return False

    def _as_datetime(stamp: str) -> datetime:
        # ``fromisoformat`` only accepts a trailing "Z" from Python 3.11 on.
        if stamp[-1] in "Zz":
            stamp = stamp[:-1] + "+00:00"
        return datetime.fromisoformat(stamp)

    try:
        return _as_datetime(start) > _as_datetime(end)
    except (TypeError, ValueError):
        # Unparseable text, or a naive/aware mix: keep the string comparison.
        return start > end


class BoxSearchOptions(BaseModel):
    """Search options used to narrow the results of a Box search.

    Every field is optional; a field left as ``None`` applies no restriction.

    Raises:
        ValueError: if ``k`` is set and falls outside 1-200, or if a date range
            has both bounds set and the start is after the end.
    """

    ancestor_folder_ids: Optional[List[str]] = None
    """Limits the search results to items within the given folders, given as a
    list of folder IDs."""

    search_type_filter: Optional[List[SearchTypeFilter]] = None
    """Limits the search results to any items that match the search query for a
    specific part of the file, for example the file description.

    Given as a list of ``SearchTypeFilter`` entries. Default is all."""

    created_date_range: Optional[List[str]] = None
    """Limits the search results to any items created within a given date range.

    The range is a ``[start, end]`` list of RFC3339 timestamps.

    If the start date is omitted (empty) anything created before the end date
    will be returned.

    If the end date is omitted (empty) the current date will be used as the
    end date instead."""

    file_extensions: Optional[List[DocumentFiles]] = None
    """Limits the search results to any files that match any of the provided
    file extensions. This is a list of
    ``langchain_box.utilities.DocumentFiles`` entries."""

    k: Optional[int] = 100
    """Defines the maximum number of items to return. Defaults to 100, maximum
    is 200. ``None`` skips the range check."""

    size_range: Optional[List[int]] = None
    """Limits the search results to any items with a size within a given file
    size range. This applies to files and folders.

    The range is a list of a lower and upper byte size limit (inclusive).

    The upper and lower bound can be omitted to create open ranges."""

    updated_date_range: Optional[List[str]] = None
    """Limits the search results to any items updated within a given date range.

    The range is a ``[start, end]`` list of RFC3339 timestamps.

    If the start date is omitted (empty) anything updated before the end date
    will be returned.

    If the end date is omitted (empty) the current date will be used as the
    end date instead."""

    # Same settings as before, expressed in the Pydantic v2 form that the
    # other models in this module already use.
    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        use_enum_values=True,
        extra="allow",
    )

    @model_validator(mode="after")
    def validate_search_options(self) -> Self:
        """Check ``k`` and the ordering of both date ranges.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: if ``k`` is outside 1-200 or a date range is reversed.
        """
        # ``k`` is Optional; only range-check it when a value was supplied.
        if self.k is not None and not 1 <= self.k <= 200:
            raise ValueError(
                f"Invalid setting of k {self.k}. " "Value must be between 1 and 200."
            )

        # Both ranges share one rule: start must not be after end.
        for date_range in (self.created_date_range, self.updated_date_range):
            if _date_range_is_reversed(date_range):
                raise ValueError("Start date must be before end date.")

        return self
|
||||
|
||||
|
||||
class _BoxAPIWrapper(BaseModel):
|
||||
"""Wrapper for Box API."""
|
||||
|
||||
@ -485,7 +607,10 @@ class _BoxAPIWrapper(BaseModel):
|
||||
"""character_limit is an int that caps the number of characters to
|
||||
return per document."""
|
||||
|
||||
_box: Optional[box_sdk_gen.BoxClient] = None
|
||||
box_search_options: Optional[BoxSearchOptions] = None
|
||||
"""Search options to configure BoxRetriever to narrow search results."""
|
||||
|
||||
_box: Optional[box_sdk_gen.BoxClient]
|
||||
|
||||
model_config = ConfigDict(
|
||||
arbitrary_types_allowed=True,
|
||||
@ -636,9 +761,25 @@ class _BoxAPIWrapper(BaseModel):
|
||||
files = []
|
||||
|
||||
try:
|
||||
results = self._box.search.search_for_content( # type: ignore[union-attr]
|
||||
query=query, fields=["id", "type", "extension"]
|
||||
)
|
||||
results = None
|
||||
|
||||
if self.box_search_options is None:
|
||||
results = self._box.search.search_for_content( # type: ignore[union-attr]
|
||||
query=query, fields=["id", "type", "extension"], type="file"
|
||||
)
|
||||
else:
|
||||
results = self._box.search.search_for_content( # type: ignore[union-attr]
|
||||
query=query,
|
||||
fields=["id", "type", "extension"],
|
||||
type="file",
|
||||
ancestor_folder_ids=self.box_search_options.ancestor_folder_ids, # type: ignore[union-attr]
|
||||
content_types=self.box_search_options.search_type_filter, # type: ignore[union-attr]
|
||||
created_at_range=self.box_search_options.created_date_range, # type: ignore[union-attr]
|
||||
file_extensions=self.box_search_options.file_extensions, # type: ignore[union-attr]
|
||||
limit=self.box_search_options.k, # type: ignore[union-attr]
|
||||
size_range=self.box_search_options.size_range, # type: ignore[union-attr]
|
||||
updated_at_range=self.box_search_options.updated_date_range, # type: ignore[union-attr]
|
||||
)
|
||||
|
||||
if results.entries is None or len(results.entries) <= 0:
|
||||
return None # type: ignore[return-value]
|
||||
|
@ -3,7 +3,13 @@ from langchain_core.documents import Document
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from langchain_box.retrievers import BoxRetriever
|
||||
from langchain_box.utilities import BoxAuth, BoxAuthType
|
||||
from langchain_box.utilities import (
|
||||
BoxAuth,
|
||||
BoxAuthType,
|
||||
BoxSearchOptions,
|
||||
DocumentFiles,
|
||||
SearchTypeFilter,
|
||||
)
|
||||
|
||||
|
||||
# Test auth types
|
||||
@ -62,6 +68,44 @@ def test_search(mocker: MockerFixture) -> None:
|
||||
]
|
||||
|
||||
|
||||
# test search options
|
||||
def test_search_options(mocker: MockerFixture) -> None:
    """A retriever built with ``box_search_options`` returns the mocked hits."""
    # ``search_box`` is patched, so no request reaches Box; the test covers
    # building the options and passing them to the retriever.
    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.search_box",
        return_value=(
            [
                Document(
                    page_content="Test file mode\ndocument contents",
                    metadata={"title": "Testing Files"},
                )
            ]
        ),
    )

    box_search_options = BoxSearchOptions(
        ancestor_folder_ids=["box_folder_id"],
        search_type_filter=[SearchTypeFilter.FILE_CONTENT],
        # Each bound is a bare RFC3339 timestamp (no trailing separator).
        created_date_range=["2023-01-01T00:00:00-07:00", "2024-08-01T00:00:00-07:00"],
        file_extensions=[DocumentFiles.DOCX, DocumentFiles.PDF],
        k=200,
        size_range=[1, 1000000],
        updated_date_range=None,
    )

    retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token="box_developer_token", box_search_options=box_search_options
    )

    documents = retriever.invoke("query")

    assert documents == [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
|
||||
|
||||
|
||||
# test ai retrieval
|
||||
def test_ai(mocker: MockerFixture) -> None:
|
||||
mocker.patch(
|
||||
|
@ -5,6 +5,9 @@ EXPECTED_ALL = [
|
||||
"BoxRetriever",
|
||||
"BoxAuth",
|
||||
"BoxAuthType",
|
||||
"BoxSearchOptions",
|
||||
"DocumentFiles",
|
||||
"SearchTypeFilter",
|
||||
"_BoxAPIWrapper",
|
||||
"__version__",
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user