mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-27 00:48:45 +00:00
box: add retrievers and fix docs (#25633)
Thank you for contributing to LangChain! **Description:** Adding `BoxRetriever` for langchain_box. This retriever handles two use cases: * Retrieve all documents that match a full-text search * Retrieve the answer to a Box AI prompt as a Document **Twitter handle:** @BoxPlatform - [x] **Add tests and docs**: If you're adding a new integration, please include 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. - [x] **Lint and test**: Run `make format`, `make lint` and `make test` from the root of the package(s) you've modified. See contribution guidelines for more: https://python.langchain.com/docs/contributing/ Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. - If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, ccurme, vbarda, hwchase17. --------- Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
parent
4f347cbcb9
commit
fb1d67edf6
@ -172,3 +172,14 @@ If you wish to use OAuth2 with the authorization_code flow, please use `BoxAuthT
|
||||
from langchain_box.document_loaders import BoxLoader
|
||||
|
||||
```
|
||||
|
||||
## Retrievers
|
||||
|
||||
### BoxRetriever
|
||||
|
||||
[See usage example](/docs/integrations/retrievers/box)
|
||||
|
||||
```python
|
||||
from langchain_box.retrievers import BoxRetriever
|
||||
|
||||
```
|
||||
|
323
docs/docs/integrations/retrievers/box.ipynb
Normal file
323
docs/docs/integrations/retrievers/box.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -1,7 +1,8 @@
|
||||
from importlib import metadata
|
||||
|
||||
from langchain_box.document_loaders import BoxLoader
|
||||
from langchain_box.utilities import BoxAPIWrapper, BoxAuth, BoxAuthType
|
||||
from langchain_box.retrievers import BoxRetriever
|
||||
from langchain_box.utilities import BoxAuth, BoxAuthType, _BoxAPIWrapper
|
||||
|
||||
try:
|
||||
__version__ = metadata.version(__package__)
|
||||
@ -12,8 +13,9 @@ del metadata # optional, avoids polluting the results of dir(__package__)
|
||||
|
||||
__all__ = [
|
||||
"BoxLoader",
|
||||
"BoxRetriever",
|
||||
"BoxAuth",
|
||||
"BoxAuthType",
|
||||
"BoxAPIWrapper",
|
||||
"_BoxAPIWrapper",
|
||||
"__version__",
|
||||
]
|
||||
|
@ -3,14 +3,14 @@ from typing import Any, Dict, Iterator, List, Optional
|
||||
from box_sdk_gen import FileBaseTypeField # type: ignore
|
||||
from langchain_core.document_loaders.base import BaseLoader
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.pydantic_v1 import BaseModel, ConfigDict, root_validator
|
||||
from langchain_core.pydantic_v1 import BaseModel, root_validator
|
||||
from langchain_core.utils import get_from_dict_or_env
|
||||
|
||||
from langchain_box.utilities import BoxAPIWrapper, BoxAuth
|
||||
from langchain_box.utilities import BoxAuth, _BoxAPIWrapper
|
||||
|
||||
|
||||
class BoxLoader(BaseLoader, BaseModel):
|
||||
"""
|
||||
BoxLoader
|
||||
"""BoxLoader.
|
||||
|
||||
This class will help you load files from your Box instance. You must have a
|
||||
Box account. If you need one, you can sign up for a free developer account.
|
||||
@ -33,18 +33,18 @@ class BoxLoader(BaseLoader, BaseModel):
|
||||
pip install -U langchain-box
|
||||
export BOX_DEVELOPER_TOKEN="your-api-key"
|
||||
|
||||
|
||||
This loader returns ``Document`` objects built from text representations of files
|
||||
in Box. It will skip any document without a text representation available. You can
|
||||
provide either a ``List[str]`` containing Box file IDs, or you can provide a
|
||||
``str`` containing a Box folder ID. If providing a folder ID, you can also enable
|
||||
recursive mode to get the full tree under that folder.
|
||||
|
||||
:::info
|
||||
.. note::
|
||||
A Box instance can contain Petabytes of files, and folders can contain millions
|
||||
of files. Be intentional when choosing which folders to index. We
|
||||
recommend never getting all files from folder 0 recursively. Folder ID 0 is your
|
||||
root folder.
|
||||
:::
|
||||
|
||||
Instantiate:
|
||||
|
||||
@ -121,32 +121,36 @@ class BoxLoader(BaseLoader, BaseModel):
|
||||
Terrarium: $120\nTotal: $920')
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(use_enum_values=True)
|
||||
|
||||
"""String containing the Box Developer Token generated in the developer console"""
|
||||
box_developer_token: Optional[str] = None
|
||||
"""Configured langchain_box.utilities.BoxAuth object"""
|
||||
"""String containing the Box Developer Token generated in the developer console"""
|
||||
|
||||
box_auth: Optional[BoxAuth] = None
|
||||
"""List[str] containing Box file ids"""
|
||||
"""Configured langchain_box.utilities.BoxAuth object"""
|
||||
|
||||
box_file_ids: Optional[List[str]] = None
|
||||
"""String containing box folder id to load files from"""
|
||||
"""List[str] containing Box file ids"""
|
||||
|
||||
box_folder_id: Optional[str] = None
|
||||
"""String containing box folder id to load files from"""
|
||||
|
||||
recursive: Optional[bool] = False
|
||||
"""If getting files by folder id, recursive is a bool to determine if you wish
|
||||
to traverse subfolders to return child documents. Default is False"""
|
||||
recursive: Optional[bool] = False
|
||||
|
||||
character_limit: Optional[int] = -1
|
||||
"""character_limit is an int that caps the number of characters to
|
||||
return per document."""
|
||||
character_limit: Optional[int] = -1
|
||||
|
||||
box: Optional[BoxAPIWrapper]
|
||||
_box: Optional[_BoxAPIWrapper]
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
extra = "allow"
|
||||
use_enum_values = True
|
||||
|
||||
@root_validator(allow_reuse=True)
|
||||
def validate_box_loader_inputs(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
||||
box = None
|
||||
_box = None
|
||||
|
||||
"""Validate that has either box_file_ids or box_folder_id."""
|
||||
if not values.get("box_file_ids") and not values.get("box_folder_id"):
|
||||
@ -159,19 +163,30 @@ class BoxLoader(BaseLoader, BaseModel):
|
||||
)
|
||||
|
||||
"""Validate that we have either a box_developer_token or box_auth."""
|
||||
if not values.get("box_auth") and not values.get("box_developer_token"):
|
||||
raise ValueError(
|
||||
"you must provide box_developer_token or a box_auth "
|
||||
"generated with langchain_box.utilities.BoxAuth"
|
||||
if not values.get("box_auth"):
|
||||
if not get_from_dict_or_env(
|
||||
values, "box_developer_token", "BOX_DEVELOPER_TOKEN"
|
||||
):
|
||||
raise ValueError(
|
||||
"you must provide box_developer_token or a box_auth "
|
||||
"generated with langchain_box.utilities.BoxAuth"
|
||||
)
|
||||
else:
|
||||
token = get_from_dict_or_env(
|
||||
values, "box_developer_token", "BOX_DEVELOPER_TOKEN"
|
||||
)
|
||||
|
||||
_box = _BoxAPIWrapper( # type: ignore[call-arg]
|
||||
box_developer_token=token,
|
||||
character_limit=values.get("character_limit"),
|
||||
)
|
||||
else:
|
||||
_box = _BoxAPIWrapper( # type: ignore[call-arg]
|
||||
box_auth=values.get("box_auth"),
|
||||
character_limit=values.get("character_limit"),
|
||||
)
|
||||
|
||||
box = BoxAPIWrapper( # type: ignore[call-arg]
|
||||
box_developer_token=values.get("box_developer_token"),
|
||||
box_auth=values.get("box_auth"),
|
||||
character_limit=values.get("character_limit"),
|
||||
)
|
||||
|
||||
values["box"] = box
|
||||
values["_box"] = _box
|
||||
|
||||
return values
|
||||
|
||||
@ -181,7 +196,7 @@ class BoxLoader(BaseLoader, BaseModel):
|
||||
for file in folder_content:
|
||||
try:
|
||||
if file.type == FileBaseTypeField.FILE:
|
||||
doc = self.box.get_document_by_file_id(file.id)
|
||||
doc = self._box.get_document_by_file_id(file.id)
|
||||
|
||||
if doc is not None:
|
||||
yield doc
|
||||
@ -199,7 +214,7 @@ class BoxLoader(BaseLoader, BaseModel):
|
||||
if self.box_file_ids:
|
||||
for file_id in self.box_file_ids:
|
||||
try:
|
||||
file = self.box.get_document_by_file_id(file_id) # type: ignore[union-attr]
|
||||
file = self._box.get_document_by_file_id(file_id) # type: ignore[union-attr]
|
||||
|
||||
if file is not None:
|
||||
yield file
|
||||
|
5
libs/partners/box/langchain_box/retrievers/__init__.py
Normal file
5
libs/partners/box/langchain_box/retrievers/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
"""Box retrievers."""
|
||||
|
||||
from langchain_box.retrievers.box import BoxRetriever
|
||||
|
||||
__all__ = ["BoxRetriever"]
|
158
libs/partners/box/langchain_box/retrievers/box.py
Normal file
158
libs/partners/box/langchain_box/retrievers/box.py
Normal file
@ -0,0 +1,158 @@
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from langchain_core.callbacks import CallbackManagerForRetrieverRun
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.pydantic_v1 import root_validator
|
||||
from langchain_core.retrievers import BaseRetriever
|
||||
|
||||
from langchain_box.utilities import BoxAuth, _BoxAPIWrapper
|
||||
|
||||
|
||||
class BoxRetriever(BaseRetriever):
|
||||
"""Box retriever.
|
||||
|
||||
`BoxRetriever` provides the ability to retrieve content from
|
||||
your Box instance in a couple of ways.
|
||||
|
||||
1. You can use the Box full-text search to retrieve the
|
||||
complete document(s) that match your search query, as
|
||||
`List[Document]`
|
||||
2. You can use the Box AI Platform API to retrieve the results
|
||||
from a Box AI prompt. This can be a `Document` containing
|
||||
the result of the prompt, or you can retrieve the citations
|
||||
used to generate the answer, to include in your vectorstore.
|
||||
|
||||
Setup:
|
||||
Install ``langchain-box``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
pip install -U langchain-box
|
||||
|
||||
Instantiate:
|
||||
|
||||
To use search:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_box.retrievers import BoxRetriever
|
||||
|
||||
retriever = BoxRetriever()
|
||||
|
||||
To use Box AI:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_box.retrievers import BoxRetriever
|
||||
|
||||
file_ids=["12345","67890"]
|
||||
|
||||
retriever = BoxRetriever(box_file_ids=file_ids)
|
||||
|
||||
|
||||
Usage:
|
||||
.. code-block:: python
|
||||
|
||||
retriever = BoxRetriever()
|
||||
docs = retriever.invoke("victor")
|
||||
print(docs[0].page_content[:100])
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
[
|
||||
Document(
|
||||
metadata={
|
||||
'source': 'url',
|
||||
'title': 'FIVE_FEET_AND_RISING_by_Peter_Sollett_pdf'
|
||||
},
|
||||
page_content='\\n3/20/23, 5:31 PM F...'
|
||||
)
|
||||
]
|
||||
|
||||
Use within a chain:
|
||||
.. code-block:: python
|
||||
|
||||
from langchain_core.output_parsers import StrOutputParser
|
||||
from langchain_core.prompts import ChatPromptTemplate
|
||||
from langchain_core.runnables import RunnablePassthrough
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
retriever = BoxRetriever(box_developer_token=box_developer_token, character_limit=10000)
|
||||
|
||||
context="You are an actor reading scripts to learn about your role in an upcoming movie."
|
||||
question="describe the character Victor"
|
||||
|
||||
prompt = ChatPromptTemplate.from_template(
|
||||
\"""Answer the question based only on the context provided.
|
||||
|
||||
Context: {context}
|
||||
|
||||
Question: {question}\"""
|
||||
)
|
||||
|
||||
def format_docs(docs):
|
||||
return "\\n\\n".join(doc.page_content for doc in docs)
|
||||
|
||||
chain = (
|
||||
{"context": retriever | format_docs, "question": RunnablePassthrough()}
|
||||
| prompt
|
||||
| llm
|
||||
| StrOutputParser()
|
||||
)
|
||||
|
||||
chain.invoke("Victor") # search query to find files in Box
|
||||
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
'Victor is a skinny 12-year-old with sloppy hair who is seen
|
||||
sleeping on his fire escape in the sun. He is hesitant to go to
|
||||
the pool with his friend Carlos because he is afraid of getting
|
||||
in trouble for not letting his mother cut his hair. Ultimately,
|
||||
he decides to go to the pool with Carlos.'
|
||||
""" # noqa: E501
|
||||
|
||||
box_developer_token: Optional[str] = None
|
||||
"""String containing the Box Developer Token generated in the developer console"""
|
||||
|
||||
box_auth: Optional[BoxAuth] = None
|
||||
"""Configured langchain_box.utilities.BoxAuth object"""
|
||||
|
||||
box_file_ids: Optional[List[str]] = None
|
||||
"""List[str] containing Box file ids"""
|
||||
character_limit: Optional[int] = -1
|
||||
"""character_limit is an int that caps the number of characters to
|
||||
return per document."""
|
||||
|
||||
_box: Optional[_BoxAPIWrapper]
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
extra = "allow"
|
||||
|
||||
@root_validator(allow_reuse=True)
|
||||
def validate_box_loader_inputs(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
||||
_box = None
|
||||
|
||||
"""Validate that we have either a box_developer_token or box_auth."""
|
||||
if not values.get("box_auth") and not values.get("box_developer_token"):
|
||||
raise ValueError(
|
||||
"you must provide box_developer_token or a box_auth "
|
||||
"generated with langchain_box.utilities.BoxAuth"
|
||||
)
|
||||
|
||||
_box = _BoxAPIWrapper( # type: ignore[call-arg]
|
||||
box_developer_token=values.get("box_developer_token"),
|
||||
box_auth=values.get("box_auth"),
|
||||
character_limit=values.get("character_limit"),
|
||||
)
|
||||
|
||||
values["_box"] = _box
|
||||
|
||||
return values
|
||||
|
||||
def _get_relevant_documents(
|
||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
if self.box_file_ids: # If using Box AI
|
||||
return self._box.ask_box_ai(query=query, box_file_ids=self.box_file_ids) # type: ignore[union-attr]
|
||||
else: # If using Search
|
||||
return self._box.search_box(query=query) # type: ignore[union-attr]
|
@ -1,5 +1,5 @@
|
||||
"""Box API Utilities."""
|
||||
|
||||
from langchain_box.utilities.box import BoxAPIWrapper, BoxAuth, BoxAuthType
|
||||
from langchain_box.utilities.box import BoxAuth, BoxAuthType, _BoxAPIWrapper
|
||||
|
||||
__all__ = ["BoxAuth", "BoxAuthType", "BoxAPIWrapper"]
|
||||
__all__ = ["BoxAuth", "BoxAuthType", "_BoxAPIWrapper"]
|
||||
|
@ -1,7 +1,7 @@
|
||||
"""Util that calls Box APIs."""
|
||||
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import box_sdk_gen # type: ignore
|
||||
import requests
|
||||
@ -11,6 +11,13 @@ from langchain_core.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class DocumentFiles(Enum):
|
||||
"""DocumentFiles(Enum).
|
||||
|
||||
An enum containing all of the supported extensions for files
|
||||
Box considers Documents. These files should have text
|
||||
representations.
|
||||
"""
|
||||
|
||||
DOC = "doc"
|
||||
DOCX = "docx"
|
||||
GDOC = "gdoc"
|
||||
@ -89,6 +96,12 @@ class DocumentFiles(Enum):
|
||||
|
||||
|
||||
class ImageFiles(Enum):
|
||||
"""ImageFiles(Enum).
|
||||
|
||||
An enum containing all of the supported extensions for files
|
||||
Box considers images.
|
||||
"""
|
||||
|
||||
ARW = "arw"
|
||||
BMP = "bmp"
|
||||
CR2 = "cr2"
|
||||
@ -115,8 +128,9 @@ class ImageFiles(Enum):
|
||||
SVS = "svs"
|
||||
|
||||
|
||||
"""
|
||||
BoxAuthType
|
||||
class BoxAuthType(Enum):
|
||||
"""BoxAuthType(Enum).
|
||||
|
||||
An enum to tell BoxLoader how you wish to authenticate your Box connection.
|
||||
|
||||
Options are:
|
||||
@ -128,22 +142,23 @@ class ImageFiles(Enum):
|
||||
and `box_enterprise_id` or optionally `box_user_id`.
|
||||
JWT - Use JWT for authentication. Config should be stored on the file
|
||||
system accessible to your app.
|
||||
provide `box_jwt_path`. Optionally, provide `box_user_id` to
|
||||
provide `box_jwt_path`. Optionally, provide `box_user_id` to
|
||||
act as a specific user
|
||||
"""
|
||||
|
||||
|
||||
class BoxAuthType(Enum):
|
||||
"""Use a developer token or a token retrieved from box-sdk-gen"""
|
||||
"""
|
||||
|
||||
TOKEN = "token"
|
||||
"""Use `client_credentials` type grant"""
|
||||
"""Use a developer token or a token retrieved from box-sdk-gen"""
|
||||
|
||||
CCG = "ccg"
|
||||
"""Use JWT bearer token auth"""
|
||||
"""Use `client_credentials` type grant"""
|
||||
|
||||
JWT = "jwt"
|
||||
"""Use JWT bearer token auth"""
|
||||
|
||||
|
||||
"""
|
||||
class BoxAuth(BaseModel):
|
||||
"""BoxAuth.
|
||||
|
||||
`BoxAuth` supports the following authentication methods:
|
||||
|
||||
* Token — either a developer token or any token generated through the Box SDK
|
||||
@ -152,16 +167,15 @@ class BoxAuthType(Enum):
|
||||
* CCG with a service account
|
||||
* CCG with a specified user
|
||||
|
||||
:::note
|
||||
If using JWT authentication, you will need to download the configuration from the
|
||||
Box developer console after generating your public/private key pair. Place this
|
||||
file in your application directory structure somewhere. You will use the path to
|
||||
.. note::
|
||||
If using JWT authentication, you will need to download the configuration from the
|
||||
Box developer console after generating your public/private key pair. Place this
|
||||
file in your application directory structure somewhere. You will use the path to
|
||||
this file when using the `BoxAuth` helper class.
|
||||
:::
|
||||
|
||||
For more information, learn about how to
|
||||
For more information, learn about how to
|
||||
[set up a Box application](https://developer.box.com/guides/getting-started/first-application/),
|
||||
and check out the
|
||||
and check out the
|
||||
[Box authentication guide](https://developer.box.com/guides/authentication/select/)
|
||||
for more about our different authentication options.
|
||||
|
||||
@ -169,7 +183,7 @@ class BoxAuthType(Enum):
|
||||
|
||||
To instantiate, you must provide a ``langchain_box.utilities.BoxAuthType``.
|
||||
|
||||
BoxAuthType is an enum to tell BoxLoader how you wish to autheticate your
|
||||
BoxAuthType is an enum to tell BoxLoader how you wish to authenticate your
|
||||
Box connection.
|
||||
|
||||
Options are:
|
||||
@ -181,7 +195,7 @@ class BoxAuthType(Enum):
|
||||
and `box_enterprise_id` or optionally `box_user_id`.
|
||||
JWT - Use JWT for authentication. Config should be stored on the file
|
||||
system accessible to your app.
|
||||
provide `box_jwt_path`. Optionally, provide `box_user_id` to
|
||||
provide `box_jwt_path`. Optionally, provide `box_user_id` to
|
||||
act as a specific user
|
||||
|
||||
.. code-block:: python
|
||||
@ -198,36 +212,40 @@ class BoxAuthType(Enum):
|
||||
...
|
||||
)
|
||||
|
||||
To see examples for each supported authentication methodology, visit the
|
||||
[Box providers](/docs/integrations/providers/box) page. If you want to
|
||||
use OAuth 2.0 `authorization_code` flow, use
|
||||
[box-sdk-gen](https://github.com/box/box-python-sdk-gen) SDK, get your
|
||||
To see examples for each supported authentication methodology, visit the
|
||||
[Box providers](/docs/integrations/providers/box) page. If you want to
|
||||
use OAuth 2.0 `authorization_code` flow, use
|
||||
[box-sdk-gen](https://github.com/box/box-python-sdk-gen) SDK, get your
|
||||
token, and use `BoxAuthType.TOKEN` type.
|
||||
"""
|
||||
|
||||
|
||||
class BoxAuth(BaseModel):
|
||||
"""Authentication type to use. Must pass BoxAuthType enum"""
|
||||
"""
|
||||
|
||||
auth_type: BoxAuthType
|
||||
""" If using BoxAuthType.TOKEN, provide your token here"""
|
||||
"""langchain_box.utilities.BoxAuthType. Enum describing how to
|
||||
authenticate against Box"""
|
||||
|
||||
box_developer_token: Optional[str] = None
|
||||
""" If using BoxAuthType.TOKEN, provide your token here"""
|
||||
|
||||
box_jwt_path: Optional[str] = None
|
||||
"""If using BoxAuthType.JWT, provide local path to your
|
||||
JWT configuration file"""
|
||||
box_jwt_path: Optional[str] = None
|
||||
"""If using BoxAuthType.CCG, provide your app's client ID"""
|
||||
|
||||
box_client_id: Optional[str] = None
|
||||
"""If using BoxAuthType.CCG, provide your app's client secret"""
|
||||
"""If using BoxAuthType.CCG, provide your app's client ID"""
|
||||
|
||||
box_client_secret: Optional[str] = None
|
||||
"""If using BoxAuthType.CCG, provide your app's client secret"""
|
||||
|
||||
box_enterprise_id: Optional[str] = None
|
||||
"""If using BoxAuthType.CCG, provide your enterprise ID.
|
||||
Only required if you are not sending `box_user_id`"""
|
||||
box_enterprise_id: Optional[str] = None
|
||||
|
||||
box_user_id: Optional[str] = None
|
||||
"""If using BoxAuthType.CCG or BoxAuthType.JWT, providing
|
||||
`box_user_id` will act on behalf of a specific user"""
|
||||
box_user_id: Optional[str] = None
|
||||
|
||||
box_client: Optional[box_sdk_gen.BoxClient] = None
|
||||
custom_header: Dict = dict({"x-box-ai-library": "langchain"})
|
||||
_box_client: Optional[box_sdk_gen.BoxClient] = None
|
||||
_custom_header: Dict = dict({"x-box-ai-library": "langchain"})
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
@ -276,16 +294,16 @@ class BoxAuth(BaseModel):
|
||||
|
||||
return values
|
||||
|
||||
def authorize(self) -> None:
|
||||
def _authorize(self) -> None:
|
||||
match self.auth_type:
|
||||
case "token":
|
||||
try:
|
||||
auth = box_sdk_gen.BoxDeveloperTokenAuth(
|
||||
token=self.box_developer_token
|
||||
)
|
||||
self.box_client = box_sdk_gen.BoxClient(
|
||||
self._box_client = box_sdk_gen.BoxClient(
|
||||
auth=auth
|
||||
).with_extra_headers(extra_headers=self.custom_header)
|
||||
).with_extra_headers(extra_headers=self._custom_header)
|
||||
|
||||
except box_sdk_gen.BoxSDKError as bse:
|
||||
raise RuntimeError(
|
||||
@ -304,15 +322,15 @@ class BoxAuth(BaseModel):
|
||||
)
|
||||
auth = box_sdk_gen.BoxJWTAuth(config=jwt_config)
|
||||
|
||||
self.box_client = box_sdk_gen.BoxClient(
|
||||
self._box_client = box_sdk_gen.BoxClient(
|
||||
auth=auth
|
||||
).with_extra_headers(extra_headers=self.custom_header)
|
||||
).with_extra_headers(extra_headers=self._custom_header)
|
||||
|
||||
if self.box_user_id is not None:
|
||||
user_auth = auth.with_user_subject(self.box_user_id)
|
||||
self.box_client = box_sdk_gen.BoxClient(
|
||||
self._box_client = box_sdk_gen.BoxClient(
|
||||
auth=user_auth
|
||||
).with_extra_headers(extra_headers=self.custom_header)
|
||||
).with_extra_headers(extra_headers=self._custom_header)
|
||||
|
||||
except box_sdk_gen.BoxSDKError as bse:
|
||||
raise RuntimeError(
|
||||
@ -340,9 +358,9 @@ class BoxAuth(BaseModel):
|
||||
)
|
||||
auth = box_sdk_gen.BoxCCGAuth(config=ccg_config)
|
||||
|
||||
self.box_client = box_sdk_gen.BoxClient(
|
||||
self._box_client = box_sdk_gen.BoxClient(
|
||||
auth=auth
|
||||
).with_extra_headers(extra_headers=self.custom_header)
|
||||
).with_extra_headers(extra_headers=self._custom_header)
|
||||
|
||||
except box_sdk_gen.BoxSDKError as bse:
|
||||
raise RuntimeError(
|
||||
@ -363,25 +381,26 @@ class BoxAuth(BaseModel):
|
||||
|
||||
def get_client(self) -> box_sdk_gen.BoxClient:
|
||||
"""Instantiate the Box SDK."""
|
||||
if self.box_client is None:
|
||||
self.authorize()
|
||||
if self._box_client is None:
|
||||
self._authorize()
|
||||
|
||||
return self.box_client
|
||||
return self._box_client
|
||||
|
||||
|
||||
class BoxAPIWrapper(BaseModel):
|
||||
class _BoxAPIWrapper(BaseModel):
|
||||
"""Wrapper for Box API."""
|
||||
|
||||
"""String containing the Box Developer Token generated in the developer console"""
|
||||
box_developer_token: Optional[str] = None
|
||||
"""Configured langchain_box.utilities.BoxAuth object"""
|
||||
"""String containing the Box Developer Token generated in the developer console"""
|
||||
|
||||
box_auth: Optional[BoxAuth] = None
|
||||
"""Configured langchain_box.utilities.BoxAuth object"""
|
||||
|
||||
character_limit: Optional[int] = -1
|
||||
"""character_limit is an int that caps the number of characters to
|
||||
return per document."""
|
||||
character_limit: Optional[int] = -1
|
||||
|
||||
box: Optional[box_sdk_gen.BoxClient]
|
||||
file_count: int = 0
|
||||
_box: Optional[box_sdk_gen.BoxClient]
|
||||
|
||||
class Config:
|
||||
arbitrary_types_allowed = True
|
||||
@ -390,7 +409,7 @@ class BoxAPIWrapper(BaseModel):
|
||||
|
||||
@root_validator(allow_reuse=True)
|
||||
def validate_box_api_inputs(cls, values: Dict[str, Any]) -> Dict[str, Any]:
|
||||
values["box"] = None
|
||||
values["_box"] = None
|
||||
|
||||
"""Validate that TOKEN auth type provides box_developer_token."""
|
||||
if not values.get("box_auth"):
|
||||
@ -402,7 +421,7 @@ class BoxAPIWrapper(BaseModel):
|
||||
)
|
||||
else:
|
||||
box_auth = values.get("box_auth")
|
||||
values["box"] = box_auth.get_client() # type: ignore[union-attr]
|
||||
values["_box"] = box_auth.get_client() # type: ignore[union-attr]
|
||||
|
||||
return values
|
||||
|
||||
@ -411,11 +430,11 @@ class BoxAPIWrapper(BaseModel):
|
||||
auth_type=BoxAuthType.TOKEN, box_developer_token=self.box_developer_token
|
||||
)
|
||||
|
||||
self.box = box_auth.get_client()
|
||||
self._box = box_auth.get_client()
|
||||
|
||||
def _do_request(self, url: str) -> Any:
|
||||
try:
|
||||
access_token = self.box.auth.retrieve_token().access_token # type: ignore[union-attr]
|
||||
access_token = self._box.auth.retrieve_token().access_token # type: ignore[union-attr]
|
||||
except box_sdk_gen.BoxSDKError as bse:
|
||||
raise RuntimeError(f"Error getting client from jwt token: {bse.message}")
|
||||
|
||||
@ -423,38 +442,17 @@ class BoxAPIWrapper(BaseModel):
|
||||
resp.raise_for_status()
|
||||
return resp.content
|
||||
|
||||
def get_folder_items(self, folder_id: str) -> box_sdk_gen.Items:
|
||||
"""Get all the items in a folder. Accepts folder_id as str.
|
||||
returns box_sdk_gen.Items"""
|
||||
if self.box is None:
|
||||
self.get_box_client()
|
||||
|
||||
try:
|
||||
folder_contents = self.box.folders.get_folder_items( # type: ignore[union-attr]
|
||||
folder_id, fields=["id", "type", "name"]
|
||||
)
|
||||
except box_sdk_gen.BoxAPIError as bae:
|
||||
raise RuntimeError(
|
||||
f"BoxAPIError: Error getting folder content: {bae.message}"
|
||||
)
|
||||
except box_sdk_gen.BoxSDKError as bse:
|
||||
raise RuntimeError(
|
||||
f"BoxSDKError: Error getting folder content: {bse.message}"
|
||||
)
|
||||
|
||||
return folder_contents.entries
|
||||
|
||||
def get_text_representation(self, file_id: str = "") -> tuple[str, str, str]:
|
||||
def _get_text_representation(self, file_id: str = "") -> tuple[str, str, str]:
|
||||
try:
|
||||
from box_sdk_gen import BoxAPIError, BoxSDKError
|
||||
except ImportError:
|
||||
raise ImportError("You must run `pip install box-sdk-gen`")
|
||||
|
||||
if self.box is None:
|
||||
if self._box is None:
|
||||
self.get_box_client()
|
||||
|
||||
try:
|
||||
file = self.box.files.get_file_by_id( # type: ignore[union-attr]
|
||||
file = self._box.files.get_file_by_id( # type: ignore[union-attr]
|
||||
file_id,
|
||||
x_rep_hints="[extracted_text]",
|
||||
fields=["name", "representations", "type"],
|
||||
@ -486,8 +484,10 @@ class BoxAPIWrapper(BaseModel):
|
||||
except requests.exceptions.HTTPError:
|
||||
return None, None, None # type: ignore[return-value]
|
||||
|
||||
if self.character_limit > 0: # type: ignore[operator]
|
||||
content = raw_content[0 : self.character_limit]
|
||||
if (
|
||||
self.character_limit is not None and self.character_limit > 0 # type: ignore[operator]
|
||||
):
|
||||
content = raw_content[0 : (self.character_limit - 1)]
|
||||
else:
|
||||
content = raw_content
|
||||
|
||||
@ -499,16 +499,16 @@ class BoxAPIWrapper(BaseModel):
|
||||
"""Load a file from a Box id. Accepts file_id as str.
|
||||
Returns `Document`"""
|
||||
|
||||
if self.box is None:
|
||||
if self._box is None:
|
||||
self.get_box_client()
|
||||
|
||||
file = self.box.files.get_file_by_id( # type: ignore[union-attr]
|
||||
file = self._box.files.get_file_by_id( # type: ignore[union-attr]
|
||||
file_id, fields=["name", "type", "extension"]
|
||||
)
|
||||
|
||||
if file.type == "file":
|
||||
if hasattr(DocumentFiles, file.extension.upper()):
|
||||
file_name, content, url = self.get_text_representation(file_id=file_id)
|
||||
file_name, content, url = self._get_text_representation(file_id=file_id)
|
||||
|
||||
if file_name is None or content is None or url is None:
|
||||
return None
|
||||
@ -523,3 +523,95 @@ class BoxAPIWrapper(BaseModel):
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
def get_folder_items(self, folder_id: str) -> box_sdk_gen.Items:
|
||||
"""Get all the items in a folder. Accepts folder_id as str.
|
||||
returns box_sdk_gen.Items"""
|
||||
if self._box is None:
|
||||
self.get_box_client()
|
||||
|
||||
try:
|
||||
folder_contents = self._box.folders.get_folder_items( # type: ignore[union-attr]
|
||||
folder_id, fields=["id", "type", "name"]
|
||||
)
|
||||
except box_sdk_gen.BoxAPIError as bae:
|
||||
raise RuntimeError(
|
||||
f"BoxAPIError: Error getting folder content: {bae.message}"
|
||||
)
|
||||
except box_sdk_gen.BoxSDKError as bse:
|
||||
raise RuntimeError(
|
||||
f"BoxSDKError: Error getting folder content: {bse.message}"
|
||||
)
|
||||
|
||||
return folder_contents.entries
|
||||
|
||||
def search_box(self, query: str) -> List[Document]:
|
||||
if self._box is None:
|
||||
self.get_box_client()
|
||||
|
||||
files = []
|
||||
|
||||
try:
|
||||
results = self._box.search.search_for_content( # type: ignore[union-attr]
|
||||
query=query, fields=["id", "type", "extension"]
|
||||
)
|
||||
|
||||
if results.entries is None or len(results.entries) <= 0:
|
||||
return None # type: ignore[return-value]
|
||||
|
||||
for file in results.entries:
|
||||
if (
|
||||
file is not None
|
||||
and file.type == "file"
|
||||
and hasattr(DocumentFiles, file.extension.upper())
|
||||
):
|
||||
doc = self.get_document_by_file_id(file.id)
|
||||
|
||||
if doc is not None:
|
||||
files.append(doc)
|
||||
|
||||
return files
|
||||
except box_sdk_gen.BoxAPIError as bae:
|
||||
raise RuntimeError(
|
||||
f"BoxAPIError: Error getting search results: {bae.message}"
|
||||
)
|
||||
except box_sdk_gen.BoxSDKError as bse:
|
||||
raise RuntimeError(
|
||||
f"BoxSDKError: Error getting search results: {bse.message}"
|
||||
)
|
||||
|
||||
def ask_box_ai(self, query: str, box_file_ids: List[str]) -> List[Document]:
|
||||
if self._box is None:
|
||||
self.get_box_client()
|
||||
|
||||
ai_mode = box_sdk_gen.CreateAiAskMode.SINGLE_ITEM_QA.value
|
||||
|
||||
if len(box_file_ids) > 1:
|
||||
ai_mode = box_sdk_gen.CreateAiAskMode.MULTIPLE_ITEM_QA.value
|
||||
elif len(box_file_ids) <= 0:
|
||||
raise ValueError("BOX_AI_ASK requires at least one file ID")
|
||||
|
||||
items = []
|
||||
|
||||
for file_id in box_file_ids:
|
||||
item = box_sdk_gen.CreateAiAskItems(
|
||||
id=file_id, type=box_sdk_gen.CreateAiAskItemsTypeField.FILE.value
|
||||
)
|
||||
items.append(item)
|
||||
|
||||
try:
|
||||
response = self._box.ai.create_ai_ask(ai_mode, query, items) # type: ignore[union-attr]
|
||||
except box_sdk_gen.BoxAPIError as bae:
|
||||
raise RuntimeError(
|
||||
f"BoxAPIError: Error getting Box AI result: {bae.message}"
|
||||
)
|
||||
except box_sdk_gen.BoxSDKError as bse:
|
||||
raise RuntimeError(
|
||||
f"BoxSDKError: Error getting Box AI result: {bse.message}"
|
||||
)
|
||||
|
||||
content = response.answer
|
||||
|
||||
metadata = {"source": "Box AI", "title": f"Box AI {query}"}
|
||||
|
||||
return [Document(page_content=content, metadata=metadata)]
|
||||
|
@ -1,42 +1,3 @@
|
||||
from langchain_core.documents import Document
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from langchain_box.document_loaders import BoxLoader
|
||||
|
||||
|
||||
# test Document retrieval
|
||||
def test_file_load(mocker: MockerFixture) -> None:
    """Run ``BoxLoader.load`` twice with ``get_document_by_file_id`` patched."""
    patch_target = "langchain_box.utilities.BoxAPIWrapper.get_document_by_file_id"

    # First pass: the patched lookup returns an empty list.
    mocker.patch(patch_target, return_value=[])
    first_loader = BoxLoader(  # type: ignore[call-arg]
        box_developer_token="box_developer_token",
        box_file_ids=["box_file_ids"],
    )
    assert first_loader.load()

    # Second pass: the patched lookup returns a Document.
    mocker.patch(
        patch_target,
        return_value=Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        ),
    )
    second_loader = BoxLoader(  # type: ignore[call-arg]
        box_developer_token="box_developer_token",
        box_file_ids=["box_file_ids"],
    )
    loaded = second_loader.load()

    expected = [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
    assert loaded == expected
|
||||
"""
|
||||
TODO: build live integration tests
|
||||
"""
|
||||
|
@ -0,0 +1,3 @@
|
||||
"""
|
||||
TODO: build live integration tests
|
||||
"""
|
@ -1,47 +1,3 @@
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from langchain_box.utilities import BoxAPIWrapper
|
||||
|
||||
|
||||
@pytest.fixture()
def mock_worker(mocker: MockerFixture) -> None:
    """Patch ``BoxAuth.authorize``, ``BoxAuth.get_client`` and text extraction."""
    auth_targets = (
        "langchain_box.utilities.BoxAuth.authorize",
        "langchain_box.utilities.BoxAuth.get_client",
    )
    for target in auth_targets:
        mocker.patch(target, return_value=Mock())

    # The text-representation helper returns a fixed three-element tuple.
    mocker.patch(
        "langchain_box.utilities.BoxAPIWrapper.get_text_representation",
        return_value=("filename", "content", "url"),
    )
|
||||
|
||||
|
||||
def test_get_documents_by_file_ids(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``get_document_by_file_id`` (patched) returns the configured Document."""
    stub_document = Document(
        page_content="content", metadata={"source": "url", "title": "filename"}
    )
    mocker.patch(
        "langchain_box.utilities.BoxAPIWrapper.get_document_by_file_id",
        return_value=stub_document,
    )

    wrapper = BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]
    result = wrapper.get_document_by_file_id("box_file_id")

    expected = Document(
        page_content="content", metadata={"source": "url", "title": "filename"}
    )
    assert result == expected
|
||||
|
||||
|
||||
def test_get_documents_by_folder_id(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``get_folder_items`` (patched) returns the configured item list."""
    mocker.patch(
        "langchain_box.utilities.BoxAPIWrapper.get_folder_items",
        return_value=[{"id": "file_id", "type": "file"}],
    )

    wrapper = BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]
    items = wrapper.get_folder_items("box_folder_id")

    assert items == [{"id": "file_id", "type": "file"}]
|
||||
"""
|
||||
TODO: build live integration tests
|
||||
"""
|
||||
|
@ -1,4 +1,6 @@
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from langchain_box.document_loaders import BoxLoader
|
||||
from langchain_box.utilities import BoxAuth, BoxAuthType
|
||||
@ -56,3 +58,42 @@ def test_failed_initialization_files_and_folders() -> None:
|
||||
box_folder_id="box_folder_id",
|
||||
box_file_ids=["box_file_ids"],
|
||||
)
|
||||
|
||||
|
||||
# test Document retrieval
|
||||
def test_file_load(mocker: MockerFixture) -> None:
    """Run ``BoxLoader.load`` twice with ``get_document_by_file_id`` patched."""
    patch_target = "langchain_box.utilities._BoxAPIWrapper.get_document_by_file_id"

    # First pass: the patched lookup returns an empty list.
    mocker.patch(patch_target, return_value=[])
    first_loader = BoxLoader(  # type: ignore[call-arg]
        box_developer_token="box_developer_token",
        box_file_ids=["box_file_ids"],
    )
    assert first_loader.load()

    # Second pass: the patched lookup returns a Document.
    mocker.patch(
        patch_target,
        return_value=Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        ),
    )
    second_loader = BoxLoader(  # type: ignore[call-arg]
        box_developer_token="box_developer_token",
        box_file_ids=["box_file_ids"],
    )
    loaded = second_loader.load()

    expected = [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
    assert loaded == expected
|
||||
|
@ -0,0 +1,89 @@
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from langchain_box.retrievers import BoxRetriever
|
||||
from langchain_box.utilities import BoxAuth, BoxAuthType
|
||||
|
||||
|
||||
# Test auth types
|
||||
def test_direct_token_initialization() -> None:
    """A token and file IDs passed to ``BoxRetriever`` are readable back."""
    token = "box_developer_token"

    box_retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token=token,
        box_file_ids=["box_file_ids"],
    )

    assert box_retriever.box_developer_token == token
    assert box_retriever.box_file_ids == ["box_file_ids"]
|
||||
|
||||
|
||||
def test_failed_direct_token_initialization() -> None:
    """Building a ``BoxRetriever`` from only ``box_file_ids`` raises ValueError."""
    with pytest.raises(ValueError):
        # The instance is never used; only the constructor's failure matters.
        BoxRetriever(box_file_ids=["box_file_ids"])  # type: ignore[call-arg]
|
||||
|
||||
|
||||
def test_auth_initialization() -> None:
    """A ``BoxRetriever`` built from a ``BoxAuth`` object keeps its file IDs."""
    token_auth = BoxAuth(
        auth_type=BoxAuthType.TOKEN, box_developer_token="box_developer_token"
    )

    box_retriever = BoxRetriever(  # type: ignore[call-arg]
        box_auth=token_auth,
        box_file_ids=["box_file_ids"],
    )

    assert box_retriever.box_file_ids == ["box_file_ids"]
|
||||
|
||||
|
||||
# test search retrieval
|
||||
def test_search(mocker: MockerFixture) -> None:
    """``BoxRetriever.invoke`` returns what the patched ``search_box`` yields."""
    search_hits = [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.search_box",
        return_value=search_hits,
    )

    # No file IDs are supplied to the retriever in this test.
    box_retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token="box_developer_token"
    )
    retrieved = box_retriever.invoke("query")

    expected = [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
    assert retrieved == expected
|
||||
|
||||
|
||||
# test ai retrieval
|
||||
def test_ai(mocker: MockerFixture) -> None:
    """``BoxRetriever.invoke`` returns what the patched ``ask_box_ai`` yields."""
    ai_answer = [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=ai_answer,
    )

    # File IDs are supplied to the retriever in this test.
    box_retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token="box_developer_token", box_file_ids=["box_file_ids"]
    )
    retrieved = box_retriever.invoke("query")

    expected = [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
    assert retrieved == expected
|
@ -2,9 +2,10 @@ from langchain_box import __all__
|
||||
|
||||
EXPECTED_ALL = [
|
||||
"BoxLoader",
|
||||
"BoxRetriever",
|
||||
"BoxAuth",
|
||||
"BoxAuthType",
|
||||
"BoxAPIWrapper",
|
||||
"_BoxAPIWrapper",
|
||||
"__version__",
|
||||
]
|
||||
|
||||
|
@ -1,7 +1,21 @@
|
||||
import pytest
|
||||
from pydantic.v1.error_wrappers import ValidationError
|
||||
from unittest.mock import Mock
|
||||
|
||||
from langchain_box.utilities import BoxAPIWrapper, BoxAuth, BoxAuthType
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
from pydantic.v1.error_wrappers import ValidationError
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
from langchain_box.utilities import BoxAuth, BoxAuthType, _BoxAPIWrapper
|
||||
|
||||
|
||||
@pytest.fixture()
def mock_worker(mocker: MockerFixture) -> None:
    """Patch ``BoxAuth._authorize``, ``BoxAuth.get_client`` and text extraction."""
    auth_targets = (
        "langchain_box.utilities.BoxAuth._authorize",
        "langchain_box.utilities.BoxAuth.get_client",
    )
    for target in auth_targets:
        mocker.patch(target, return_value=Mock())

    # The text-representation helper returns a fixed three-element tuple.
    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper._get_text_representation",
        return_value=("filename", "content", "url"),
    )
|
||||
|
||||
|
||||
# Test auth types
|
||||
@ -79,7 +93,7 @@ def test_failed_ccg_initialization() -> None:
|
||||
|
||||
|
||||
def test_direct_token_initialization() -> None:
|
||||
box = BoxAPIWrapper( # type: ignore[call-arg]
|
||||
box = _BoxAPIWrapper( # type: ignore[call-arg]
|
||||
box_developer_token="box_developer_token"
|
||||
)
|
||||
|
||||
@ -91,11 +105,126 @@ def test_auth_initialization() -> None:
|
||||
auth_type=BoxAuthType.TOKEN, box_developer_token="box_developer_token"
|
||||
)
|
||||
|
||||
box = BoxAPIWrapper(box_auth=auth) # type: ignore[call-arg] # noqa: F841
|
||||
box = _BoxAPIWrapper(box_auth=auth) # type: ignore[call-arg] # noqa: F841
|
||||
|
||||
assert auth.box_developer_token == "box_developer_token"
|
||||
|
||||
|
||||
def test_failed_initialization_no_auth() -> None:
|
||||
with pytest.raises(ValidationError):
|
||||
box = BoxAPIWrapper() # type: ignore[call-arg] # noqa: F841
|
||||
box = _BoxAPIWrapper() # type: ignore[call-arg] # noqa: F841
|
||||
|
||||
|
||||
def test_get_documents_by_file_ids(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``get_document_by_file_id`` (patched) returns the configured Document."""
    stub_document = Document(
        page_content="content", metadata={"source": "url", "title": "filename"}
    )
    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.get_document_by_file_id",
        return_value=stub_document,
    )

    wrapper = _BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]
    result = wrapper.get_document_by_file_id("box_file_id")

    expected = Document(
        page_content="content", metadata={"source": "url", "title": "filename"}
    )
    assert result == expected
|
||||
|
||||
|
||||
def test_get_documents_by_folder_id(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``get_folder_items`` (patched) returns the configured item list."""
    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.get_folder_items",
        return_value=[{"id": "file_id", "type": "file"}],
    )

    wrapper = _BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]
    items = wrapper.get_folder_items("box_folder_id")

    assert items == [{"id": "file_id", "type": "file"}]
|
||||
|
||||
|
||||
def test_box_search(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``search_box`` (patched) returns the configured list of Documents."""
    search_hits = [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.search_box",
        return_value=search_hits,
    )

    wrapper = _BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]
    found = wrapper.search_box("query")

    expected = [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
    assert found == expected
|
||||
|
||||
|
||||
def test_ask_box_ai_single_file(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``ask_box_ai`` (patched) returns the configured answer for one file ID.

    The method is called with both arguments, ``query`` and ``box_file_ids``,
    so the call stays valid if the patch is ever replaced by the real method
    or an autospec'd mock.
    """
    file_ids = ["box_file_ids"]
    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=[
            Document(
                page_content="Test file mode\ndocument contents",
                metadata={"title": "Testing Files"},
            )
        ],
    )

    box = _BoxAPIWrapper(  # type: ignore[call-arg]
        box_developer_token="box_developer_token", box_file_ids=file_ids
    )

    documents = box.ask_box_ai("query", file_ids)

    assert documents == [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
|
||||
|
||||
|
||||
def test_ask_box_ai_multiple_files(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``ask_box_ai`` (patched) returns the configured answer for two file IDs.

    The method is called with both arguments, ``query`` and ``box_file_ids``,
    so the call stays valid if the patch is ever replaced by the real method
    or an autospec'd mock.
    """
    file_ids = ["box_file_id 1", "box_file_id 2"]
    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=[
            Document(
                page_content="Test file 1 mode\ndocument contents",
                metadata={"title": "Test File 1"},
            ),
            Document(
                page_content="Test file 2 mode\ndocument contents",
                metadata={"title": "Test File 2"},
            ),
        ],
    )

    box = _BoxAPIWrapper(  # type: ignore[call-arg]
        box_developer_token="box_developer_token",
        box_file_ids=file_ids,
    )

    documents = box.ask_box_ai("query", file_ids)

    assert documents == [
        Document(
            page_content="Test file 1 mode\ndocument contents",
            metadata={"title": "Test File 1"},
        ),
        Document(
            page_content="Test file 2 mode\ndocument contents",
            metadata={"title": "Test File 2"},
        ),
    ]
|
||||
|
Loading…
Reference in New Issue
Block a user