mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-29 18:08:36 +00:00
box: migrate to repo (#27969)
This commit is contained in:
parent
1ad49957f5
commit
8a5b9bf2ad
@ -141,5 +141,5 @@ packages:
|
|||||||
repo: langchain-ai/langchain
|
repo: langchain-ai/langchain
|
||||||
path: libs/partners/ollama
|
path: libs/partners/ollama
|
||||||
- name: langchain-box
|
- name: langchain-box
|
||||||
repo: langchain-ai/langchain
|
repo: langchain-ai/langchain-box
|
||||||
path: libs/partners/box
|
path: libs/box
|
||||||
|
1
libs/partners/box/.gitignore
vendored
1
libs/partners/box/.gitignore
vendored
@ -1 +0,0 @@
|
|||||||
__pycache__
|
|
@ -1,21 +0,0 @@
|
|||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2024 LangChain, Inc.
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
@ -1,65 +0,0 @@
|
|||||||
# Every target in this file is a command, not a file, so all of them are
# declared phony. Otherwise a stray file named e.g. `lint` or `help` in the
# package directory would make the corresponding target a silent no-op.
# `docker_tests` and `extended_tests` are kept from the original declaration
# even though no rule for them is defined here.
.PHONY: all help \
	test tests test_watch integration_test integration_tests \
	docker_tests extended_tests \
	lint lint_diff lint_package lint_tests \
	format format_diff \
	spell_check spell_fix check_imports

# Default target executed when no arguments are given to make.
all: help

# Define a variable for the test file path.
# `?=` lets callers override it: `make test TEST_FILE=tests/unit_tests/test_x.py`.
TEST_FILE ?= tests/unit_tests/
# Target-specific override: the integration targets point at their own tree.
integration_test integration_tests: TEST_FILE = tests/integration_tests/


# unit tests are run with the --disable-socket flag to prevent network calls
test tests:
	poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE)

test_watch:
	poetry run ptw --snapshot-update --now . -- -vv $(TEST_FILE)

# integration tests are run without the --disable-socket flag to allow network calls
integration_test integration_tests:
	poetry run pytest $(TEST_FILE)

######################
# LINTING AND FORMATTING
######################

# Define a variable for Python and notebook files.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache
# Target-specific values narrow what each lint/format flavour operates on.
lint format: PYTHON_FILES=.
# Only files changed relative to master (deleted files excluded via --diff-filter=d).
# `$$` passes a literal `$` through make to grep's end-of-line anchor.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/box --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=langchain_box
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test

lint lint_diff lint_package lint_tests:
	# NOTE(review): this first pass lints the whole tree regardless of
	# PYTHON_FILES; kept as-is so an empty PYTHON_FILES (no changed files)
	# still has something to check.
	poetry run ruff .
	poetry run ruff format $(PYTHON_FILES) --diff
	poetry run ruff --select I $(PYTHON_FILES)
	# `&&` (not `;`) so mypy is not run if the cache directory cannot be created.
	mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)

format format_diff:
	poetry run ruff format $(PYTHON_FILES)
	poetry run ruff --select I --fix $(PYTHON_FILES)

spell_check:
	poetry run codespell --toml pyproject.toml

spell_fix:
	poetry run codespell --toml pyproject.toml -w

# The prerequisites are every Python file in the package; `$^` hands that
# list to the import checker.
check_imports: $(shell find langchain_box -name '*.py')
	poetry run python ./scripts/check_imports.py $^

######################
# HELP
######################

help:
	@echo '----'
	@echo 'check_imports                - check imports'
	@echo 'format                       - run code formatters'
	@echo 'format_diff                  - run code formatters on files changed vs master'
	@echo 'lint                         - run linters'
	@echo 'lint_diff                    - run linters on files changed vs master'
	@echo 'lint_package                 - run linters on langchain_box only'
	@echo 'lint_tests                   - run linters on tests only'
	@echo 'spell_check                  - run codespell'
	@echo 'spell_fix                    - run codespell and write fixes'
	@echo 'test                         - run unit tests'
	@echo 'tests                        - run unit tests'
	@echo 'test TEST_FILE=<test_file>   - run all tests in file'
	@echo 'test_watch                   - re-run unit tests on file changes'
	@echo 'integration_tests            - run integration tests (network allowed)'
|
|
@ -1,195 +1,3 @@
|
|||||||
# langchain-box
|
This package has moved!
|
||||||
|
|
||||||
This package contains the LangChain integration with Box. For more information about
|
https://github.com/langchain-ai/langchain-box/tree/main/libs/box
|
||||||
Box, check out our [developer documentation](https://developer.box.com).
|
|
||||||
|
|
||||||
## Pre-requisites
|
|
||||||
|
|
||||||
In order to integrate with Box, you need a few things:
|
|
||||||
|
|
||||||
* A Box instance — if you are not a current Box customer, sign up for a
|
|
||||||
[free dev account](https://account.box.com/signup/n/developer#ty9l3).
|
|
||||||
* A Box app — more on how to
|
|
||||||
[create an app](https://developer.box.com/guides/getting-started/first-application/)
|
|
||||||
* Your app approved in your Box instance — This is done by your admin.
|
|
||||||
The good news is if you are using a free developer account, you are the admin.
|
|
||||||
[Authorize your app](https://developer.box.com/guides/authorization/custom-app-approval/#manual-approval)
|
|
||||||
|
|
||||||
## Installation
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pip install -U langchain-box
|
|
||||||
```
|
|
||||||
|
|
||||||
## Authentication
|
|
||||||
|
|
||||||
The `langchain-box` package offers some flexibility in authentication. The
|
|
||||||
most basic authentication method is by using a developer token. This can be
|
|
||||||
found in the [Box developer console](https://account.box.com/developers/console)
|
|
||||||
on the configuration screen. This token is purposely short-lived (1 hour) and is
|
|
||||||
intended for development. With this token, you can add it to your environment as
|
|
||||||
`BOX_DEVELOPER_TOKEN`, you can pass it directly to the loader, or you can use the
|
|
||||||
`BoxAuth` authentication helper class.
|
|
||||||
|
|
||||||
We will cover passing it directly to the loader in the section below.
|
|
||||||
|
|
||||||
### BoxAuth helper class
|
|
||||||
|
|
||||||
`BoxAuth` supports the following authentication methods:
|
|
||||||
|
|
||||||
* Token — either a developer token or any token generated through the Box SDK
|
|
||||||
* JWT with a service account
|
|
||||||
* JWT with a specified user
|
|
||||||
* CCG with a service account
|
|
||||||
* CCG with a specified user
|
|
||||||
|
|
||||||
> [!NOTE]
|
|
||||||
> If using JWT authentication, you will need to download the configuration from the Box
|
|
||||||
> developer console after generating your public/private key pair. Place this file in your
|
|
||||||
> application directory structure somewhere. You will use the path to this file when using
|
|
||||||
> the `BoxAuth` helper class.
|
|
||||||
|
|
||||||
For more information, learn about how to
|
|
||||||
[set up a Box application](https://developer.box.com/guides/getting-started/first-application/),
|
|
||||||
and check out the
|
|
||||||
[Box authentication guide](https://developer.box.com/guides/authentication/select/)
|
|
||||||
for more about our different authentication options.
|
|
||||||
|
|
||||||
Examples:
|
|
||||||
|
|
||||||
**Token**
|
|
||||||
|
|
||||||
```python
|
|
||||||
from langchain_box.document_loaders import BoxLoader
|
|
||||||
from langchain_box.utilities import BoxAuth, BoxAuthType
|
|
||||||
|
|
||||||
auth = BoxAuth(
|
|
||||||
auth_type=BoxAuthType.TOKEN,
|
|
||||||
box_developer_token=box_developer_token
|
|
||||||
)
|
|
||||||
|
|
||||||
loader = BoxLoader(
|
|
||||||
box_auth=auth,
|
|
||||||
...
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
**JWT with a service account**
|
|
||||||
|
|
||||||
```python
|
|
||||||
from langchain_box.document_loaders import BoxLoader
|
|
||||||
from langchain_box.utilities import BoxAuth, BoxAuthType
|
|
||||||
|
|
||||||
auth = BoxAuth(
|
|
||||||
auth_type=BoxAuthType.JWT,
|
|
||||||
box_jwt_path=box_jwt_path
|
|
||||||
)
|
|
||||||
|
|
||||||
loader = BoxLoader(
|
|
||||||
box_auth=auth,
|
|
||||||
...
)
|
|
||||||
```
|
|
||||||
|
|
||||||
**JWT with a specified user**
|
|
||||||
|
|
||||||
```python
|
|
||||||
from langchain_box.document_loaders import BoxLoader
|
|
||||||
from langchain_box.utilities import BoxAuth, BoxAuthType
|
|
||||||
|
|
||||||
auth = BoxAuth(
|
|
||||||
auth_type=BoxAuthType.JWT,
|
|
||||||
box_jwt_path=box_jwt_path,
|
|
||||||
box_user_id=box_user_id
|
|
||||||
)
|
|
||||||
|
|
||||||
loader = BoxLoader(
|
|
||||||
box_auth=auth,
|
|
||||||
...
)
|
|
||||||
```
|
|
||||||
|
|
||||||
**CCG with a service account**
|
|
||||||
|
|
||||||
```python
|
|
||||||
from langchain_box.document_loaders import BoxLoader
|
|
||||||
from langchain_box.utilities import BoxAuth, BoxAuthType
|
|
||||||
|
|
||||||
auth = BoxAuth(
|
|
||||||
auth_type=BoxAuthType.CCG,
|
|
||||||
box_client_id=box_client_id,
|
|
||||||
box_client_secret=box_client_secret,
|
|
||||||
box_enterprise_id=box_enterprise_id
|
|
||||||
)
|
|
||||||
|
|
||||||
loader = BoxLoader(
|
|
||||||
box_auth=auth,
|
|
||||||
...
)
|
|
||||||
```
|
|
||||||
|
|
||||||
**CCG with a specified user**
|
|
||||||
|
|
||||||
```python
|
|
||||||
from langchain_box.document_loaders import BoxLoader
|
|
||||||
from langchain_box.utilities import BoxAuth, BoxAuthType
|
|
||||||
|
|
||||||
auth = BoxAuth(
|
|
||||||
auth_type=BoxAuthType.CCG,
|
|
||||||
box_client_id=box_client_id,
|
|
||||||
box_client_secret=box_client_secret,
|
|
||||||
box_user_id=box_user_id
|
|
||||||
)
|
|
||||||
|
|
||||||
loader = BoxLoader(
|
|
||||||
box_auth=auth,
|
|
||||||
...
)
|
|
||||||
```
|
|
||||||
|
|
||||||
## Document Loaders
|
|
||||||
|
|
||||||
The `BoxLoader` class helps you get your unstructured content from Box
|
|
||||||
in LangChain's `Document` format. You can do this with either a `List[str]`
|
|
||||||
containing Box file IDs, or with a `str` containing a Box folder ID.
|
|
||||||
|
|
||||||
If getting files from a folder by folder ID, you can also set the `recursive` flag (a `bool`) to
|
|
||||||
tell the loader to get all sub-folders in that folder, as well.
|
|
||||||
|
|
||||||
:::info
|
|
||||||
A Box instance can contain Petabytes of files, and folders can contain millions
|
|
||||||
of files. Be intentional when choosing what folders you choose to index. And we
|
|
||||||
recommend never getting all files from folder 0 recursively. Folder ID 0 is your
|
|
||||||
root folder.
|
|
||||||
:::
|
|
||||||
|
|
||||||
### Load files
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
|
|
||||||
from langchain_box.document_loaders import BoxLoader
|
|
||||||
|
|
||||||
os.environ["BOX_DEVELOPER_TOKEN"] = "df21df2df21df2d1f21df2df1"
|
|
||||||
|
|
||||||
loader = BoxLoader(
|
|
||||||
box_file_ids=["12345", "67890"],
|
|
||||||
character_limit=10000 # Optional. Defaults to no limit
|
|
||||||
)
|
|
||||||
|
|
||||||
docs = loader.lazy_load()
|
|
||||||
```
|
|
||||||
|
|
||||||
### Load from folder
|
|
||||||
|
|
||||||
```python
|
|
||||||
import os
|
|
||||||
|
|
||||||
from langchain_box.document_loaders import BoxLoader
|
|
||||||
|
|
||||||
os.environ["BOX_DEVELOPER_TOKEN"] = "df21df2df21df2d1f21df2df1"
|
|
||||||
|
|
||||||
loader = BoxLoader(
|
|
||||||
box_folder_id="12345",
|
|
||||||
recursive=False, # Optional. return entire tree, defaults to False
|
|
||||||
character_limit=10000 # Optional. Defaults to no limit
|
|
||||||
)
|
|
||||||
|
|
||||||
docs = loader.lazy_load()
|
|
||||||
```
|
|
||||||
|
@ -1,31 +0,0 @@
|
|||||||
from importlib import metadata
|
|
||||||
|
|
||||||
from langchain_box.document_loaders import BoxLoader
|
|
||||||
from langchain_box.retrievers import BoxRetriever
|
|
||||||
from langchain_box.utilities.box import (
|
|
||||||
BoxAuth,
|
|
||||||
BoxAuthType,
|
|
||||||
BoxSearchOptions,
|
|
||||||
DocumentFiles,
|
|
||||||
SearchTypeFilter,
|
|
||||||
_BoxAPIWrapper,
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
__version__ = metadata.version(__package__)
|
|
||||||
except metadata.PackageNotFoundError:
|
|
||||||
# Case where package metadata is not available.
|
|
||||||
__version__ = ""
|
|
||||||
del metadata # optional, avoids polluting the results of dir(__package__)
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"BoxLoader",
|
|
||||||
"BoxRetriever",
|
|
||||||
"BoxAuth",
|
|
||||||
"BoxAuthType",
|
|
||||||
"BoxSearchOptions",
|
|
||||||
"DocumentFiles",
|
|
||||||
"SearchTypeFilter",
|
|
||||||
"_BoxAPIWrapper",
|
|
||||||
"__version__",
|
|
||||||
]
|
|
@ -1,5 +0,0 @@
|
|||||||
"""Box Document Loaders."""
|
|
||||||
|
|
||||||
from langchain_box.document_loaders.box import BoxLoader
|
|
||||||
|
|
||||||
__all__ = ["BoxLoader"]
|
|
@ -1,260 +0,0 @@
|
|||||||
from typing import Iterator, List, Optional
|
|
||||||
|
|
||||||
from box_sdk_gen import FileBaseTypeField # type: ignore
|
|
||||||
from langchain_core.document_loaders.base import BaseLoader
|
|
||||||
from langchain_core.documents import Document
|
|
||||||
from langchain_core.utils import from_env
|
|
||||||
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
||||||
from typing_extensions import Self
|
|
||||||
|
|
||||||
from langchain_box.utilities import BoxAuth, _BoxAPIWrapper
|
|
||||||
|
|
||||||
|
|
||||||
class BoxLoader(BaseLoader, BaseModel):
    """BoxLoader.

    This class will help you load files from your Box instance. You must have a
    Box account. If you need one, you can sign up for a free developer account.
    You will also need a Box application created in the developer portal, where
    you can select your authorization type.

    If you wish to use either of the Box AI options, you must be on an Enterprise
    Plus plan or above. The free developer account does not have access to Box AI.

    In addition, using the Box AI API requires a few prerequisite steps:

    * Your administrator must enable the Box AI API
    * You must enable the ``Manage AI`` scope in your app in the developer console.
    * Your administrator must install and enable your application.

    **Setup**:
        Install ``langchain-box`` and set environment variable ``BOX_DEVELOPER_TOKEN``.

        .. code-block:: bash

            pip install -U langchain-box
            export BOX_DEVELOPER_TOKEN="your-api-key"


    This loader returns ``Document`` objects built from text representations of files
    in Box. It will skip any document without a text representation available. You can
    provide either a ``List[str]`` containing Box file IDs, or you can provide a
    ``str`` containing a Box folder ID. If providing a folder ID, you can also enable
    recursive mode to get the full tree under that folder.

    .. note::
        A Box instance can contain Petabytes of files, and folders can contain millions
        of files. Be intentional when choosing what folders you choose to index. And we
        recommend never getting all files from folder 0 recursively. Folder ID 0 is your
        root folder.

    **Instantiate**:

        .. list-table:: Initialization variables
            :widths: 25 50 15 10
            :header-rows: 1

            * - Variable
              - Description
              - Type
              - Default
            * - box_developer_token
              - Token to use for auth.
              - ``str``
              - ``None``
            * - box_auth
              - Configured ``BoxAuth`` object to use for auth.
              - ``langchain_box.utilities.BoxAuth``
              - ``None``
            * - box_file_ids
              - Box file ids to load.
              - ``List[str]``
              - ``None``
            * - box_folder_id
              - Box folder id to load files from.
              - ``str``
              - ``None``
            * - recursive
              - Traverse subfolders when loading by folder id.
              - ``bool``
              - ``False``
            * - character_limit
              - Cap on the number of characters returned per document.
              - ``int``
              - ``-1``


    **Get files** — this method requires you pass the ``box_file_ids`` parameter.
    This is a ``List[str]`` containing the file IDs you wish to index.

        .. code-block:: python

            from langchain_box.document_loaders import BoxLoader

            box_file_ids = ["1514555423624", "1514553902288"]

            loader = BoxLoader(
                box_file_ids=box_file_ids,
                character_limit=10000  # Optional. Defaults to no limit
            )

    **Get files in a folder** — this method requires you pass the ``box_folder_id``
    parameter. This is a ``str`` containing the folder ID you wish to index.

        .. code-block:: python

            from langchain_box.document_loaders import BoxLoader

            box_folder_id = "260932470532"

            loader = BoxLoader(
                box_folder_id=box_folder_id,
                recursive=False  # Optional. return entire tree, defaults to False
            )

    **Load**:
        .. code-block:: python

            docs = loader.load()
            docs[0]

        .. code-block:: python

            Document(metadata={'source': 'https://dl.boxcloud.com/api/2.0/
            internal_files/1514555423624/versions/1663171610024/representations
            /extracted_text/content/', 'title': 'Invoice-A5555_txt'},
            page_content='Vendor: AstroTech Solutions\\nInvoice Number: A5555\\n\\nLine
            Items:\\n    - Gravitational Wave Detector Kit: $800\\n    - Exoplanet
            Terrarium: $120\\nTotal: $920')

    **Lazy load**:
        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Document(metadata={'source': 'https://dl.boxcloud.com/api/2.0/
            internal_files/1514555423624/versions/1663171610024/representations
            /extracted_text/content/', 'title': 'Invoice-A5555_txt'},
            page_content='Vendor: AstroTech Solutions\\nInvoice Number: A5555\\n\\nLine
            Items:\\n    - Gravitational Wave Detector Kit: $800\\n    - Exoplanet
            Terrarium: $120\\nTotal: $920')

    """

    box_developer_token: Optional[str] = Field(
        default_factory=from_env("BOX_DEVELOPER_TOKEN", default=None)
    )
    """String containing the Box Developer Token generated in the developer console"""

    box_auth: Optional[BoxAuth] = None
    """Configured 
       `BoxAuth <https://python.langchain.com/v0.2/api_reference/box/utilities/langchain_box.utilities.box.BoxAuth.html>`_ 
       object"""

    box_file_ids: Optional[List[str]] = None
    """List[str] containing Box file ids"""

    box_folder_id: Optional[str] = None
    """String containing box folder id to load files from"""

    recursive: Optional[bool] = False
    """If getting files by folder id, recursive is a bool to determine if you wish 
       to traverse subfolders to return child documents. Default is False"""

    character_limit: Optional[int] = -1
    """character_limit is an int that caps the number of characters to
       return per document."""

    # API wrapper built by the validator below; every Box call goes through it.
    _box: Optional[_BoxAPIWrapper] = None

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="allow",
        use_enum_values=True,
    )

    @model_validator(mode="after")
    def validate_box_loader_inputs(self) -> Self:
        """Validate the source/auth arguments and build the Box API wrapper.

        Returns:
            The validated instance, with ``_box`` set to a ``_BoxAPIWrapper``.

        Raises:
            ValueError: If neither or both of ``box_file_ids`` and
                ``box_folder_id`` are provided, or if neither ``box_auth`` nor
                ``box_developer_token`` is provided.
        """
        _box = None

        # Exactly one document source is required: file ids or a folder id.
        if not self.box_file_ids and not self.box_folder_id:
            raise ValueError("You must provide box_file_ids or box_folder_id.")

        if self.box_file_ids and self.box_folder_id:
            raise ValueError(
                "You must provide either box_file_ids or box_folder_id, not both."
            )

        # A configured box_auth takes precedence over the developer token.
        if not self.box_auth:
            if not self.box_developer_token:
                raise ValueError(
                    "you must provide box_developer_token or a box_auth "
                    "generated with langchain_box.utilities.BoxAuth"
                )
            else:
                _box = _BoxAPIWrapper(  # type: ignore[call-arg]
                    box_developer_token=self.box_developer_token,
                    character_limit=self.character_limit,
                )
        else:
            _box = _BoxAPIWrapper(  # type: ignore[call-arg]
                box_auth=self.box_auth,
                character_limit=self.character_limit,
            )

        self._box = _box

        return self

    def _get_files_from_folder(self, folder_id):  # type: ignore[no-untyped-def]
        """Yield a document for each file in ``folder_id``.

        Sub-folders are descended into only when ``self.recursive`` is truthy.
        Files for which the wrapper returns ``None`` are skipped.
        """
        # Fix: the wrapper is stored on ``self._box``. The original read
        # ``self.box``, which is never assigned, so the call raised
        # AttributeError and folder loading produced no documents.
        folder_content = self._box.get_folder_items(folder_id)

        for file in folder_content:
            # Best effort: a TypeError while handling one entry (including a
            # recursive descent) skips that entry instead of aborting the walk.
            try:
                if file.type == FileBaseTypeField.FILE:
                    doc = self._box.get_document_by_file_id(file.id)

                    if doc is not None:
                        yield doc

                elif file.type == "folder" and self.recursive:
                    yield from self._get_files_from_folder(file.id)
            except TypeError:
                pass

    def lazy_load(self) -> Iterator[Document]:
        """Load documents. Accepts no arguments. Returns `Iterator[Document]`"""
        if self.box_file_ids:
            for file_id in self.box_file_ids:
                # Best effort per file: a TypeError skips this id only.
                try:
                    file = self._box.get_document_by_file_id(file_id)  # type: ignore[union-attr]

                    if file is not None:
                        yield file
                except TypeError:
                    pass
        elif self.box_folder_id:
            try:
                yield from self._get_files_from_folder(self.box_folder_id)
            except TypeError:
                pass
            except Exception as e:
                # Any other failure ends the folder walk; it is reported on
                # stdout rather than raised.
                print(f"Exception {e}")  # noqa: T201
        else:
            raise ValueError(
                "You must provide either `box_file_ids` or `box_folder_id`"
            )
|
|
@ -1,5 +0,0 @@
|
|||||||
"""Box Document Loaders."""
|
|
||||||
|
|
||||||
from langchain_box.retrievers.box import BoxRetriever
|
|
||||||
|
|
||||||
__all__ = ["BoxRetriever"]
|
|
@ -1,185 +0,0 @@
|
|||||||
from typing import List, Optional
|
|
||||||
|
|
||||||
from langchain_core.callbacks import CallbackManagerForRetrieverRun
|
|
||||||
from langchain_core.documents import Document
|
|
||||||
from langchain_core.retrievers import BaseRetriever
|
|
||||||
from langchain_core.utils import from_env
|
|
||||||
from pydantic import ConfigDict, Field, model_validator
|
|
||||||
from typing_extensions import Self
|
|
||||||
|
|
||||||
from langchain_box.utilities import BoxAuth, BoxSearchOptions, _BoxAPIWrapper
|
|
||||||
|
|
||||||
|
|
||||||
class BoxRetriever(BaseRetriever):
    """Box retriever.

    `BoxRetriever` provides the ability to retrieve content from
    your Box instance in a couple of ways.

    1. You can use the Box full-text search to retrieve the
       complete document(s) that match your search query, as
       `List[Document]`
    2. You can use the Box AI Platform API to retrieve the results
       from a Box AI prompt. This can be a `Document` containing
       the result of the prompt, or you can retrieve the citations
       used to generate the prompt to include in your vectorstore.

    Setup:
        Install ``langchain-box``:

        .. code-block:: bash

            pip install -U langchain-box

    Instantiate:

        To use search:

        .. code-block:: python

            from langchain_box.retrievers import BoxRetriever

            retriever = BoxRetriever()

        To use Box AI:

        .. code-block:: python

            from langchain_box.retrievers import BoxRetriever

            file_ids=["12345","67890"]

            retriever = BoxRetriever(box_file_ids=file_ids)


    Usage:
        .. code-block:: python

            retriever = BoxRetriever()
            docs = retriever.invoke("victor")
            print(docs[0].page_content[:100])

        .. code-block:: none

            [
                Document(
                    metadata={
                        'source': 'url',
                        'title': 'FIVE_FEET_AND_RISING_by_Peter_Sollett_pdf'
                    },
                    page_content='\\n3/20/23, 5:31 PM F...'
                )
            ]

    Use within a chain:
        .. code-block:: python

            from langchain_core.output_parsers import StrOutputParser
            from langchain_core.prompts import ChatPromptTemplate
            from langchain_core.runnables import RunnablePassthrough
            from langchain_openai import ChatOpenAI

            llm = ChatOpenAI()

            retriever = BoxRetriever(box_developer_token=box_developer_token, character_limit=10000)

            context="You are an actor reading scripts to learn about your role in an upcoming movie."
            question="describe the character Victor"

            prompt = ChatPromptTemplate.from_template(
                \"""Answer the question based only on the context provided.

                Context: {context}

                Question: {question}\"""
            )

            def format_docs(docs):
                return "\\n\\n".join(doc.page_content for doc in docs)

            chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | llm
                | StrOutputParser()
            )

            chain.invoke("Victor")  # search query to find files in Box

        .. code-block:: none

            'Victor is a skinny 12-year-old with sloppy hair who is seen
            sleeping on his fire escape in the sun. He is hesitant to go to
            the pool with his friend Carlos because he is afraid of getting
            in trouble for not letting his mother cut his hair. Ultimately,
            he decides to go to the pool with Carlos.'
    """  # noqa: E501

    box_developer_token: Optional[str] = Field(
        default_factory=from_env("BOX_DEVELOPER_TOKEN", default=None)
    )
    """String containing the Box Developer Token generated in the developer console"""

    box_auth: Optional[BoxAuth] = None
    """Configured 
       `BoxAuth <https://python.langchain.com/v0.2/api_reference/box/utilities/langchain_box.utilities.box.BoxAuth.html>`_ 
       object"""

    box_file_ids: Optional[List[str]] = None
    """List[str] containing Box file ids"""

    character_limit: Optional[int] = -1
    """character_limit is an int that caps the number of characters to
       return per document."""

    box_search_options: Optional[BoxSearchOptions] = None
    """Search options to configure BoxRetriever to narrow search results."""

    answer: Optional[bool] = True
    """When using Box AI, return the answer to the prompt as a `Document` 
       object. Returned as `List[Document]`. Default is `True`."""

    citations: Optional[bool] = False
    """When using Box AI, return the citations used to answer the prompt as 
       `Document` objects. Can be used with answer. Returned as `List[Document]`.
       Default is `False`."""

    # API wrapper built by the validator below. Given an explicit ``None``
    # default, matching ``BoxLoader``, so the attribute always exists.
    _box: Optional[_BoxAPIWrapper] = None

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="allow",
    )

    @model_validator(mode="after")
    def validate_box_loader_inputs(self) -> Self:
        """Validate the auth arguments and build the Box API wrapper.

        Returns:
            The validated instance, with ``_box`` set to a ``_BoxAPIWrapper``.

        Raises:
            ValueError: If neither ``box_auth`` nor ``box_developer_token``
                is provided.
        """
        _box = None

        # At least one credential source is required.
        if not self.box_auth and not self.box_developer_token:
            raise ValueError(
                "you must provide box_developer_token or a box_auth "
                "generated with langchain_box.utilities.BoxAuth"
            )

        # Both credentials are handed to the wrapper as given.
        _box = _BoxAPIWrapper(  # type: ignore[call-arg]
            box_developer_token=self.box_developer_token,
            box_auth=self.box_auth,
            character_limit=self.character_limit,
            box_search_options=self.box_search_options,
        )

        self._box = _box

        return self

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return documents for ``query``.

        When ``box_file_ids`` is set, the query is sent to Box AI as a prompt
        over those files; otherwise it is used as a Box search query.
        """
        if self.box_file_ids:  # If using Box AI
            return self._box.ask_box_ai(  # type: ignore[union-attr]
                query=query,
                box_file_ids=self.box_file_ids,
                answer=self.answer,  # type: ignore[arg-type]
                citations=self.citations,  # type: ignore[arg-type]
            )
        else:  # If using Search
            return self._box.search_box(query=query)  # type: ignore[union-attr]
|
|
@ -1,19 +0,0 @@
|
|||||||
"""Box API Utilities."""
|
|
||||||
|
|
||||||
from langchain_box.utilities.box import (
|
|
||||||
BoxAuth,
|
|
||||||
BoxAuthType,
|
|
||||||
BoxSearchOptions,
|
|
||||||
DocumentFiles,
|
|
||||||
SearchTypeFilter,
|
|
||||||
_BoxAPIWrapper,
|
|
||||||
)
|
|
||||||
|
|
||||||
__all__ = [
|
|
||||||
"BoxAuth",
|
|
||||||
"BoxAuthType",
|
|
||||||
"BoxSearchOptions",
|
|
||||||
"DocumentFiles",
|
|
||||||
"SearchTypeFilter",
|
|
||||||
"_BoxAPIWrapper",
|
|
||||||
]
|
|
@ -1,875 +0,0 @@
|
|||||||
"""Util that calls Box APIs."""
|
|
||||||
|
|
||||||
from enum import Enum
|
|
||||||
from typing import Any, Dict, List, Optional
|
|
||||||
|
|
||||||
import box_sdk_gen # type: ignore
|
|
||||||
import requests
|
|
||||||
from langchain_core.documents import Document
|
|
||||||
from langchain_core.utils import from_env
|
|
||||||
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
|
||||||
from typing_extensions import Self
|
|
||||||
|
|
||||||
|
|
||||||
class DocumentFiles(Enum):
    """DocumentFiles(Enum).

    An enum containing all of the supported extensions for files
    Box considers Documents. These files should have text
    representations.

    Every member name is the upper-cased extension, so a membership test of
    the form ``hasattr(DocumentFiles, extension.upper())`` works for each
    listed extension.
    """

    DOC = "doc"
    DOCX = "docx"
    GDOC = "gdoc"
    GSHEET = "gsheet"
    NUMBERS = "numbers"
    ODS = "ods"
    ODT = "odt"
    PAGES = "pages"
    PDF = "pdf"
    RTF = "rtf"
    WPD = "wpd"
    XLS = "xls"
    XLSM = "xlsm"
    XLSX = "xlsx"
    AS = "as"
    AS3 = "as3"
    ASM = "asm"
    BAT = "bat"
    C = "c"
    CC = "cc"
    CMAKE = "cmake"
    CPP = "cpp"
    CS = "cs"
    CSS = "css"
    CSV = "csv"
    CXX = "cxx"
    DIFF = "diff"
    ERB = "erb"
    GROOVY = "groovy"
    H = "h"
    HAML = "haml"
    HH = "hh"
    HTM = "htm"
    HTML = "html"
    JAVA = "java"
    JS = "js"
    JSON = "json"
    LESS = "less"
    LOG = "log"
    M = "m"
    MAKE = "make"
    MD = "md"
    ML = "ml"
    MM = "mm"
    MSG = "msg"
    PHP = "php"
    PL = "pl"
    PROPERTIES = "properties"
    PY = "py"
    RB = "rb"
    RST = "rst"
    SASS = "sass"
    SCALA = "scala"
    SCM = "scm"
    SCRIPT = "script"
    SH = "sh"
    SML = "sml"
    SQL = "sql"
    TXT = "txt"
    VI = "vi"
    VIM = "vim"
    WEBDOC = "webdoc"
    XHTML = "xhtml"
    XLSB = "xlsb"
    XML = "xml"
    XSD = "xsd"
    XSL = "xsl"
    YAML = "yaml"
    GSLIDE = "gslide"
    # Misspelled name shipped originally; kept as an alias of GSLIDE (same
    # value) so existing references to DocumentFiles.GSLLIDE keep working.
    GSLLIDE = "gslide"
    GSLIDES = "gslides"
    KEY = "key"
    ODP = "odp"
    PPT = "ppt"
    PPTX = "pptx"
    BOXNOTE = "boxnote"
|
|
||||||
|
|
||||||
|
|
||||||
class ImageFiles(Enum):
    """File extensions that Box treats as images.

    Each member's value is the lower-case extension (without the dot) and
    its name is that extension upper-cased.
    """

    ARW = "arw"
    BMP = "bmp"
    CR2 = "cr2"
    DCM = "dcm"
    DICM = "dicm"
    DICOM = "dicom"
    DNG = "dng"
    EPS = "eps"
    EXR = "exr"
    GIF = "gif"
    HEIC = "heic"
    INDD = "indd"
    INDML = "indml"
    INDT = "indt"
    INX = "inx"
    JPEG = "jpeg"
    JPG = "jpg"
    NEF = "nef"
    PNG = "png"
    SVG = "svg"
    TIF = "tif"
    TIFF = "tiff"
    TGA = "tga"
    SVS = "svs"
|
|
||||||
|
|
||||||
|
|
||||||
class BoxAuthType(Enum):
    """BoxAuthType(Enum).

    Selects how the Box connection is authenticated.

    Options are:

    TOKEN - Use a developer token generated from the Box Developer Console.
        Only recommended for development.
        Provide ``box_developer_token``.
    CCG - Client Credentials Grant.
        Provide ``box_client_id``, ``box_client_secret``,
        and ``box_enterprise_id`` or optionally ``box_user_id``.
    JWT - Use JWT for authentication. Config should be stored on the file
        system accessible to your app.
        Provide ``box_jwt_path``. Optionally, provide ``box_user_id`` to
        act as a specific user.
    """

    TOKEN = "token"
    """Use a developer token or a token retrieved from ``box-sdk-gen``"""

    CCG = "ccg"
    """Use ``client_credentials`` type grant"""

    JWT = "jwt"
    """Use JWT bearer token auth"""
|
|
||||||
|
|
||||||
|
|
||||||
class BoxAuth(BaseModel):
    """**BoxAuth.**

    The ``langchain-box`` package offers some flexibility to authentication. The
    most basic authentication method is by using a developer token. This can be
    found in the `Box developer console <https://account.box.com/developers/console>`_
    on the configuration screen. This token is purposely short-lived (1 hour) and is
    intended for development. With this token, you can add it to your environment as
    ``BOX_DEVELOPER_TOKEN``, you can pass it directly to the loader, or you can use the
    ``BoxAuth`` authentication helper class.

    ``BoxAuth`` supports the following authentication methods:

    * **Token** — either a developer token or any token generated through the Box SDK
    * **JWT** with a service account
    * **JWT** with a specified user
    * **CCG** with a service account
    * **CCG** with a specified user

    .. note::
        If using JWT authentication, you will need to download the configuration from
        the Box developer console after generating your public/private key pair. Place
        this file in your application directory structure somewhere. You will use the
        path to this file when using the ``BoxAuth`` helper class. If you wish to use
        OAuth2 with the authorization_code flow, please use ``BoxAuthType.TOKEN`` with
        the token you have acquired.

    For more information, learn about how to
    `set up a Box application <https://developer.box.com/guides/getting-started/first-application/>`_,
    and check out the
    `Box authentication guide <https://developer.box.com/guides/authentication/select/>`_
    for more about our different authentication options.

    Simple implementation:

        To instantiate, you must provide a ``langchain_box.utilities.BoxAuthType``.

        BoxAuthType is an enum to tell BoxLoader how you wish to authenticate your
        Box connection.

        Options are:

        TOKEN - Use a developer token generated from the Box Developer Console.
            Only recommended for development.
            Provide ``box_developer_token``.
        CCG - Client Credentials Grant.
            provide ``box_client_id``, ``box_client_secret``,
            and ``box_enterprise_id`` or optionally ``box_user_id``.
        JWT - Use JWT for authentication. Config should be stored on the file
            system accessible to your app.
            provide ``box_jwt_path``. Optionally, provide ``box_user_id`` to
            act as a specific user

    **Examples**:

        **Token**

        .. code-block:: python

            from langchain_box.document_loaders import BoxLoader
            from langchain_box.utilities import BoxAuth, BoxAuthType

            auth = BoxAuth(
                auth_type=BoxAuthType.TOKEN,
                box_developer_token=box_developer_token
            )

            loader = BoxLoader(
                box_auth=auth,
                ...
            )


        **JWT with a service account**

        .. code-block:: python

            from langchain_box.document_loaders import BoxLoader
            from langchain_box.utilities import BoxAuth, BoxAuthType

            auth = BoxAuth(
                auth_type=BoxAuthType.JWT,
                box_jwt_path=box_jwt_path
            )

            loader = BoxLoader(
                box_auth=auth,
                ...
            )


        **JWT with a specified user**

        .. code-block:: python

            from langchain_box.document_loaders import BoxLoader
            from langchain_box.utilities import BoxAuth, BoxAuthType

            auth = BoxAuth(
                auth_type=BoxAuthType.JWT,
                box_jwt_path=box_jwt_path,
                box_user_id=box_user_id
            )

            loader = BoxLoader(
                box_auth=auth,
                ...
            )


        **CCG with a service account**

        .. code-block:: python

            from langchain_box.document_loaders import BoxLoader
            from langchain_box.utilities import BoxAuth, BoxAuthType

            auth = BoxAuth(
                auth_type=BoxAuthType.CCG,
                box_client_id=box_client_id,
                box_client_secret=box_client_secret,
                box_enterprise_id=box_enterprise_id
            )

            loader = BoxLoader(
                box_auth=auth,
                ...
            )


        **CCG with a specified user**

        .. code-block:: python

            from langchain_box.document_loaders import BoxLoader
            from langchain_box.utilities import BoxAuth, BoxAuthType

            auth = BoxAuth(
                auth_type=BoxAuthType.CCG,
                box_client_id=box_client_id,
                box_client_secret=box_client_secret,
                box_user_id=box_user_id
            )

            loader = BoxLoader(
                box_auth=auth,
                ...
            )

    """

    auth_type: BoxAuthType
    """``langchain_box.utilities.BoxAuthType``. Enum describing how to
    authenticate against Box"""

    box_developer_token: Optional[str] = Field(
        default_factory=from_env("BOX_DEVELOPER_TOKEN", default=None)
    )
    """ If using ``BoxAuthType.TOKEN``, provide your token here"""

    box_jwt_path: Optional[str] = Field(
        default_factory=from_env("BOX_JWT_PATH", default=None)
    )
    """If using ``BoxAuthType.JWT``, provide local path to your
    JWT configuration file"""

    box_client_id: Optional[str] = Field(
        default_factory=from_env("BOX_CLIENT_ID", default=None)
    )
    """If using ``BoxAuthType.CCG``, provide your app's client ID"""

    box_client_secret: Optional[str] = Field(
        default_factory=from_env("BOX_CLIENT_SECRET", default=None)
    )
    """If using ``BoxAuthType.CCG``, provide your app's client secret"""

    box_enterprise_id: Optional[str] = None
    """If using ``BoxAuthType.CCG``, provide your enterprise ID.
    Only required if you are not sending ``box_user_id``"""

    box_user_id: Optional[str] = None
    """If using ``BoxAuthType.CCG`` or ``BoxAuthType.JWT``, providing
    ``box_user_id`` will act on behalf of a specific user"""

    # Lazily created client, cached after the first successful _authorize().
    _box_client: Optional[box_sdk_gen.BoxClient] = None
    # Extra header attached to every client built by this class.
    _custom_header: Dict = dict({"x-box-ai-library": "langchain"})

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        # auth_type is stored as its string value ("token"/"jwt"/"ccg"),
        # which is why the methods below compare against plain strings.
        use_enum_values=True,
        extra="allow",
    )

    @model_validator(mode="after")
    def validate_box_auth_inputs(self) -> Self:
        """Check that the fields required by the chosen ``auth_type`` are set.

        Raises:
            ValueError: if ``auth_type`` is empty, or if the credentials the
                selected auth type needs are missing.
        """
        if not self.auth_type:
            raise ValueError("Auth type must be set.")

        # TOKEN needs box_developer_token.
        if self.auth_type == "token" and not self.box_developer_token:
            raise ValueError(f"{self.auth_type} requires box_developer_token to be set")

        # JWT needs box_jwt_path.
        if self.auth_type == "jwt" and not self.box_jwt_path:
            raise ValueError(f"{self.auth_type} requires box_jwt_path to be set")

        # CCG needs box_client_id and box_client_secret, plus either
        # box_enterprise_id or box_user_id.
        if self.auth_type == "ccg":
            has_subject = bool(self.box_enterprise_id or self.box_user_id)
            if not self.box_client_id or not self.box_client_secret or not has_subject:
                raise ValueError(
                    f"{self.auth_type} requires box_client_id, "
                    "box_client_secret, and box_enterprise_id/box_user_id."
                )

        return self

    def _build_client(self, auth: Any) -> box_sdk_gen.BoxClient:
        """Wrap ``auth`` in a ``BoxClient`` that sends the custom header."""
        return box_sdk_gen.BoxClient(auth=auth).with_extra_headers(
            extra_headers=self._custom_header
        )

    def _authorize(self) -> None:
        """Build the Box client for the configured ``auth_type`` and cache it.

        The client is stored in ``self._box_client`` only once it has been
        fully configured, so a failure never leaves a partially configured
        client cached.

        Raises:
            RuntimeError: if the Box SDK raises ``BoxSDKError``.
            ValueError: on any other failure while building the client, or if
                ``auth_type`` is not one of token / jwt / ccg.
        """
        if self.auth_type == "token":
            try:
                auth = box_sdk_gen.BoxDeveloperTokenAuth(token=self.box_developer_token)
                self._box_client = self._build_client(auth)

            except box_sdk_gen.BoxSDKError as bse:
                raise RuntimeError(
                    f"Error getting client from developer token: {bse.message}"
                ) from bse
            except Exception as ex:
                raise ValueError(
                    "Invalid Box developer token. Please verify your "
                    f"token and try again.\n{ex}"
                ) from ex

        elif self.auth_type == "jwt":
            try:
                jwt_config = box_sdk_gen.JWTConfig.from_config_file(
                    config_file_path=self.box_jwt_path
                )
                auth = box_sdk_gen.BoxJWTAuth(config=jwt_config)

                # Act as a specific user when requested; otherwise the
                # service-account auth from the config file is used as-is.
                if self.box_user_id is not None:
                    auth = auth.with_user_subject(self.box_user_id)

                self._box_client = self._build_client(auth)

            except box_sdk_gen.BoxSDKError as bse:
                raise RuntimeError(
                    f"Error getting client from jwt token: {bse.message}"
                ) from bse
            except Exception as ex:
                raise ValueError(
                    "Error authenticating. Please verify your JWT config "
                    "and try again."
                ) from ex

        elif self.auth_type == "ccg":
            try:
                # A user ID takes precedence over the enterprise ID.
                if self.box_user_id is not None:
                    ccg_config = box_sdk_gen.CCGConfig(
                        client_id=self.box_client_id,
                        client_secret=self.box_client_secret,
                        user_id=self.box_user_id,
                    )
                else:
                    ccg_config = box_sdk_gen.CCGConfig(
                        client_id=self.box_client_id,
                        client_secret=self.box_client_secret,
                        enterprise_id=self.box_enterprise_id,
                    )
                auth = box_sdk_gen.BoxCCGAuth(config=ccg_config)

                self._box_client = self._build_client(auth)

            except box_sdk_gen.BoxSDKError as bse:
                raise RuntimeError(
                    f"Error getting client from ccg token: {bse.message}"
                ) from bse
            except Exception as ex:
                raise ValueError(
                    "Error authenticating. Please verify you are providing a "
                    "valid client id, secret and either a valid user ID or "
                    "enterprise ID."
                ) from ex

        else:
            raise ValueError(
                f"{self.auth_type} is not a valid auth_type. Value must be "
                "TOKEN, CCG, or JWT."
            )

    def get_client(self) -> box_sdk_gen.BoxClient:
        """Instantiate the Box SDK.

        Returns:
            The cached ``BoxClient``; it is created via ``_authorize`` on the
            first call.
        """
        if self._box_client is None:
            self._authorize()

        return self._box_client
|
|
||||||
|
|
||||||
|
|
||||||
class SearchTypeFilter(Enum):
    """SearchTypeFilter.

    Enum naming the parts of an item that a search query is matched against.
    """

    NAME = "name"
    """The name of the item, as defined by its ``name`` field."""

    DESCRIPTION = "description"
    """The description of the item, as defined by its ``description`` field."""

    FILE_CONTENT = "file_content"
    """The actual content of the file."""

    COMMENTS = "comments"
    """The content of any of the comments on a file or folder."""

    TAGS = "tags"
    """Any tags that are applied to an item, as defined by its ``tags`` field."""
|
|
||||||
|
|
||||||
|
|
||||||
class BoxSearchOptions(BaseModel):
    """Optional filters that narrow a Box search.

    Every field defaults to "no restriction" except ``k``, which defaults to
    100 results. The values are validated by ``validate_search_options``.
    """

    ancestor_folder_ids: Optional[List[str]] = None
    """Limits the search results to items within the given list of folders,
    defined as a comma separated lists of folder IDs."""

    search_type_filter: Optional[List[SearchTypeFilter]] = None
    """Limits the search results to any items that match the search query for a
    specific part of the file, for example the file description.

    Content types are defined as a comma separated lists of Box recognized
    content types. The allowed content types are as follows. Default is all."""

    created_date_range: Optional[List[str]] = None
    """Limits the search results to any items created within a given date range.

    Date ranges are defined as comma separated RFC3339 timestamps.

    If the start date is omitted (,2014-05-17T13:35:01-07:00) anything
    created before the end date will be returned.

    If the end date is omitted (2014-05-15T13:35:01-07:00,) the current
    date will be used as the end date instead."""

    file_extensions: Optional[List[DocumentFiles]] = None
    """Limits the search results to any files that match any of the provided
    file extensions. This list is a comma-separated list of
    ``langchain_box.utilities.DocumentFiles`` entries"""

    k: Optional[int] = 100
    """Defines the maximum number of items to return. Defaults to 100, maximum
    is 200."""

    size_range: Optional[List[int]] = None
    """Limits the search results to any items with a size within a given file
    size range. This applied to files and folders.

    Size ranges are defined as comma separated list of a lower and upper
    byte size limit (inclusive).

    The upper and lower bound can be omitted to create open ranges."""

    updated_date_range: Optional[List[str]] = None
    """Limits the search results to any items updated within a given date range.

    Date ranges are defined as comma separated RFC3339 timestamps.

    If the start date is omitted (,2014-05-17T13:35:01-07:00) anything
    updated before the end date will be returned.

    If the end date is omitted (2014-05-15T13:35:01-07:00,) the current
    date will be used as the end date instead."""

    # Same settings as the former class-based ``Config``, expressed with
    # ``ConfigDict`` like the other models in this module.
    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        use_enum_values=True,
        extra="allow",
    )

    @staticmethod
    def _check_date_range(name: str, date_range: Optional[List[str]]) -> None:
        """Validate one ``[start, end]`` date range.

        An unset/empty range is accepted. A range with fewer than two entries
        is rejected. When both bounds are present (neither ``None`` nor
        ``""``), the start must not compare greater than the end.

        Raises:
            ValueError: if the range is too short or start > end.
        """
        if not date_range:
            return

        if len(date_range) < 2:
            raise ValueError(f"{name} must contain a start and an end entry.")

        start, end = date_range[0], date_range[1]

        # An omitted bound (None or "") makes the range open-ended; there is
        # nothing to compare in that case.
        if start is None or start == "" or end is None or end == "":
            return

        # NOTE(review): this is a plain string comparison of the timestamps,
        # so it is only meaningful when both use the same format and offset.
        if start > end:
            raise ValueError("Start date must be before end date.")

    @model_validator(mode="after")
    def validate_search_options(self) -> Self:
        """Validate ``k`` and both date ranges.

        Raises:
            ValueError: if ``k`` is set and outside 1..200, or if a date range
                is malformed (see ``_check_date_range``).
        """
        # k is Optional: None means "no explicit limit", so only range-check
        # an actual number.
        if self.k is not None and (self.k > 200 or self.k < 1):
            raise ValueError(
                f"Invalid setting of k {self.k}. " "Value must be between 1 and 200."
            )

        self._check_date_range("created_date_range", self.created_date_range)
        self._check_date_range("updated_date_range", self.updated_date_range)

        return self
|
|
||||||
|
|
||||||
|
|
||||||
class _BoxAPIWrapper(BaseModel):
    """Wrapper for Box API.

    Holds the credentials and options shared by the Box loader/retriever
    code and exposes helpers that fetch text representations, list folders,
    search, and query Box AI, returning ``Document`` objects.
    """

    box_developer_token: Optional[str] = Field(
        default_factory=from_env("BOX_DEVELOPER_TOKEN", default=None)
    )
    """String containing the Box Developer Token generated in the developer console"""

    box_auth: Optional[BoxAuth] = None
    """Configured langchain_box.utilities.BoxAuth object"""

    character_limit: Optional[int] = -1
    """character_limit is an int that caps the number of characters to
       return per document."""

    box_search_options: Optional[BoxSearchOptions] = None
    """Search options to configure BoxRetriever to narrow search results."""

    # Box client; set by the validator (from box_auth) or lazily by
    # get_box_client() (from box_developer_token).
    _box: Optional[box_sdk_gen.BoxClient]

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        use_enum_values=True,
        extra="allow",
    )

    @model_validator(mode="after")
    def validate_box_api_inputs(self) -> Self:
        """Require a token or a ``BoxAuth`` and initialise ``_box``.

        With ``box_auth`` the client is created immediately; with only a
        developer token ``_box`` stays ``None`` until first use.

        Raises:
            ValueError: if neither ``box_auth`` nor ``box_developer_token``
                is configured.
        """
        self._box = None

        if not self.box_auth:
            if not self.box_developer_token:
                raise ValueError(
                    "You must configure either box_developer_token or box_auth"
                )
        else:
            self._box = self.box_auth.get_client()

        return self

    def get_box_client(self) -> box_sdk_gen.BoxClient:
        """Create a client from ``box_developer_token``, cache and return it."""
        box_auth = BoxAuth(
            auth_type=BoxAuthType.TOKEN, box_developer_token=self.box_developer_token
        )

        self._box = box_auth.get_client()
        return self._box

    @staticmethod
    def _is_document_extension(extension: Optional[str]) -> bool:
        """Return True if ``extension`` is one of the ``DocumentFiles`` values.

        The comparison is case-insensitive and is done against member values
        (not names), so it does not depend on how a member is spelled.
        """
        if not extension:
            return False
        return extension.lower() in {member.value for member in DocumentFiles}

    def _do_request(self, url: str) -> Any:
        """GET ``url`` with the client's bearer token and return the raw body.

        Raises:
            RuntimeError: if the SDK fails to provide an access token.
            requests.exceptions.HTTPError: on a non-success HTTP status.
        """
        try:
            access_token = self._box.auth.retrieve_token().access_token  # type: ignore[union-attr]
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"Error getting client from jwt token: {bse.message}"
            ) from bse

        resp = requests.get(url, headers={"Authorization": f"Bearer {access_token}"})
        resp.raise_for_status()
        return resp.content

    def _get_text_representation(
        self, file_id: str = ""
    ) -> tuple[Optional[str], Optional[str], Optional[str]]:
        """Fetch the extracted-text representation of a file.

        Returns:
            ``(file_name, content, url)`` where ``file_name`` is the Box file
            name with ``.`` and spaces replaced by ``_``, ``content`` is the
            text truncated to ``character_limit`` characters when that limit
            is positive, and ``url`` is the representation's content URL.
            ``(None, None, None)`` when the file has no extracted-text
            representation or it cannot be downloaded.

        Raises:
            RuntimeError: if the Box SDK reports an API/SDK error while
                looking the file up.
        """
        if self._box is None:
            self.get_box_client()

        try:
            file = self._box.files.get_file_by_id(  # type: ignore[union-attr]
                file_id,
                x_rep_hints="[extracted_text]",
                fields=["name", "representations", "type"],
            )
        except box_sdk_gen.BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting text rep: {bae.message}"
            ) from bae
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting text rep: {bse.message}"
            ) from bse
        except Exception:
            return None, None, None

        # Either the representations object or its entries may be missing.
        representations = file.representations
        file_repr = representations.entries if representations is not None else None

        if not file_repr:
            return None, None, None

        for entry in file_repr:
            if entry.representation != "extracted_text":
                continue

            # If the file representation doesn't exist, calling
            # info.url will generate text if possible
            if entry.status.state == "none":
                self._do_request(entry.info.url)

            url = entry.content.url_template.replace("{+asset_path}", "")
            file_name = file.name.replace(".", "_").replace(" ", "_")

            try:
                raw_content = self._do_request(url)
            except requests.exceptions.HTTPError:
                return None, None, None

            # The HTTP body arrives as bytes; decode before truncating so the
            # limit counts characters and never cuts a multi-byte character.
            if isinstance(raw_content, (bytes, bytearray)):
                raw_content = bytes(raw_content).decode("utf-8", errors="replace")

            if self.character_limit is not None and self.character_limit > 0:
                content = raw_content[: self.character_limit]
            else:
                content = raw_content

            return file_name, content, url

        return None, None, None

    def get_document_by_file_id(self, file_id: str) -> Optional[Document]:
        """Load a file from a Box id. Accepts file_id as str.

        Returns:
            A ``Document`` whose ``page_content`` is the file's extracted text
            and whose metadata holds ``source`` (content URL) and ``title``
            (sanitised file name); ``None`` if the item is not a file, its
            extension is not a ``DocumentFiles`` value, or no text
            representation is available.
        """
        if self._box is None:
            self.get_box_client()

        file = self._box.files.get_file_by_id(  # type: ignore[union-attr]
            file_id, fields=["name", "type", "extension"]
        )

        if file.type != "file" or not self._is_document_extension(file.extension):
            return None

        file_name, content, url = self._get_text_representation(file_id=file_id)

        if file_name is None or content is None or url is None:
            return None

        metadata = {
            "source": f"{url}",
            "title": f"{file_name}",
        }

        return Document(page_content=content, metadata=metadata)

    def get_folder_items(self, folder_id: str) -> box_sdk_gen.Items:
        """Get all the items in a folder. Accepts folder_id as str.

        Returns:
            The ``entries`` of the folder-items response (each requested with
            the ``id``, ``type`` and ``name`` fields).

        Raises:
            RuntimeError: if the Box SDK reports an API/SDK error.
        """
        if self._box is None:
            self.get_box_client()

        try:
            folder_contents = self._box.folders.get_folder_items(  # type: ignore[union-attr]
                folder_id, fields=["id", "type", "name"]
            )
        except box_sdk_gen.BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting folder content: {bae.message}"
            ) from bae
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting folder content: {bse.message}"
            ) from bse

        return folder_contents.entries

    def search_box(self, query: str) -> List[Document]:
        """Search Box for files matching ``query`` and load them as documents.

        ``box_search_options``, when set, is forwarded to the search call.
        Only hits that are files with a ``DocumentFiles`` extension and an
        available text representation are returned.

        Returns:
            The matching documents; an empty list when nothing matches.

        Raises:
            RuntimeError: if the Box SDK reports an API/SDK error.
        """
        if self._box is None:
            self.get_box_client()

        files: List[Document] = []

        try:
            if self.box_search_options is None:
                results = self._box.search.search_for_content(  # type: ignore[union-attr]
                    query=query, fields=["id", "type", "extension"], type="file"
                )
            else:
                options = self.box_search_options
                results = self._box.search.search_for_content(  # type: ignore[union-attr]
                    query=query,
                    fields=["id", "type", "extension"],
                    type="file",
                    ancestor_folder_ids=options.ancestor_folder_ids,
                    content_types=options.search_type_filter,
                    created_at_range=options.created_date_range,
                    file_extensions=options.file_extensions,
                    limit=options.k,
                    size_range=options.size_range,
                    updated_at_range=options.updated_date_range,
                )

            # No hits: honour the List[Document] contract with an empty list.
            if not results.entries:
                return files

            for file in results.entries:
                if (
                    file is not None
                    and file.type == "file"
                    and self._is_document_extension(file.extension)
                ):
                    doc = self.get_document_by_file_id(file.id)

                    if doc is not None:
                        files.append(doc)

            return files
        except box_sdk_gen.BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting search results: {bae.message}"
            ) from bae
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting search results: {bse.message}"
            ) from bse

    def ask_box_ai(
        self,
        query: str,
        box_file_ids: List[str],
        answer: bool = True,
        citations: bool = False,
    ) -> List[Document]:
        """Ask Box AI ``query`` about the given files.

        Args:
            query: Prompt sent to Box AI.
            box_file_ids: IDs of the files to ask about; one ID uses the
                single-item mode, several use the multiple-item mode.
            answer: Include the AI answer as a document.
            citations: Request citations and include each as a document.

        Returns:
            The answer document (if ``answer``) followed by one document per
            citation (if ``citations``).

        Raises:
            ValueError: if ``box_file_ids`` is empty.
            RuntimeError: if the Box SDK reports an API/SDK error.
        """
        # Validate the input before touching the client.
        if len(box_file_ids) <= 0:
            raise ValueError("BOX_AI_ASK requires at least one file ID")

        if self._box is None:
            self.get_box_client()

        if len(box_file_ids) > 1:
            ai_mode = box_sdk_gen.CreateAiAskMode.MULTIPLE_ITEM_QA.value
        else:
            ai_mode = box_sdk_gen.CreateAiAskMode.SINGLE_ITEM_QA.value

        items = [
            box_sdk_gen.AiItemBase(
                id=file_id, type=box_sdk_gen.AiItemBaseTypeField.FILE.value
            )
            for file_id in box_file_ids
        ]

        try:
            response = self._box.ai.create_ai_ask(  # type: ignore[union-attr]
                mode=ai_mode, prompt=query, items=items, include_citations=citations
            )

        except box_sdk_gen.BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting Box AI result: {bae.message}"
            ) from bae
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting Box AI result: {bse.message}"
            ) from bse

        docs = []

        if answer:
            content = response.answer
            metadata = {"source": "Box AI", "title": f"Box AI {query}"}

            document = Document(page_content=content, metadata=metadata)
            docs.append(document)

        if citations:
            # The response may carry no citations at all; treat that as empty.
            box_citations = response.citations or []

            for citation in box_citations:
                content = citation.content
                file_name = citation.name
                file_id = citation.id
                file_type = citation.type.value

                metadata = {
                    "source": f"Box AI {query}",
                    "file_name": file_name,
                    "file_id": file_id,
                    "file_type": file_type,
                }

                document = Document(page_content=content, metadata=metadata)
                docs.append(document)

        return docs
|
|
1102
libs/partners/box/poetry.lock
generated
1102
libs/partners/box/poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -1,84 +0,0 @@
|
|||||||
[build-system]
|
|
||||||
requires = [ "poetry-core>=1.0.0",]
|
|
||||||
build-backend = "poetry.core.masonry.api"
|
|
||||||
|
|
||||||
[tool.poetry]
|
|
||||||
name = "langchain-box"
|
|
||||||
version = "0.2.3"
|
|
||||||
description = "An integration package connecting Box and LangChain"
|
|
||||||
authors = []
|
|
||||||
readme = "README.md"
|
|
||||||
repository = "https://github.com/langchain-ai/langchain"
|
|
||||||
license = "MIT"
|
|
||||||
|
|
||||||
[tool.mypy]
|
|
||||||
disallow_untyped_defs = "True"
|
|
||||||
|
|
||||||
[tool.poetry.urls]
|
|
||||||
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/box"
|
|
||||||
"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-box%3D%3D0%22&expanded=true"
|
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
|
||||||
python = ">=3.9.0,<3.13"
|
|
||||||
langchain-core = "^0.3.15"
|
|
||||||
pydantic = "^2"
|
|
||||||
|
|
||||||
[tool.ruff.lint]
|
|
||||||
select = [ "E", "F", "I", "T201",]
|
|
||||||
|
|
||||||
[tool.coverage.run]
|
|
||||||
omit = [ "tests/*",]
|
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
|
||||||
markers = [ "compile: mark placeholder test used to compile integration tests without running them",]
|
|
||||||
asyncio_mode = "auto"
|
|
||||||
|
|
||||||
[tool.poetry.dependencies.box-sdk-gen]
|
|
||||||
extras = [ "jwt",]
|
|
||||||
version = "^1.5.0"
|
|
||||||
|
|
||||||
[tool.poetry.group.test]
|
|
||||||
optional = true
|
|
||||||
|
|
||||||
[tool.poetry.group.codespell]
|
|
||||||
optional = true
|
|
||||||
|
|
||||||
[tool.poetry.group.test_integration]
|
|
||||||
optional = true
|
|
||||||
|
|
||||||
[tool.poetry.group.lint]
|
|
||||||
optional = true
|
|
||||||
|
|
||||||
[tool.poetry.group.dev]
|
|
||||||
optional = true
|
|
||||||
|
|
||||||
[tool.poetry.group.test.dependencies]
|
|
||||||
pytest = "^7.4.3"
|
|
||||||
pytest_mock = "^3.14.0"
|
|
||||||
pytest-asyncio = "^0.23.2"
|
|
||||||
pytest-socket = "^0.7.0"
|
|
||||||
|
|
||||||
[tool.poetry.group.codespell.dependencies]
|
|
||||||
codespell = "^2.2.6"
|
|
||||||
|
|
||||||
[tool.poetry.group.test_integration.dependencies]
|
|
||||||
python-dotenv = "^1.0.1"
|
|
||||||
|
|
||||||
[tool.poetry.group.lint.dependencies]
|
|
||||||
ruff = "^0.1.8"
|
|
||||||
|
|
||||||
[tool.poetry.group.typing.dependencies]
|
|
||||||
mypy = "^1.7.1"
|
|
||||||
types-requests = "^2.32.0.20240712"
|
|
||||||
|
|
||||||
[tool.poetry.group.test.dependencies.langchain-core]
|
|
||||||
path = "../../core"
|
|
||||||
develop = true
|
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies.langchain-core]
|
|
||||||
path = "../../core"
|
|
||||||
develop = true
|
|
||||||
|
|
||||||
[tool.poetry.group.typing.dependencies.langchain-core]
|
|
||||||
path = "../../core"
|
|
||||||
develop = true
|
|
@ -1,17 +0,0 @@
|
|||||||
import sys
|
|
||||||
import traceback
|
|
||||||
from importlib.machinery import SourceFileLoader
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Every command-line argument is treated as the path of a Python source
    # file that should be importable on its own.
    files = sys.argv[1:]
    has_failure = False
    for file in files:
        try:
            # Loading the file runs its top-level code, so a broken import in
            # it surfaces here as an exception.
            SourceFileLoader("x", file).load_module()
        except Exception:
            # Bug fix: this flag used to be assigned to the misspelled name
            # ``has_faillure``, so ``has_failure`` never changed and the script
            # exited 0 even when a file failed to load.
            has_failure = True
            print(file)  # noqa: T201
            traceback.print_exc()
            print()  # noqa: T201

    # Non-zero exit status when at least one file could not be loaded.
    sys.exit(1 if has_failure else 0)
|
|
@ -1,18 +0,0 @@
|
|||||||
#!/bin/bash

set -eu

# Number of forbidden import families that were found in the tree.
errors=0

# This package must not import from langchain, langchain_experimental, or
# langchain_community. git grep prints every offending line and succeeds only
# when it found at least one, so each hit bumps the counter by one.
for forbidden in langchain langchain_experimental langchain_community; do
  if git --no-pager grep "^from ${forbidden}\\." .; then
    errors=$((errors+1))
  fi
done

# Exit successfully only when nothing was flagged.
if [ "$errors" -eq 0 ]; then
  exit 0
fi
exit 1
|
|
@ -1,3 +0,0 @@
|
|||||||
"""
|
|
||||||
TODO: build live integration tests
|
|
||||||
"""
|
|
@ -1,3 +0,0 @@
|
|||||||
"""
|
|
||||||
TODO: build live integration tests
|
|
||||||
"""
|
|
@ -1,7 +0,0 @@
|
|||||||
import pytest # type: ignore[import-not-found]
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.compile
def test_placeholder() -> None:
    """Used for compiling integration tests without running any real tests."""
|
|
@ -1,3 +0,0 @@
|
|||||||
"""
|
|
||||||
TODO: build live integration tests
|
|
||||||
"""
|
|
@ -1,99 +0,0 @@
|
|||||||
import pytest
|
|
||||||
from langchain_core.documents import Document
|
|
||||||
from pytest_mock import MockerFixture
|
|
||||||
|
|
||||||
from langchain_box.document_loaders import BoxLoader
|
|
||||||
from langchain_box.utilities import BoxAuth, BoxAuthType
|
|
||||||
|
|
||||||
|
|
||||||
# Test auth types
|
|
||||||
def test_direct_token_initialization() -> None:
    """A token and file IDs given to BoxLoader are readable back unchanged."""
    box_loader = BoxLoader(  # type: ignore[call-arg]
        box_file_ids=["box_file_ids"],
        box_developer_token="box_developer_token",
    )

    expected = {
        "box_developer_token": "box_developer_token",
        "box_file_ids": ["box_file_ids"],
    }
    for field, value in expected.items():
        assert getattr(box_loader, field) == value
|
|
||||||
|
|
||||||
|
|
||||||
def test_failed_direct_token_initialization() -> None:
    """BoxLoader built with file IDs but no credentials must raise ValueError."""
    with pytest.raises(ValueError):
        # The instance is never used; only the constructor call matters.
        BoxLoader(box_file_ids=["box_file_ids"])  # type: ignore[call-arg]
|
|
||||||
|
|
||||||
|
|
||||||
def test_auth_initialization() -> None:
    """BoxLoader can be built from a BoxAuth object instead of a raw token."""
    token_auth = BoxAuth(
        box_developer_token="box_developer_token",
        auth_type=BoxAuthType.TOKEN,
    )

    box_loader = BoxLoader(  # type: ignore[call-arg]
        box_file_ids=["box_file_ids"],
        box_auth=token_auth,
    )

    assert box_loader.box_file_ids == ["box_file_ids"]
|
|
||||||
|
|
||||||
|
|
||||||
# test loaders
|
|
||||||
def test_failed_file_initialization() -> None:
    """BoxLoader given only a token (no file or folder IDs) must raise ValueError."""
    with pytest.raises(ValueError):
        # The instance is never used; only the constructor call matters.
        BoxLoader(box_developer_token="box_developer_token")  # type: ignore[call-arg]
|
|
||||||
|
|
||||||
|
|
||||||
def test_folder_initialization() -> None:
    """A token and a folder ID given to BoxLoader are readable back unchanged."""
    box_loader = BoxLoader(  # type: ignore[call-arg]
        box_folder_id="box_folder_id",
        box_developer_token="box_developer_token",
    )

    expected = {
        "box_developer_token": "box_developer_token",
        "box_folder_id": "box_folder_id",
    }
    for field, value in expected.items():
        assert getattr(box_loader, field) == value
|
|
||||||
|
|
||||||
|
|
||||||
def test_failed_initialization_files_and_folders() -> None:
    """Passing a folder ID together with file IDs must raise ValueError."""
    with pytest.raises(ValueError):
        # The instance is never used; only the constructor call matters.
        BoxLoader(  # type: ignore[call-arg]
            box_file_ids=["box_file_ids"],
            box_folder_id="box_folder_id",
            box_developer_token="box_developer_token",
        )
|
|
||||||
|
|
||||||
|
|
||||||
# test Document retrieval
|
|
||||||
def test_file_load(mocker: MockerFixture) -> None:
    """Run BoxLoader.load() twice against a stubbed get_document_by_file_id."""
    stub_target = "langchain_box.utilities._BoxAPIWrapper.get_document_by_file_id"

    def build_loader() -> BoxLoader:
        # A fresh loader per round, configured exactly the same way each time.
        return BoxLoader(  # type: ignore[call-arg]
            box_developer_token="box_developer_token",
            box_file_ids=["box_file_ids"],
        )

    # Round one: the stub returns an empty list; load() is only required to
    # come back with something truthy.
    mocker.patch(stub_target, return_value=[])
    assert build_loader().load()

    # Round two: the stub returns a single Document, which load() must hand
    # back as the only element of a list.
    mocker.patch(
        stub_target,
        return_value=Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        ),
    )
    loaded = build_loader().load()
    assert loaded == [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
|
|
@ -1,203 +0,0 @@
|
|||||||
import pytest
|
|
||||||
from langchain_core.documents import Document
|
|
||||||
from pytest_mock import MockerFixture
|
|
||||||
|
|
||||||
from langchain_box.retrievers import BoxRetriever
|
|
||||||
from langchain_box.utilities import (
|
|
||||||
BoxAuth,
|
|
||||||
BoxAuthType,
|
|
||||||
BoxSearchOptions,
|
|
||||||
DocumentFiles,
|
|
||||||
SearchTypeFilter,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
# Test auth types
|
|
||||||
def test_direct_token_initialization() -> None:
    """A token and file IDs given to BoxRetriever are readable back unchanged."""
    box_retriever = BoxRetriever(  # type: ignore[call-arg]
        box_file_ids=["box_file_ids"],
        box_developer_token="box_developer_token",
    )

    expected = {
        "box_developer_token": "box_developer_token",
        "box_file_ids": ["box_file_ids"],
    }
    for field, value in expected.items():
        assert getattr(box_retriever, field) == value
|
|
||||||
|
|
||||||
|
|
||||||
def test_failed_direct_token_initialization() -> None:
    """BoxRetriever built with file IDs but no credentials must raise ValueError."""
    with pytest.raises(ValueError):
        # The instance is never used; only the constructor call matters.
        BoxRetriever(box_file_ids=["box_file_ids"])  # type: ignore[call-arg]
|
|
||||||
|
|
||||||
|
|
||||||
def test_auth_initialization() -> None:
    """BoxRetriever can be built from a BoxAuth object instead of a raw token."""
    token_auth = BoxAuth(
        box_developer_token="box_developer_token",
        auth_type=BoxAuthType.TOKEN,
    )

    box_retriever = BoxRetriever(  # type: ignore[call-arg]
        box_file_ids=["box_file_ids"],
        box_auth=token_auth,
    )

    assert box_retriever.box_file_ids == ["box_file_ids"]
|
|
||||||
|
|
||||||
|
|
||||||
# test search retrieval
|
|
||||||
def test_search(mocker: MockerFixture) -> None:
    """invoke() on a token-only retriever must return the stubbed search_box result."""

    def sample_doc() -> Document:
        # Built fresh on every call so the stubbed value and the expectation
        # are separate objects.
        return Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.search_box",
        return_value=[sample_doc()],
    )

    box_retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token="box_developer_token"
    )

    results = box_retriever.invoke("query")
    assert results == [sample_doc()]
|
|
||||||
|
|
||||||
|
|
||||||
# test search options
|
|
||||||
def test_search_options(mocker: MockerFixture) -> None:
    """invoke() with a populated BoxSearchOptions returns the stubbed search result.

    ``search_box`` is patched, so this test checks that a retriever can be
    constructed with search options and that ``invoke`` hands back what the
    stub returns; it does not exercise a real search.
    """
    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.search_box",
        return_value=(
            [
                Document(
                    page_content="Test file mode\ndocument contents",
                    metadata={"title": "Testing Files"},
                )
            ]
        ),
    )

    box_search_options = BoxSearchOptions(
        ancestor_folder_ids=["box_folder_id"],
        search_type_filter=[SearchTypeFilter.FILE_CONTENT],
        # Fix: the upper bound used to end in a stray comma *inside* the string
        # ("...-07:00,"), which is not a well-formed timestamp.
        created_date_range=["2023-01-01T00:00:00-07:00", "2024-08-01T00:00:00-07:00"],
        file_extensions=[DocumentFiles.DOCX, DocumentFiles.PDF],
        k=200,
        size_range=[1, 1000000],
        updated_date_range=None,
    )

    retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token="box_developer_token", box_search_options=box_search_options
    )

    documents = retriever.invoke("query")

    assert documents == [
        Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
    ]
|
|
||||||
|
|
||||||
|
|
||||||
# test ai retrieval
|
|
||||||
def test_ai(mocker: MockerFixture) -> None:
    """invoke() on a retriever with file IDs must return the stubbed ask_box_ai result."""

    def sample_doc() -> Document:
        # Built fresh on every call so the stubbed value and the expectation
        # are separate objects.
        return Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=[sample_doc()],
    )

    box_retriever = BoxRetriever(  # type: ignore[call-arg]
        box_file_ids=["box_file_ids"], box_developer_token="box_developer_token"
    )

    results = box_retriever.invoke("query")
    assert results == [sample_doc()]
|
|
||||||
|
|
||||||
|
|
||||||
# test ai retrieval with answer and citations
|
|
||||||
def test_ai_answer_citations(mocker: MockerFixture) -> None:
    """With citations=True, invoke() must return the stubbed answer plus citations."""

    def canned_docs() -> list:
        # One answer document followed by five citation documents; rebuilt on
        # each call so the stub and the expectation do not share objects.
        return [
            Document(
                page_content="Test file mode\ndocument contents",
                metadata={"title": "Testing Files"},
            ),
            Document(page_content="citation 1", metadata={"source": "source 1"}),
            Document(page_content="citation 2", metadata={"source": "source 2"}),
            Document(page_content="citation 3", metadata={"source": "source 3"}),
            Document(page_content="citation 4", metadata={"source": "source 4"}),
            Document(page_content="citation 5", metadata={"source": "source 5"}),
        ]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=canned_docs(),
    )

    box_retriever = BoxRetriever(  # type: ignore[call-arg]
        citations=True,
        box_file_ids=["box_file_ids"],
        box_developer_token="box_developer_token",
    )

    results = box_retriever.invoke("query")
    assert results == canned_docs()
|
|
||||||
|
|
||||||
|
|
||||||
# test ai retrieval with citations only
|
|
||||||
def test_ai_citations_only(mocker: MockerFixture) -> None:
    """invoke() must return the stubbed citation-only document list.

    NOTE(review): the retriever here is configured exactly like the one in
    test_ai_answer_citations (citations=True and nothing else); presumably a
    "citations only" scenario should also switch the answer off -- confirm
    against BoxRetriever's options.
    """

    def canned_citations() -> list:
        # Five citation documents and no answer document; rebuilt on each call
        # so the stub and the expectation do not share objects.
        return [
            Document(page_content="citation 1", metadata={"source": "source 1"}),
            Document(page_content="citation 2", metadata={"source": "source 2"}),
            Document(page_content="citation 3", metadata={"source": "source 3"}),
            Document(page_content="citation 4", metadata={"source": "source 4"}),
            Document(page_content="citation 5", metadata={"source": "source 5"}),
        ]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=canned_citations(),
    )

    box_retriever = BoxRetriever(  # type: ignore[call-arg]
        citations=True,
        box_file_ids=["box_file_ids"],
        box_developer_token="box_developer_token",
    )

    results = box_retriever.invoke("query")
    assert results == canned_citations()
|
|
@ -1,17 +0,0 @@
|
|||||||
from langchain_box import __all__
|
|
||||||
|
|
||||||
# Names the package is expected to list in its ``__all__``.
EXPECTED_ALL = [
    "BoxLoader", "BoxRetriever",
    "BoxAuth", "BoxAuthType",
    "BoxSearchOptions", "DocumentFiles", "SearchTypeFilter",
    "_BoxAPIWrapper",
    "__version__",
]  # fmt: skip
|
|
||||||
|
|
||||||
|
|
||||||
def test_all_imports() -> None:
    """The package's __all__ must hold exactly the names in EXPECTED_ALL, in any order."""
    expected_names = sorted(EXPECTED_ALL)
    exported_names = sorted(__all__)
    assert expected_names == exported_names
|
|
@ -1,230 +0,0 @@
|
|||||||
from unittest.mock import Mock
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
from langchain_core.documents import Document
|
|
||||||
from pydantic.error_wrappers import ValidationError
|
|
||||||
from pytest_mock import MockerFixture
|
|
||||||
|
|
||||||
from langchain_box.utilities import BoxAuth, BoxAuthType, _BoxAPIWrapper
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture()
def mock_worker(mocker: MockerFixture) -> None:
    """Patch BoxAuth's authorization/client methods and the text-extraction helper."""
    # Patch target -> canned return value, applied in insertion order.
    canned_returns = {
        "langchain_box.utilities.BoxAuth._authorize": Mock(),
        "langchain_box.utilities.BoxAuth.get_client": Mock(),
        "langchain_box.utilities._BoxAPIWrapper._get_text_representation": (
            "filename",
            "content",
            "url",
        ),
    }
    for target, canned in canned_returns.items():
        mocker.patch(target, return_value=canned)
|
|
||||||
|
|
||||||
|
|
||||||
# Test auth types
|
|
||||||
def test_token_initialization() -> None:
    """Token auth exposes the auth type as "token" and keeps the developer token."""
    token_auth = BoxAuth(
        box_developer_token="box_developer_token", auth_type=BoxAuthType.TOKEN
    )

    expected = {
        "auth_type": "token",
        "box_developer_token": "box_developer_token",
    }
    for field, value in expected.items():
        assert getattr(token_auth, field) == value
|
|
||||||
|
|
||||||
|
|
||||||
def test_failed_token_initialization() -> None:
    """Token auth without a developer token must raise ValidationError."""
    with pytest.raises(ValidationError):
        # The instance is never used; only the constructor call matters.
        BoxAuth(auth_type=BoxAuthType.TOKEN)
|
|
||||||
|
|
||||||
|
|
||||||
def test_jwt_eid_initialization() -> None:
    """JWT auth built from a config path exposes type "jwt" and keeps the path."""
    jwt_auth = BoxAuth(box_jwt_path="box_jwt_path", auth_type=BoxAuthType.JWT)

    expected = {"auth_type": "jwt", "box_jwt_path": "box_jwt_path"}
    for field, value in expected.items():
        assert getattr(jwt_auth, field) == value
|
|
||||||
|
|
||||||
|
|
||||||
def test_jwt_user_initialization() -> None:
    """JWT auth with a user ID keeps the path and the user ID and reports type "jwt"."""
    jwt_auth = BoxAuth(
        box_user_id="box_user_id",
        box_jwt_path="box_jwt_path",
        auth_type=BoxAuthType.JWT,
    )

    expected = {
        "auth_type": "jwt",
        "box_jwt_path": "box_jwt_path",
        "box_user_id": "box_user_id",
    }
    for field, value in expected.items():
        assert getattr(jwt_auth, field) == value
|
|
||||||
|
|
||||||
|
|
||||||
def test_failed_jwt_initialization() -> None:
    """JWT auth given a user ID but no JWT path must raise ValidationError."""
    with pytest.raises(ValidationError):
        # The instance is never used; only the constructor call matters.
        BoxAuth(box_user_id="box_user_id", auth_type=BoxAuthType.JWT)
|
|
||||||
|
|
||||||
|
|
||||||
def test_ccg_eid_initialization() -> None:
    """CCG auth with client ID, secret and enterprise ID keeps all three values."""
    ccg_auth = BoxAuth(
        box_enterprise_id="box_enterprise_id",
        box_client_secret="box_client_secret",
        box_client_id="box_client_id",
        auth_type=BoxAuthType.CCG,
    )

    expected = {
        "auth_type": "ccg",
        "box_client_id": "box_client_id",
        "box_client_secret": "box_client_secret",
        "box_enterprise_id": "box_enterprise_id",
    }
    for field, value in expected.items():
        assert getattr(ccg_auth, field) == value
|
|
||||||
|
|
||||||
|
|
||||||
def test_ccg_user_initialization() -> None:
    """CCG auth that also names a user keeps every supplied credential field."""
    ccg_auth = BoxAuth(
        box_user_id="box_user_id",
        box_enterprise_id="box_enterprise_id",
        box_client_secret="box_client_secret",
        box_client_id="box_client_id",
        auth_type=BoxAuthType.CCG,
    )

    expected = {
        "auth_type": "ccg",
        "box_client_id": "box_client_id",
        "box_client_secret": "box_client_secret",
        "box_enterprise_id": "box_enterprise_id",
        "box_user_id": "box_user_id",
    }
    for field, value in expected.items():
        assert getattr(ccg_auth, field) == value
|
|
||||||
|
|
||||||
|
|
||||||
def test_failed_ccg_initialization() -> None:
    """CCG auth with no client credentials at all must raise ValidationError."""
    with pytest.raises(ValidationError):
        # The instance is never used; only the constructor call matters.
        BoxAuth(auth_type=BoxAuthType.CCG)
|
|
||||||
|
|
||||||
|
|
||||||
def test_direct_token_initialization() -> None:
    """A developer token passed straight to _BoxAPIWrapper is readable back."""
    wrapper = _BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]

    assert wrapper.box_developer_token == "box_developer_token"
|
|
||||||
|
|
||||||
|
|
||||||
def test_auth_initialization() -> None:
    """_BoxAPIWrapper accepts a BoxAuth object; the auth object keeps its token."""
    token_auth = BoxAuth(
        box_developer_token="box_developer_token", auth_type=BoxAuthType.TOKEN
    )

    # Only construction is exercised; the wrapper itself is not inspected.
    _BoxAPIWrapper(box_auth=token_auth)  # type: ignore[call-arg]

    assert token_auth.box_developer_token == "box_developer_token"
|
|
||||||
|
|
||||||
|
|
||||||
def test_failed_initialization_no_auth() -> None:
    """_BoxAPIWrapper built with no arguments must raise ValidationError."""
    with pytest.raises(ValidationError):
        # The instance is never used; only the constructor call matters.
        _BoxAPIWrapper()  # type: ignore[call-arg]
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_documents_by_file_ids(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """Calling the patched get_document_by_file_id returns the canned Document."""

    def sample_doc() -> Document:
        # Built fresh on every call so the stubbed value and the expectation
        # are separate objects.
        return Document(
            page_content="content", metadata={"source": "url", "title": "filename"}
        )

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.get_document_by_file_id",
        return_value=sample_doc(),
    )

    wrapper = _BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]

    fetched = wrapper.get_document_by_file_id("box_file_id")
    assert fetched == sample_doc()
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_documents_by_folder_id(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """Calling the patched get_folder_items returns the canned folder listing."""
    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.get_folder_items",
        return_value=[{"id": "file_id", "type": "file"}],
    )

    wrapper = _BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]

    listing = wrapper.get_folder_items("box_folder_id")
    assert listing == [{"id": "file_id", "type": "file"}]
|
|
||||||
|
|
||||||
|
|
||||||
def test_box_search(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """Calling the patched search_box returns the canned single-document list."""

    def sample_doc() -> Document:
        # Built fresh on every call so the stubbed value and the expectation
        # are separate objects.
        return Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.search_box",
        return_value=[sample_doc()],
    )

    wrapper = _BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]

    found = wrapper.search_box("query")
    assert found == [sample_doc()]
|
|
||||||
|
|
||||||
|
|
||||||
def test_ask_box_ai_single_file(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """Calling the patched ask_box_ai on a one-file wrapper returns the canned list."""

    def sample_doc() -> Document:
        # Built fresh on every call so the stubbed value and the expectation
        # are separate objects.
        return Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=[sample_doc()],
    )

    wrapper = _BoxAPIWrapper(  # type: ignore[call-arg]
        box_file_ids=["box_file_ids"], box_developer_token="box_developer_token"
    )

    answers = wrapper.ask_box_ai("query")  # type: ignore[call-arg]
    assert answers == [sample_doc()]
|
|
||||||
|
|
||||||
|
|
||||||
def test_ask_box_ai_multiple_files(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """Calling the patched ask_box_ai on a two-file wrapper returns both canned docs."""

    def canned_docs() -> list:
        # Two documents, rebuilt on each call so the stub and the expectation
        # do not share objects.
        return [
            Document(
                page_content="Test file 1 mode\ndocument contents",
                metadata={"title": "Test File 1"},
            ),
            Document(
                page_content="Test file 2 mode\ndocument contents",
                metadata={"title": "Test File 2"},
            ),
        ]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=canned_docs(),
    )

    wrapper = _BoxAPIWrapper(  # type: ignore[call-arg]
        box_file_ids=["box_file_id 1", "box_file_id 2"],
        box_developer_token="box_developer_token",
    )

    answers = wrapper.ask_box_ai("query")  # type: ignore[call-arg]
    assert answers == canned_docs()
|
|
Loading…
Reference in New Issue
Block a user