box: migrate to repo (#27969)

This commit is contained in:
Erick Friis 2024-11-07 10:19:22 -08:00 committed by GitHub
parent 1ad49957f5
commit 8a5b9bf2ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
34 changed files with 4 additions and 3449 deletions

View File

@ -141,5 +141,5 @@ packages:
repo: langchain-ai/langchain repo: langchain-ai/langchain
path: libs/partners/ollama path: libs/partners/ollama
- name: langchain-box - name: langchain-box
repo: langchain-ai/langchain repo: langchain-ai/langchain-box
path: libs/partners/box path: libs/box

View File

@ -1 +0,0 @@
__pycache__

View File

@ -1,21 +0,0 @@
MIT License
Copyright (c) 2024 LangChain, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,65 +0,0 @@
# Every target in this file is a command, not a file, so all of them are phony.
# docker_tests and extended_tests have no rule here; they are kept in the list
# so that invoking them keeps behaving exactly as before.
.PHONY: all format format_diff lint lint_diff lint_package lint_tests \
    test tests test_watch integration_test integration_tests \
    docker_tests extended_tests spell_check spell_fix check_imports help

# Default target executed when no arguments are given to make.
all: help

# Define a variable for the test file path.
TEST_FILE ?= tests/unit_tests/

integration_test integration_tests: TEST_FILE = tests/integration_tests/

# unit tests are run with the --disable-socket flag to prevent network calls
test tests:
	poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE)

test_watch:
	poetry run ptw --snapshot-update --now . -- -vv $(TEST_FILE)

# integration tests are run without the --disable-socket flag to allow network calls
integration_test integration_tests:
	poetry run pytest $(TEST_FILE)

######################
# LINTING AND FORMATTING
######################

# Define a variable for Python and notebook files.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache
lint format: PYTHON_FILES=.
# Deliberately a recursive (=) assignment: `git diff` only runs when one of the
# *_diff targets actually expands PYTHON_FILES, not on every make invocation.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/box --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=langchain_box
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test

# `ruff check` is spelled out because newer Ruff releases no longer accept the
# bare `ruff <path>` form. The per-file steps are skipped when PYTHON_FILES is
# empty (e.g. lint_diff with no changed Python files); otherwise mypy fails
# with "no targets" and ruff silently falls back to the whole directory.
lint lint_diff lint_package lint_tests:
	poetry run ruff check .
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || { mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE); }

format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff check --select I --fix $(PYTHON_FILES)

spell_check:
	poetry run codespell --toml pyproject.toml

spell_fix:
	poetry run codespell --toml pyproject.toml -w

# The prerequisites are every Python file in the package; $^ passes them all
# to the import checker.
check_imports: $(shell find langchain_box -name '*.py')
	poetry run python ./scripts/check_imports.py $^

######################
# HELP
######################

help:
	@echo '----'
	@echo 'check_imports                - check imports'
	@echo 'format                       - run code formatters'
	@echo 'format_diff                  - run code formatters on files changed vs master'
	@echo 'lint                         - run linters'
	@echo 'lint_diff                    - run linters on files changed vs master'
	@echo 'lint_package                 - run linters on langchain_box only'
	@echo 'lint_tests                   - run linters on tests only'
	@echo 'spell_check                  - run codespell'
	@echo 'spell_fix                    - run codespell and write fixes'
	@echo 'test                         - run unit tests'
	@echo 'tests                        - run unit tests'
	@echo 'test_watch                   - re-run unit tests on file changes'
	@echo 'integration_tests            - run integration tests'
	@echo 'test TEST_FILE=<test_file>   - run all tests in file'

View File

@ -1,195 +1,3 @@
# langchain-box This package has moved!
This package contains the LangChain integration with Box. For more information about https://github.com/langchain-ai/langchain-box/tree/main/libs/box
Box, check out our [developer documentation](https://developer.box.com).
## Pre-requisites
In order to integrate with Box, you need a few things:
* A Box instance — if you are not a current Box customer, sign up for a
[free dev account](https://account.box.com/signup/n/developer#ty9l3).
* A Box app — more on how to
[create an app](https://developer.box.com/guides/getting-started/first-application/)
* Your app approved in your Box instance — This is done by your admin.
The good news is if you are using a free developer account, you are the admin.
[Authorize your app](https://developer.box.com/guides/authorization/custom-app-approval/#manual-approval)
## Installation
```bash
pip install -U langchain-box
```
## Authentication
The `langchain-box` package offers some flexibility in authentication. The
most basic authentication method is by using a developer token. This can be
found in the [Box developer console](https://account.box.com/developers/console)
on the configuration screen. This token is purposely short-lived (1 hour) and is
intended for development. With this token, you can add it to your environment as
`BOX_DEVELOPER_TOKEN`, you can pass it directly to the loader, or you can use the
`BoxAuth` authentication helper class.
We will cover passing it directly to the loader in the section below.
### BoxAuth helper class
`BoxAuth` supports the following authentication methods:
* Token — either a developer token or any token generated through the Box SDK
* JWT with a service account
* JWT with a specified user
* CCG with a service account
* CCG with a specified user
> [!NOTE]
> If using JWT authentication, you will need to download the configuration from the Box
> developer console after generating your public/private key pair. Place this file in your
> application directory structure somewhere. You will use the path to this file when using
> the `BoxAuth` helper class.
For more information, learn about how to
[set up a Box application](https://developer.box.com/guides/getting-started/first-application/),
and check out the
[Box authentication guide](https://developer.box.com/guides/authentication/select/)
for more about our different authentication options.
Examples:
**Token**
```python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.TOKEN,
box_developer_token=box_developer_token
)
loader = BoxLoader(
box_auth=auth,
...
)
```
**JWT with a service account**
```python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.JWT,
box_jwt_path=box_jwt_path
)
loader = BoxLoader(
box_auth=auth,
...
)
```
**JWT with a specified user**
```python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.JWT,
box_jwt_path=box_jwt_path,
box_user_id=box_user_id
)
loader = BoxLoader(
box_auth=auth,
...
)
```
**CCG with a service account**
```python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.CCG,
box_client_id=box_client_id,
box_client_secret=box_client_secret,
box_enterprise_id=box_enterprise_id
)
loader = BoxLoader(
box_auth=auth,
...
)
```
**CCG with a specified user**
```python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.CCG,
box_client_id=box_client_id,
box_client_secret=box_client_secret,
box_user_id=box_user_id
)
loader = BoxLoader(
box_auth=auth,
...
)
```
## Document Loaders
The `BoxLoader` class helps you get your unstructured content from Box
in LangChain's `Document` format. You can do this with either a `List[str]`
containing Box file IDs, or with a `str` containing a Box folder ID.
If getting files from a folder with folder ID, you can also set the `recursive`
`bool` to tell the loader to get all sub-folders in that folder, as well.
:::info
A Box instance can contain Petabytes of files, and folders can contain millions
of files. Be intentional when choosing what folders you choose to index. And we
recommend never getting all files from folder 0 recursively. Folder ID 0 is your
root folder.
:::
### Load files
```python
import os
from langchain_box.document_loaders import BoxLoader
os.environ["BOX_DEVELOPER_TOKEN"] = "df21df2df21df2d1f21df2df1"
loader = BoxLoader(
box_file_ids=["12345", "67890"],
character_limit=10000 # Optional. Defaults to no limit
)
docs = loader.lazy_load()
```
### Load from folder
```python
import os
from langchain_box.document_loaders import BoxLoader
os.environ["BOX_DEVELOPER_TOKEN"] = "df21df2df21df2d1f21df2df1"
loader = BoxLoader(
box_folder_id="12345",
recursive=False, # Optional. return entire tree, defaults to False
character_limit=10000 # Optional. Defaults to no limit
)
docs = loader.lazy_load()
```

View File

@ -1,31 +0,0 @@
# Package entry point: re-exports the loader, retriever and Box utility names
# and exposes the installed distribution version as ``__version__``.
from importlib import metadata

from langchain_box.document_loaders import BoxLoader
from langchain_box.retrievers import BoxRetriever
from langchain_box.utilities.box import (
    BoxAuth,
    BoxAuthType,
    BoxSearchOptions,
    DocumentFiles,
    SearchTypeFilter,
    _BoxAPIWrapper,
)

# Look up the version recorded in the installed package metadata; fall back to
# an empty string when no metadata can be found for this package.
try:
    __version__ = metadata.version(__package__)
except metadata.PackageNotFoundError:
    # Case where package metadata is not available.
    __version__ = ""
del metadata  # optional, avoids polluting the results of dir(__package__)

# Names exported by ``from langchain_box import *``.
__all__ = [
    "BoxLoader",
    "BoxRetriever",
    "BoxAuth",
    "BoxAuthType",
    "BoxSearchOptions",
    "DocumentFiles",
    "SearchTypeFilter",
    "_BoxAPIWrapper",
    "__version__",
]

View File

@ -1,5 +0,0 @@
"""Box Document Loaders."""
from langchain_box.document_loaders.box import BoxLoader
__all__ = ["BoxLoader"]

View File

@ -1,260 +0,0 @@
from typing import Iterator, List, Optional
from box_sdk_gen import FileBaseTypeField # type: ignore
from langchain_core.document_loaders.base import BaseLoader
from langchain_core.documents import Document
from langchain_core.utils import from_env
from pydantic import BaseModel, ConfigDict, Field, model_validator
from typing_extensions import Self
from langchain_box.utilities import BoxAuth, _BoxAPIWrapper
class BoxLoader(BaseLoader, BaseModel):
    """BoxLoader.

    This class will help you load files from your Box instance. You must have a
    Box account. If you need one, you can sign up for a free developer account.
    You will also need a Box application created in the developer portal, where
    you can select your authorization type.

    If you wish to use either of the Box AI options, you must be on an Enterprise
    Plus plan or above. The free developer account does not have access to Box AI.

    In addition, using the Box AI API requires a few prerequisite steps:

    * Your administrator must enable the Box AI API
    * You must enable the ``Manage AI`` scope in your app in the developer console.
    * Your administrator must install and enable your application.

    **Setup**:

        Install ``langchain-box`` and set environment variable ``BOX_DEVELOPER_TOKEN``.

        .. code-block:: bash

            pip install -U langchain-box
            export BOX_DEVELOPER_TOKEN="your-api-key"

    This loader returns ``Document`` objects built from text representations of files
    in Box. It will skip any document without a text representation available. You can
    provide either a ``List[str]`` containing Box file IDs, or you can provide a
    ``str`` containing a Box folder ID. If providing a folder ID, you can also enable
    recursive mode to get the full tree under that folder.

    .. note::
        A Box instance can contain Petabytes of files, and folders can contain millions
        of files. Be intentional when choosing what folders you choose to index. And we
        recommend never getting all files from folder 0 recursively. Folder ID 0 is your
        root folder.

    **Instantiate**:

        .. list-table:: Initialization variables
            :widths: 25 50 15 10
            :header-rows: 1

            * - Variable
              - Description
              - Type
              - Default
            * - box_developer_token
              - Token to use for auth.
              - ``str``
              - ``None``
            * - box_auth
              - Configured ``BoxAuth`` object to use for auth.
              - ``langchain_box.utilities.BoxAuth``
              - ``None``
            * - box_file_ids
              - Box file IDs to load.
              - ``List[str]``
              - ``None``
            * - box_folder_id
              - Box folder ID to load files from.
              - ``str``
              - ``None``
            * - recursive
              - When loading by folder ID, also traverse subfolders.
              - ``bool``
              - ``False``
            * - character_limit
              - Caps the number of characters returned per document.
              - ``int``
              - ``-1``

    **Get files** this method requires you pass the ``box_file_ids`` parameter.
    This is a ``List[str]`` containing the file IDs you wish to index.

        .. code-block:: python

            from langchain_box.document_loaders import BoxLoader

            box_file_ids = ["1514555423624", "1514553902288"]

            loader = BoxLoader(
                box_file_ids=box_file_ids,
                character_limit=10000  # Optional. Defaults to no limit
            )

    **Get files in a folder** this method requires you pass the ``box_folder_id``
    parameter. This is a ``str`` containing the folder ID you wish to index.

        .. code-block:: python

            from langchain_box.document_loaders import BoxLoader

            box_folder_id = "260932470532"

            loader = BoxLoader(
                box_folder_id=box_folder_id,
                recursive=False  # Optional. return entire tree, defaults to False
            )

    **Load**:

        .. code-block:: python

            docs = loader.load()
            docs[0]

        .. code-block:: python

            Document(metadata={'source': 'https://dl.boxcloud.com/api/2.0/
            internal_files/1514555423624/versions/1663171610024/representations
            /extracted_text/content/', 'title': 'Invoice-A5555_txt'},
            page_content='Vendor: AstroTech Solutions\\nInvoice Number: A5555\\n\\nLine
            Items:\\n    - Gravitational Wave Detector Kit: $800\\n    - Exoplanet
            Terrarium: $120\\nTotal: $920')

    **Lazy load**:

        .. code-block:: python

            docs = []
            docs_lazy = loader.lazy_load()

            for doc in docs_lazy:
                docs.append(doc)
            print(docs[0].page_content[:100])
            print(docs[0].metadata)

        .. code-block:: python

            Document(metadata={'source': 'https://dl.boxcloud.com/api/2.0/
            internal_files/1514555423624/versions/1663171610024/representations
            /extracted_text/content/', 'title': 'Invoice-A5555_txt'},
            page_content='Vendor: AstroTech Solutions\\nInvoice Number: A5555\\n\\nLine
            Items:\\n    - Gravitational Wave Detector Kit: $800\\n    - Exoplanet
            Terrarium: $120\\nTotal: $920')

    """

    box_developer_token: Optional[str] = Field(
        default_factory=from_env("BOX_DEVELOPER_TOKEN", default=None)
    )
    """String containing the Box Developer Token generated in the developer console"""

    box_auth: Optional[BoxAuth] = None
    """Configured
       `BoxAuth <https://python.langchain.com/v0.2/api_reference/box/utilities/langchain_box.utilities.box.BoxAuth.html>`_
       object"""

    box_file_ids: Optional[List[str]] = None
    """List[str] containing Box file ids"""

    box_folder_id: Optional[str] = None
    """String containing box folder id to load files from"""

    recursive: Optional[bool] = False
    """If getting files by folder id, recursive is a bool to determine if you wish
       to traverse subfolders to return child documents. Default is False"""

    character_limit: Optional[int] = -1
    """character_limit is an int that caps the number of characters to
       return per document."""

    # API wrapper built by the validator below; every Box call goes through it.
    _box: Optional[_BoxAPIWrapper] = None

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="allow",
        use_enum_values=True,
    )

    @model_validator(mode="after")
    def validate_box_loader_inputs(self) -> Self:
        """Validate the loader's inputs and build the Box API wrapper.

        Returns:
            The validated loader, with ``_box`` set to a ``_BoxAPIWrapper``.

        Raises:
            ValueError: If neither or both of ``box_file_ids`` and
                ``box_folder_id`` are given, or if neither ``box_auth`` nor
                ``box_developer_token`` is available.
        """
        # Exactly one source of documents must be selected.
        if not self.box_file_ids and not self.box_folder_id:
            raise ValueError("You must provide box_file_ids or box_folder_id.")

        if self.box_file_ids and self.box_folder_id:
            raise ValueError(
                "You must provide either box_file_ids or box_folder_id, not both."
            )

        # A configured BoxAuth takes precedence over a bare developer token.
        if self.box_auth:
            _box = _BoxAPIWrapper(  # type: ignore[call-arg]
                box_auth=self.box_auth,
                character_limit=self.character_limit,
            )
        elif self.box_developer_token:
            _box = _BoxAPIWrapper(  # type: ignore[call-arg]
                box_developer_token=self.box_developer_token,
                character_limit=self.character_limit,
            )
        else:
            raise ValueError(
                "you must provide box_developer_token or a box_auth "
                "generated with langchain_box.utilities.BoxAuth"
            )

        self._box = _box

        return self

    def _get_files_from_folder(self, folder_id):  # type: ignore[no-untyped-def]
        """Yield a document for each file in ``folder_id``.

        Subfolders are descended into only when ``recursive`` is truthy. Files
        for which the wrapper returns ``None`` are skipped.
        """
        # The wrapper lives on the private ``_box`` attribute set by the
        # validator; the class defines no ``box`` attribute, so reading
        # ``self.box`` here raised AttributeError, which lazy_load swallowed.
        folder_content = self._box.get_folder_items(folder_id)

        for file in folder_content:
            # Best effort: a TypeError raised while handling one entry skips
            # that entry instead of aborting the whole listing.
            try:
                if file.type == FileBaseTypeField.FILE:
                    doc = self._box.get_document_by_file_id(file.id)

                    if doc is not None:
                        yield doc

                elif file.type == "folder" and self.recursive:
                    try:
                        yield from self._get_files_from_folder(file.id)
                    except TypeError:
                        pass
            except TypeError:
                pass

    def lazy_load(self) -> Iterator[Document]:
        """Load documents. Accepts no arguments. Returns `Iterator[Document]`"""
        if self.box_file_ids:
            for file_id in self.box_file_ids:
                try:
                    file = self._box.get_document_by_file_id(file_id)  # type: ignore[union-attr]

                    if file is not None:
                        yield file
                except TypeError:
                    pass
        elif self.box_folder_id:
            try:
                yield from self._get_files_from_folder(self.box_folder_id)
            except TypeError:
                pass
            except Exception as e:
                # Deliberate best effort: report the failure and end the
                # iteration rather than propagate the error to the caller.
                print(f"Exception {e}")  # noqa: T201
        else:
            raise ValueError(
                "You must provide either `box_file_ids` or `box_folder_id`"
            )

View File

@ -1,5 +0,0 @@
"""Box Document Loaders."""
from langchain_box.retrievers.box import BoxRetriever
__all__ = ["BoxRetriever"]

View File

@ -1,185 +0,0 @@
from typing import List, Optional
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from langchain_core.utils import from_env
from pydantic import ConfigDict, Field, model_validator
from typing_extensions import Self
from langchain_box.utilities import BoxAuth, BoxSearchOptions, _BoxAPIWrapper
class BoxRetriever(BaseRetriever):
    """Box retriever.

    `BoxRetriever` provides the ability to retrieve content from
    your Box instance in a couple of ways.

    1. You can use the Box full-text search to retrieve the
       complete document(s) that match your search query, as
       `List[Document]`
    2. You can use the Box AI Platform API to retrieve the results
       from a Box AI prompt. This can be a `Document` containing
       the result of the prompt, or you can retrieve the citations
       used to generate the prompt to include in your vectorstore.

    Setup:
        Install ``langchain-box``:

        .. code-block:: bash

            pip install -U langchain-box

    Instantiate:

        To use search:

        .. code-block:: python

            from langchain_box.retrievers import BoxRetriever

            retriever = BoxRetriever()

        To use Box AI:

        .. code-block:: python

            from langchain_box.retrievers import BoxRetriever

            box_file_ids = ["12345", "67890"]

            retriever = BoxRetriever(box_file_ids=box_file_ids)

    Usage:
        .. code-block:: python

            retriever = BoxRetriever()
            docs = retriever.invoke("victor")
            print(docs[0].page_content[:100])

        .. code-block:: none

            [
                Document(
                    metadata={
                        'source': 'url',
                        'title': 'FIVE_FEET_AND_RISING_by_Peter_Sollett_pdf'
                    },
                    page_content='\\n3/20/23, 5:31 PM F...'
                )
            ]

    Use within a chain:
        .. code-block:: python

            from langchain_core.output_parsers import StrOutputParser
            from langchain_core.prompts import ChatPromptTemplate
            from langchain_core.runnables import RunnablePassthrough
            from langchain_openai import ChatOpenAI

            llm = ChatOpenAI()

            retriever = BoxRetriever(box_developer_token=box_developer_token, character_limit=10000)

            context="You are an actor reading scripts to learn about your role in an upcoming movie."
            question="describe the character Victor"

            prompt = ChatPromptTemplate.from_template(
                \"""Answer the question based only on the context provided.

                Context: {context}

                Question: {question}\"""
            )

            def format_docs(docs):
                return "\\n\\n".join(doc.page_content for doc in docs)

            chain = (
                {"context": retriever | format_docs, "question": RunnablePassthrough()}
                | prompt
                | llm
                | StrOutputParser()
            )

            chain.invoke("Victor")  # search query to find files in Box

        .. code-block:: none

            'Victor is a skinny 12-year-old with sloppy hair who is seen
            sleeping on his fire escape in the sun. He is hesitant to go to
            the pool with his friend Carlos because he is afraid of getting
            in trouble for not letting his mother cut his hair. Ultimately,
            he decides to go to the pool with Carlos.'
    """  # noqa: E501

    box_developer_token: Optional[str] = Field(
        default_factory=from_env("BOX_DEVELOPER_TOKEN", default=None)
    )
    """String containing the Box Developer Token generated in the developer console"""

    box_auth: Optional[BoxAuth] = None
    """Configured
       `BoxAuth <https://python.langchain.com/v0.2/api_reference/box/utilities/langchain_box.utilities.box.BoxAuth.html>`_
       object"""

    box_file_ids: Optional[List[str]] = None
    """List[str] containing Box file ids"""

    character_limit: Optional[int] = -1
    """character_limit is an int that caps the number of characters to
       return per document."""

    box_search_options: Optional[BoxSearchOptions] = None
    """Search options to configure BoxRetriever to narrow search results."""

    answer: Optional[bool] = True
    """When using Box AI, return the answer to the prompt as a `Document`
       object. Returned as `List[Document]`. Default is `True`."""

    citations: Optional[bool] = False
    """When using Box AI, return the citations used to answer the prompt as
       `Document` objects. Can be used with answer. Returned as `List[Document]`.
       Default is `False`."""

    # API wrapper built by the validator below. The explicit ``None`` default
    # matches BoxLoader's declaration of the same attribute.
    _box: Optional[_BoxAPIWrapper] = None

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="allow",
    )

    @model_validator(mode="after")
    def validate_box_loader_inputs(self) -> Self:
        """Validate the auth inputs and build the Box API wrapper.

        Returns:
            The validated retriever, with ``_box`` set to a ``_BoxAPIWrapper``.

        Raises:
            ValueError: If neither ``box_auth`` nor ``box_developer_token``
                is available.
        """
        if not self.box_auth and not self.box_developer_token:
            raise ValueError(
                "you must provide box_developer_token or a box_auth "
                "generated with langchain_box.utilities.BoxAuth"
            )

        # Both credentials are forwarded; the wrapper receives whichever are set.
        self._box = _BoxAPIWrapper(  # type: ignore[call-arg]
            box_developer_token=self.box_developer_token,
            box_auth=self.box_auth,
            character_limit=self.character_limit,
            box_search_options=self.box_search_options,
        )

        return self

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return documents for ``query``.

        When ``box_file_ids`` is set, the query is sent to Box AI as a prompt
        over those files; otherwise it is used as a Box search query.
        """
        if self.box_file_ids:  # If using Box AI
            return self._box.ask_box_ai(  # type: ignore[union-attr]
                query=query,
                box_file_ids=self.box_file_ids,
                answer=self.answer,  # type: ignore[arg-type]
                citations=self.citations,  # type: ignore[arg-type]
            )
        else:  # If using Search
            return self._box.search_box(query=query)  # type: ignore[union-attr]

View File

@ -1,19 +0,0 @@
"""Box API Utilities."""
from langchain_box.utilities.box import (
BoxAuth,
BoxAuthType,
BoxSearchOptions,
DocumentFiles,
SearchTypeFilter,
_BoxAPIWrapper,
)
__all__ = [
"BoxAuth",
"BoxAuthType",
"BoxSearchOptions",
"DocumentFiles",
"SearchTypeFilter",
"_BoxAPIWrapper",
]

View File

@ -1,875 +0,0 @@
"""Util that calls Box APIs."""
from enum import Enum
from typing import Any, Dict, List, Optional
import box_sdk_gen # type: ignore
import requests
from langchain_core.documents import Document
from langchain_core.utils import from_env
from pydantic import BaseModel, ConfigDict, Field, model_validator
from typing_extensions import Self
class DocumentFiles(Enum):
    """DocumentFiles(Enum).

    An enum containing all of the supported extensions for files
    Box considers Documents. These files should have text
    representations.

    Each member's value is the lowercase file extension without a leading dot.
    """

    DOC = "doc"
    DOCX = "docx"
    GDOC = "gdoc"
    GSHEET = "gsheet"
    NUMBERS = "numbers"
    ODS = "ods"
    ODT = "odt"
    PAGES = "pages"
    PDF = "pdf"
    RTF = "rtf"
    WPD = "wpd"
    XLS = "xls"
    XLSM = "xlsm"
    XLSX = "xlsx"
    AS = "as"
    AS3 = "as3"
    ASM = "asm"
    BAT = "bat"
    C = "c"
    CC = "cc"
    CMAKE = "cmake"
    CPP = "cpp"
    CS = "cs"
    CSS = "css"
    CSV = "csv"
    CXX = "cxx"
    DIFF = "diff"
    ERB = "erb"
    GROOVY = "groovy"
    H = "h"
    HAML = "haml"
    HH = "hh"
    HTM = "htm"
    HTML = "html"
    JAVA = "java"
    JS = "js"
    JSON = "json"
    LESS = "less"
    LOG = "log"
    M = "m"
    MAKE = "make"
    MD = "md"
    ML = "ml"
    MM = "mm"
    MSG = "msg"
    PHP = "php"
    PL = "pl"
    PROPERTIES = "properties"
    PY = "py"
    RB = "rb"
    RST = "rst"
    SASS = "sass"
    SCALA = "scala"
    SCM = "scm"
    SCRIPT = "script"
    SH = "sh"
    SML = "sml"
    SQL = "sql"
    TXT = "txt"
    VI = "vi"
    VIM = "vim"
    WEBDOC = "webdoc"
    XHTML = "xhtml"
    XLSB = "xlsb"
    XML = "xml"
    XSD = "xsd"
    XSL = "xsl"
    YAML = "yaml"
    GSLIDE = "gslide"
    # Backward-compatible alias for the original misspelled member name. It
    # shares GSLIDE's value, so Enum treats it as an alias rather than a new
    # member and ``DocumentFiles.GSLLIDE`` keeps resolving.
    GSLLIDE = "gslide"
    GSLIDES = "gslides"
    KEY = "key"
    ODP = "odp"
    PPT = "ppt"
    PPTX = "pptx"
    BOXNOTE = "boxnote"
class ImageFiles(Enum):
    """ImageFiles(Enum).

    An enum containing all of the supported extensions for files
    Box considers images.

    Each member's value is the lowercase file extension without a leading dot.
    """

    ARW = "arw"
    BMP = "bmp"
    CR2 = "cr2"
    DCM = "dcm"
    DICM = "dicm"
    DICOM = "dicom"
    DNG = "dng"
    EPS = "eps"
    EXR = "exr"
    GIF = "gif"
    HEIC = "heic"
    INDD = "indd"
    INDML = "indml"
    INDT = "indt"
    INX = "inx"
    JPEG = "jpeg"
    JPG = "jpg"
    NEF = "nef"
    PNG = "png"
    SVG = "svg"
    TIF = "tif"
    TIFF = "tiff"
    TGA = "tga"
    SVS = "svs"
class BoxAuthType(Enum):
    """BoxAuthType(Enum).

    An enum to tell BoxLoader how you wish to authenticate your Box connection.

    Options are:

    TOKEN - Use a developer token generated from the Box developer console.
            Only recommended for development.
            Provide ``box_developer_token``.
    CCG - Client Credentials Grant.
          Provide ``box_client_id``, ``box_client_secret``,
          and ``box_enterprise_id`` or optionally ``box_user_id``.
    JWT - Use JWT for authentication. Config should be stored on the file
          system accessible to your app.
          Provide ``box_jwt_path``. Optionally, provide ``box_user_id`` to
          act as a specific user.
    """

    TOKEN = "token"
    """Use a developer token or a token retrieved from ``box-sdk-gen``"""

    CCG = "ccg"
    """Use ``client_credentials`` type grant"""

    JWT = "jwt"
    """Use JWT bearer token auth"""
class BoxAuth(BaseModel):
"""**BoxAuth.**
The ``box-langchain`` package offers some flexibility to authentication. The
most basic authentication method is by using a developer token. This can be
found in the `Box developer console <https://account.box.com/developers/console>`_
on the configuration screen. This token is purposely short-lived (1 hour) and is
intended for development. With this token, you can add it to your environment as
``BOX_DEVELOPER_TOKEN``, you can pass it directly to the loader, or you can use the
``BoxAuth`` authentication helper class.
`BoxAuth` supports the following authentication methods:
* **Token** either a developer token or any token generated through the Box SDK
* **JWT** with a service account
* **JWT** with a specified user
* **CCG** with a service account
* **CCG** with a specified user
.. note::
If using JWT authentication, you will need to download the configuration from
the Box developer console after generating your public/private key pair. Place
this file in your application directory structure somewhere. You will use the
path to this file when using the ``BoxAuth`` helper class. If you wish to use
OAuth2 with the authorization_code flow, please use ``BoxAuthType.TOKEN`` with
the token you have acquired.
For more information, learn about how to
`set up a Box application <https://developer.box.com/guides/getting-started/first-application/>`_,
and check out the
`Box authentication guide <https://developer.box.com/guides/authentication/select/>`_
for more about our different authentication options.
Simple implementation:
To instantiate, you must provide a ``langchain_box.utilities.BoxAuthType``.
BoxAuthType is an enum to tell BoxLoader how you wish to autheticate your
Box connection.
Options are:
TOKEN - Use a developer token generated from the Box Deevloper Token.
Only recommended for development.
Provide ``box_developer_token``.
CCG - Client Credentials Grant.
provide ``box_client_id``, ``box_client_secret``,
and ``box_enterprise_id`` or optionally ``box_user_id``.
JWT - Use JWT for authentication. Config should be stored on the file
system accessible to your app.
provide ``box_jwt_path``. Optionally, provide ``box_user_id`` to
act as a specific user
**Examples**:
**Token**
.. code-block:: python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.TOKEN,
box_developer_token=box_developer_token
)
loader = BoxLoader(
box_auth=auth,
...
)
**JWT with a service account**
.. code-block:: python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.JWT,
box_jwt_path=box_jwt_path
)
loader = BoxLoader(
box_auth=auth,
...
)
**JWT with a specified user**
.. code-block:: python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.JWT,
box_jwt_path=box_jwt_path,
box_user_id=box_user_id
)
loader = BoxLoader(
box_auth=auth,
...
)
**CCG with a service account**
.. code-block:: python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.CCG,
box_client_id=box_client_id,
box_client_secret=box_client_secret,
box_enterprise_id=box_enterprise_id
)
loader = BoxLoader(
box_auth=auth,
...
)
**CCG with a specified user**
.. code-block:: python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.CCG,
box_client_id=box_client_id,
box_client_secret=box_client_secret,
box_user_id=box_user_id
)
loader = BoxLoader(
box_auth=auth,
...
)
"""
auth_type: BoxAuthType
"""``langchain_box.utilities.BoxAuthType``. Enum describing how to
authenticate against Box"""
box_developer_token: Optional[str] = Field(
default_factory=from_env("BOX_DEVELOPER_TOKEN", default=None)
)
""" If using ``BoxAuthType.TOKEN``, provide your token here"""
box_jwt_path: Optional[str] = Field(
default_factory=from_env("BOX_JWT_PATH", default=None)
)
"""If using ``BoxAuthType.JWT``, provide local path to your
JWT configuration file"""
box_client_id: Optional[str] = Field(
default_factory=from_env("BOX_CLIENT_ID", default=None)
)
"""If using ``BoxAuthType.CCG``, provide your app's client ID"""
box_client_secret: Optional[str] = Field(
default_factory=from_env("BOX_CLIENT_SECRET", default=None)
)
"""If using ``BoxAuthType.CCG``, provide your app's client secret"""
box_enterprise_id: Optional[str] = None
"""If using ``BoxAuthType.CCG``, provide your enterprise ID.
Only required if you are not sending ``box_user_id``"""
box_user_id: Optional[str] = None
"""If using ``BoxAuthType.CCG`` or ``BoxAuthType.JWT``, providing
``box_user_id`` will act on behalf of a specific user"""
_box_client: Optional[box_sdk_gen.BoxClient] = None
_custom_header: Dict = dict({"x-box-ai-library": "langchain"})
model_config = ConfigDict(
arbitrary_types_allowed=True,
use_enum_values=True,
extra="allow",
)
@model_validator(mode="after")
def validate_box_auth_inputs(self) -> Self:
    """Check that the fields required by the chosen ``auth_type`` are set.

    ``auth_type`` is compared against plain strings because the model is
    configured with ``use_enum_values=True``.

    Returns:
        The validated model.

    Raises:
        ValueError: If ``auth_type`` is empty, or a field required by the
            selected auth type is missing.
    """
    if not self.auth_type:
        raise ValueError("Auth type must be set.")

    # TOKEN auth needs box_developer_token.
    if self.auth_type == "token" and not self.box_developer_token:
        raise ValueError(f"{self.auth_type} requires box_developer_token to be set")

    # JWT auth needs the path to the JWT config file.
    if self.auth_type == "jwt" and not self.box_jwt_path:
        raise ValueError(f"{self.auth_type} requires box_jwt_path to be set")

    # CCG auth needs the client id and secret, plus either an enterprise id
    # or a user id.
    if self.auth_type == "ccg":
        if (
            not self.box_client_id
            or not self.box_client_secret
            or (not self.box_enterprise_id and not self.box_user_id)
        ):
            # Adjacent literals instead of a backslash continuation inside the
            # string, so the message never picks up source indentation.
            raise ValueError(
                f"{self.auth_type} requires box_client_id, "
                "box_client_secret, and box_enterprise_id/box_user_id."
            )

    return self
def _authorize(self) -> None:
    """Build the Box client for the configured ``auth_type``.

    The resulting client is stored in ``self._box_client`` with the
    ``self._custom_header`` extra headers applied.

    Raises:
        RuntimeError: if the Box SDK raises ``BoxSDKError`` while the client
            is being built. The SDK error is attached as the cause.
        ValueError: if any other exception occurs while building the client,
            or if ``auth_type`` is not ``token``, ``jwt`` or ``ccg``.
    """
    if self.auth_type == "token":
        try:
            auth = box_sdk_gen.BoxDeveloperTokenAuth(token=self.box_developer_token)
            self._box_client = box_sdk_gen.BoxClient(auth=auth).with_extra_headers(
                extra_headers=self._custom_header
            )
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"Error getting client from developer token: {bse.message}"
            ) from bse
        except Exception as ex:
            raise ValueError(
                "Invalid Box developer token. Please verify your "
                f"token and try again.\n{ex}"
            ) from ex

    elif self.auth_type == "jwt":
        try:
            jwt_config = box_sdk_gen.JWTConfig.from_config_file(
                config_file_path=self.box_jwt_path
            )
            auth = box_sdk_gen.BoxJWTAuth(config=jwt_config)
            # Switch to the user subject BEFORE caching a client. Caching the
            # enterprise client first would leave it in ``_box_client`` if the
            # switch failed, and a later ``get_client()`` would return it.
            if self.box_user_id is not None:
                auth = auth.with_user_subject(self.box_user_id)
            self._box_client = box_sdk_gen.BoxClient(auth=auth).with_extra_headers(
                extra_headers=self._custom_header
            )
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"Error getting client from jwt token: {bse.message}"
            ) from bse
        except Exception as ex:
            raise ValueError(
                "Error authenticating. Please verify your JWT config "
                "and try again."
            ) from ex

    elif self.auth_type == "ccg":
        try:
            # A user ID takes precedence over the enterprise ID.
            if self.box_user_id is not None:
                ccg_config = box_sdk_gen.CCGConfig(
                    client_id=self.box_client_id,
                    client_secret=self.box_client_secret,
                    user_id=self.box_user_id,
                )
            else:
                ccg_config = box_sdk_gen.CCGConfig(
                    client_id=self.box_client_id,
                    client_secret=self.box_client_secret,
                    enterprise_id=self.box_enterprise_id,
                )
            auth = box_sdk_gen.BoxCCGAuth(config=ccg_config)
            self._box_client = box_sdk_gen.BoxClient(auth=auth).with_extra_headers(
                extra_headers=self._custom_header
            )
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"Error getting client from ccg token: {bse.message}"
            ) from bse
        except Exception as ex:
            raise ValueError(
                "Error authenticating. Please verify you are providing a "
                "valid client id, secret and either a valid user ID or "
                "enterprise ID."
            ) from ex

    else:
        raise ValueError(
            f"{self.auth_type} is not a valid auth_type. Value must be "
            "TOKEN, CCG, or JWT."
        )
def get_client(self) -> box_sdk_gen.BoxClient:
    """Return the cached Box client, authorizing first if none exists yet."""
    client = self._box_client
    if client is None:
        # First use: _authorize() fills in self._box_client.
        self._authorize()
        client = self._box_client
    return client
class SearchTypeFilter(Enum):
    """SearchTypeFilter.

    Enum naming the parts of an item that a search may be limited to.
    """

    NAME = "name"
    """Match against the item's ``name`` field."""

    DESCRIPTION = "description"
    """Match against the item's ``description`` field."""

    FILE_CONTENT = "file_content"
    """Match against the actual content of the file."""

    COMMENTS = "comments"
    """Match against the content of any comment on a file or folder."""

    TAGS = "tags"
    """Match against the tags applied to an item (its ``tags`` field)."""
class BoxSearchOptions(BaseModel):
    """Optional filters used to narrow a Box search.

    Every field is optional. ``validate_search_options`` runs after field
    validation and rejects an out-of-range ``k`` and date ranges whose start
    is later than their end.
    """

    ancestor_folder_ids: Optional[List[str]] = None
    """Limits the search results to items within the given list of folders,
        defined as a list of folder IDs."""

    search_type_filter: Optional[List[SearchTypeFilter]] = None
    """Limits the search results to any items that match the search query for a
        specific part of the file, for example the file description.

        Given as a list of ``SearchTypeFilter`` entries. Default is all."""

    created_date_range: Optional[List[str]] = None
    """Limits the search results to any items created within a given date range.

        Date ranges are defined as a pair of RFC3339 timestamps
        ``[start, end]``.

        If the start date is omitted (empty string or ``None``), anything
        created before the end date will be returned.

        If the end date is omitted (empty string or ``None``), the current
        date will be used as the end date instead."""

    file_extensions: Optional[List[DocumentFiles]] = None
    """Limits the search results to any files that match any of the provided
        file extensions. This is a list of
        ``langchain_box.utilities.DocumentFiles`` entries"""

    k: Optional[int] = 100
    """Defines the maximum number of items to return. Defaults to 100, maximum
        is 200."""

    size_range: Optional[List[int]] = None
    """Limits the search results to any items with a size within a given file
        size range. This applies to files and folders.

        Size ranges are defined as a list of a lower and upper
        byte size limit (inclusive).

        The upper and lower bound can be omitted to create open ranges."""

    updated_date_range: Optional[List[str]] = None
    """Limits the search results to any items updated within a given date range.

        Date ranges are defined as a pair of RFC3339 timestamps
        ``[start, end]``.

        If the start date is omitted (empty string or ``None``), anything
        updated before the end date will be returned.

        If the end date is omitted (empty string or ``None``), the current
        date will be used as the end date instead."""

    # Same configuration style as the other models in this module; replaces
    # the deprecated pydantic v1 inner ``class Config``.
    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        use_enum_values=True,
        extra="allow",
    )

    @model_validator(mode="after")
    def validate_search_options(self) -> Self:
        """Validate ``k`` and the two date ranges.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: if ``k`` is set and outside 1..200, or if a date range
                has both bounds set and the start is greater than the end.
        """
        # ``k`` is Optional: ``None`` is accepted instead of crashing with a
        # TypeError on the comparison.
        if self.k is not None and not 1 <= self.k <= 200:
            raise ValueError(
                f"Invalid setting of k {self.k}. " "Value must be between 1 and 200."
            )

        for date_range in (self.created_date_range, self.updated_date_range):
            if not date_range:
                continue
            start = date_range[0]
            # A one-element list is treated as "end omitted" rather than
            # raising IndexError, which pydantic would not convert into a
            # ValidationError.
            end = date_range[1] if len(date_range) > 1 else None
            if start in (None, "") or end in (None, ""):
                # Open-ended range: nothing to compare.
                continue
            # NOTE(review): this is a plain string comparison, as before, so
            # it is only meaningful when both timestamps share a UTC offset.
            if start > end:
                raise ValueError("Start date must be before end date.")

        return self
class _BoxAPIWrapper(BaseModel):
    """Wrapper for Box API.

    Authenticates through a configured ``BoxAuth`` object or, when none is
    given, a developer token, and exposes helpers that turn Box files, search
    results and Box AI answers into ``Document`` objects.
    """

    box_developer_token: Optional[str] = Field(
        default_factory=from_env("BOX_DEVELOPER_TOKEN", default=None)
    )
    """String containing the Box Developer Token generated in the developer console"""

    box_auth: Optional[BoxAuth] = None
    """Configured langchain_box.utilities.BoxAuth object"""

    character_limit: Optional[int] = -1
    """character_limit is an int that caps the number of characters to
       return per document. ``None`` or a value that is not greater than zero
       disables the cap."""

    box_search_options: Optional[BoxSearchOptions] = None
    """Search options to configure BoxRetriever to narrow search results."""

    # Cached Box client; populated by the validator or by get_box_client().
    _box: Optional[box_sdk_gen.BoxClient] = None

    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        use_enum_values=True,
        extra="allow",
    )

    @model_validator(mode="after")
    def validate_box_api_inputs(self) -> Self:
        """Require ``box_auth`` or ``box_developer_token``.

        When ``box_auth`` is provided its client is fetched immediately and
        cached in ``self._box``.

        Raises:
            ValueError: if neither ``box_auth`` nor ``box_developer_token``
                is set.
        """
        self._box = None

        if not self.box_auth:
            if not self.box_developer_token:
                raise ValueError(
                    "You must configure either box_developer_token or box_auth"
                )
        else:
            box_auth = self.box_auth
            self._box = box_auth.get_client()  #  type: ignore[union-attr]

        return self

    def get_box_client(self) -> box_sdk_gen.BoxClient:
        """Create a client from ``box_developer_token``, cache and return it."""
        box_auth = BoxAuth(
            auth_type=BoxAuthType.TOKEN, box_developer_token=self.box_developer_token
        )

        self._box = box_auth.get_client()
        # The method is annotated as returning the client; it used to fall
        # through and return None.
        return self._box

    def _do_request(self, url: str) -> Any:
        """GET ``url`` with the client's bearer token and return the raw body.

        Raises:
            RuntimeError: if the Box SDK fails to retrieve an access token.
            requests.exceptions.HTTPError: if the response status is an error
                (via ``raise_for_status``).
        """
        try:
            access_token = self._box.auth.retrieve_token().access_token  #  type: ignore[union-attr]
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"Error retrieving access token: {bse.message}"
            ) from bse

        resp = requests.get(url, headers={"Authorization": f"Bearer {access_token}"})
        resp.raise_for_status()
        return resp.content

    def _get_text_representation(self, file_id: str = "") -> tuple[str, str, str]:
        """Fetch the ``extracted_text`` representation of a file.

        Returns:
            ``(file_name, content, url)``. ``file_name`` has dots and spaces
            replaced by underscores and ``content`` is truncated to
            ``character_limit`` characters when that limit is positive.
            ``(None, None, None)`` is returned when the file has no usable
            text representation or the download fails with an HTTP error.

        Raises:
            RuntimeError: if the Box SDK reports an API or SDK error while
                fetching the file metadata.
        """
        try:
            from box_sdk_gen import BoxAPIError, BoxSDKError
        except ImportError as ie:
            raise ImportError("You must run `pip install box-sdk-gen`") from ie

        if self._box is None:
            self.get_box_client()

        try:
            file = self._box.files.get_file_by_id(  #  type: ignore[union-attr]
                file_id,
                x_rep_hints="[extracted_text]",
                fields=["name", "representations", "type"],
            )
        except BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting text rep: {bae.message}"
            ) from bae
        except BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting text rep: {bse.message}"
            ) from bse
        except Exception:
            # Deliberate best-effort: any other failure means "no text".
            return None, None, None  #   type: ignore[return-value]

        representations = file.representations
        file_repr = representations.entries if representations is not None else None

        # Covers both a missing representations object and an empty list.
        if not file_repr:
            return None, None, None  #   type: ignore[return-value]

        for entry in file_repr:
            if entry.representation != "extracted_text":
                continue

            # If the file representation doesn't exist, calling
            # info.url will generate text if possible
            if entry.status.state == "none":
                self._do_request(entry.info.url)

            url = entry.content.url_template.replace("{+asset_path}", "")
            file_name = file.name.replace(".", "_").replace(" ", "_")

            try:
                raw_content = self._do_request(url)
            except requests.exceptions.HTTPError:
                return None, None, None  #   type: ignore[return-value]

            # Decode before truncating so the limit counts characters and a
            # multi-byte character is never cut in half.
            if isinstance(raw_content, bytes):
                content = raw_content.decode("utf-8", errors="replace")
            else:
                content = raw_content

            if self.character_limit is not None and self.character_limit > 0:
                # Keep exactly ``character_limit`` characters; the previous
                # slice ended at ``character_limit - 1`` and dropped one.
                content = content[: self.character_limit]

            return file_name, content, url

        return None, None, None  #   type: ignore[return-value]

    def get_document_by_file_id(self, file_id: str) -> Optional[Document]:
        """Load a file from a Box id. Accepts file_id as str.

        Returns:
            A ``Document`` whose metadata holds ``source`` (the content URL)
            and ``title`` (the sanitized file name), or ``None`` when the
            item is not a file, its extension has no ``DocumentFiles``
            attribute, or no text representation could be fetched.
        """

        if self._box is None:
            self.get_box_client()

        file = self._box.files.get_file_by_id(  #  type: ignore[union-attr]
            file_id, fields=["name", "type", "extension"]
        )

        if file.type != "file":
            return None

        # An item without an extension cannot match any DocumentFiles entry.
        extension = file.extension or ""
        if not hasattr(DocumentFiles, extension.upper()):
            return None

        file_name, content, url = self._get_text_representation(file_id=file_id)

        if file_name is None or content is None or url is None:
            return None

        metadata = {
            "source": f"{url}",
            "title": f"{file_name}",
        }

        return Document(page_content=content, metadata=metadata)

    def get_folder_items(self, folder_id: str) -> box_sdk_gen.Items:
        """Get all the items in a folder. Accepts folder_id as str.

        Returns:
            The ``entries`` of the folder listing returned by the Box SDK.

        Raises:
            RuntimeError: if the Box SDK reports an API or SDK error.
        """
        if self._box is None:
            self.get_box_client()

        try:
            folder_contents = self._box.folders.get_folder_items(  #  type: ignore[union-attr]
                folder_id, fields=["id", "type", "name"]
            )
        except box_sdk_gen.BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting folder content: {bae.message}"
            ) from bae
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting folder content: {bse.message}"
            ) from bse

        return folder_contents.entries

    def search_box(self, query: str) -> List[Document]:
        """Search Box for files matching ``query`` and load them as documents.

        ``box_search_options``, when set, is forwarded to the search call.

        Returns:
            One ``Document`` per hit that is a file with a ``DocumentFiles``
            extension and a loadable text representation. An empty list is
            returned when the search has no results (this used to be
            ``None``, contradicting the annotation).

        Raises:
            RuntimeError: if the Box SDK reports an API or SDK error.
        """
        if self._box is None:
            self.get_box_client()

        search_kwargs: Dict[str, Any] = {
            "query": query,
            "fields": ["id", "type", "extension"],
            "type": "file",
        }
        options = self.box_search_options
        if options is not None:
            search_kwargs.update(
                ancestor_folder_ids=options.ancestor_folder_ids,
                content_types=options.search_type_filter,
                created_at_range=options.created_date_range,
                file_extensions=options.file_extensions,
                limit=options.k,
                size_range=options.size_range,
                updated_at_range=options.updated_date_range,
            )

        files: List[Document] = []

        try:
            results = self._box.search.search_for_content(  #  type: ignore[union-attr]
                **search_kwargs
            )

            if not results.entries:
                return files

            for file in results.entries:
                if (
                    file is not None
                    and file.type == "file"
                    and hasattr(DocumentFiles, (file.extension or "").upper())
                ):
                    doc = self.get_document_by_file_id(file.id)

                    if doc is not None:
                        files.append(doc)

            return files
        except box_sdk_gen.BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting search results: {bae.message}"
            ) from bae
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting search results: {bse.message}"
            ) from bse

    def ask_box_ai(
        self,
        query: str,
        box_file_ids: List[str],
        answer: bool = True,
        citations: bool = False,
    ) -> List[Document]:
        """Ask Box AI ``query`` about the given files.

        Args:
            query: prompt sent to Box AI.
            box_file_ids: IDs of the files to ask about; one ID selects
                single-item mode, several select multiple-item mode.
            answer: include the answer as the first returned document.
            citations: request citations and append one document per citation.

        Returns:
            The answer document (if ``answer``) followed by the citation
            documents (if ``citations``).

        Raises:
            ValueError: if ``box_file_ids`` is empty.
            RuntimeError: if the Box SDK reports an API or SDK error.
        """
        # Reject empty input before creating a client or building the request.
        if len(box_file_ids) <= 0:
            raise ValueError("BOX_AI_ASK requires at least one file ID")

        if self._box is None:
            self.get_box_client()

        if len(box_file_ids) > 1:
            ai_mode = box_sdk_gen.CreateAiAskMode.MULTIPLE_ITEM_QA.value
        else:
            ai_mode = box_sdk_gen.CreateAiAskMode.SINGLE_ITEM_QA.value

        items = [
            box_sdk_gen.AiItemBase(
                id=file_id, type=box_sdk_gen.AiItemBaseTypeField.FILE.value
            )
            for file_id in box_file_ids
        ]

        try:
            response = self._box.ai.create_ai_ask(  #  type: ignore[union-attr]
                mode=ai_mode, prompt=query, items=items, include_citations=citations
            )

        except box_sdk_gen.BoxAPIError as bae:
            raise RuntimeError(
                f"BoxAPIError: Error getting Box AI result: {bae.message}"
            ) from bae
        except box_sdk_gen.BoxSDKError as bse:
            raise RuntimeError(
                f"BoxSDKError: Error getting Box AI result: {bse.message}"
            ) from bse

        docs = []

        if answer:
            content = response.answer
            metadata = {"source": "Box AI", "title": f"Box AI {query}"}

            document = Document(page_content=content, metadata=metadata)
            docs.append(document)

        if citations:
            # Guard against a response that carries no citations at all.
            box_citations = response.citations or []

            for citation in box_citations:
                metadata = {
                    "source": f"Box AI {query}",
                    "file_name": citation.name,
                    "file_id": citation.id,
                    "file_type": citation.type.value,
                }

                document = Document(page_content=citation.content, metadata=metadata)
                docs.append(document)

        return docs

File diff suppressed because it is too large Load Diff

View File

@ -1,84 +0,0 @@
# Packaging, dependency and tooling configuration for the langchain-box package.

# Build backend: the package is built with poetry-core.
[build-system]
requires = [ "poetry-core>=1.0.0",]
build-backend = "poetry.core.masonry.api"
# Package metadata.
[tool.poetry]
name = "langchain-box"
version = "0.2.3"
description = "An integration package connecting Box and LangChain"
authors = []
readme = "README.md"
repository = "https://github.com/langchain-ai/langchain"
license = "MIT"
# Type checking: every function definition must be annotated.
[tool.mypy]
disallow_untyped_defs = "True"
[tool.poetry.urls]
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/box"
"Release Notes" = "https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain-box%3D%3D0%22&expanded=true"
# Runtime dependencies (box-sdk-gen is declared in its own table further down).
[tool.poetry.dependencies]
python = ">=3.9.0,<3.13"
langchain-core = "^0.3.15"
pydantic = "^2"
# Ruff rule sets: pycodestyle errors (E), pyflakes (F), import sorting (I)
# and the `print` check (T201).
[tool.ruff.lint]
select = [ "E", "F", "I", "T201",]
# Test files are excluded from coverage measurement.
[tool.coverage.run]
omit = [ "tests/*",]
[tool.pytest.ini_options]
markers = [ "compile: mark placeholder test used to compile integration tests without running them",]
asyncio_mode = "auto"
# Box SDK, installed with its "jwt" extra.
[tool.poetry.dependencies.box-sdk-gen]
extras = [ "jwt",]
version = "^1.5.0"
# Optional dependency groups: only installed when explicitly requested.
[tool.poetry.group.test]
optional = true
[tool.poetry.group.codespell]
optional = true
[tool.poetry.group.test_integration]
optional = true
[tool.poetry.group.lint]
optional = true
[tool.poetry.group.dev]
optional = true
[tool.poetry.group.test.dependencies]
pytest = "^7.4.3"
pytest_mock = "^3.14.0"
pytest-asyncio = "^0.23.2"
pytest-socket = "^0.7.0"
[tool.poetry.group.codespell.dependencies]
codespell = "^2.2.6"
[tool.poetry.group.test_integration.dependencies]
python-dotenv = "^1.0.1"
[tool.poetry.group.lint.dependencies]
ruff = "^0.1.8"
# NOTE(review): unlike the groups above, `typing` has no
# `[tool.poetry.group.typing]` table marking it optional - confirm that this
# is intentional.
[tool.poetry.group.typing.dependencies]
mypy = "^1.7.1"
types-requests = "^2.32.0.20240712"
# The test, dev and typing groups use langchain-core from the relative path
# ../../core, installed in develop (editable) mode.
[tool.poetry.group.test.dependencies.langchain-core]
path = "../../core"
develop = true
[tool.poetry.group.dev.dependencies.langchain-core]
path = "../../core"
develop = true
[tool.poetry.group.typing.dependencies.langchain-core]
path = "../../core"
develop = true

View File

@ -1,17 +0,0 @@
import sys
import traceback
from importlib.machinery import SourceFileLoader
def check_files(files):
    """Load every file as a module and report whether any of them failed.

    For each file that raises while being loaded, its path and the traceback
    are printed.

    Args:
        files: iterable of paths to Python source files.

    Returns:
        True if at least one file raised while loading, otherwise False.
    """
    has_failure = False
    for file in files:
        try:
            # Every file is loaded under the throwaway module name "x".
            SourceFileLoader("x", file).load_module()
        except Exception:
            # The original assigned to a misspelled name (`has_faillure`),
            # so failures were printed but the exit status stayed 0.
            has_failure = True
            print(file)  # noqa: T201
            traceback.print_exc()
            print()  # noqa: T201
    return has_failure


if __name__ == "__main__":
    # Exit with status 1 when any file given on the command line fails to load.
    sys.exit(1 if check_files(sys.argv[1:]) else 0)

View File

@ -1,18 +0,0 @@
#!/bin/bash
# Fails (exit 1) when any tracked file imports from langchain,
# langchain_experimental, or langchain_community; exits 0 otherwise.
set -eu

# Number of forbidden packages with at least one matching import line.
errors=0

# `git grep` prints every matching line and succeeds only when it found one,
# so the counter goes up once per offending package.
for pkg in langchain langchain_experimental langchain_community; do
  if git --no-pager grep "^from ${pkg}\." .; then
    errors=$((errors+1))
  fi
done

# Decide on an exit status based on the errors
if [ "$errors" -eq 0 ]; then
  exit 0
fi
exit 1

View File

@ -1,3 +0,0 @@
"""
TODO: build live integration tests
"""

View File

@ -1,3 +0,0 @@
"""
TODO: build live integration tests
"""

View File

@ -1,7 +0,0 @@
import pytest # type: ignore[import-not-found]
@pytest.mark.compile
def test_placeholder() -> None:
    """No-op test that lets the integration suite be compiled without running real tests."""

View File

@ -1,3 +0,0 @@
"""
TODO: build live integration tests
"""

View File

@ -1,99 +0,0 @@
import pytest
from langchain_core.documents import Document
from pytest_mock import MockerFixture
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
# Test auth types
def test_direct_token_initialization() -> None:
    """A loader built from a developer token and file IDs keeps both values."""
    token = "box_developer_token"
    loader = BoxLoader(  # type: ignore[call-arg]
        box_developer_token=token, box_file_ids=["box_file_ids"]
    )

    assert loader.box_developer_token == token
    assert loader.box_file_ids == ["box_file_ids"]
def test_failed_direct_token_initialization() -> None:
    """Building a loader from file IDs alone, with no token or auth, raises."""
    with pytest.raises(ValueError):
        BoxLoader(box_file_ids=["box_file_ids"])  # type: ignore[call-arg]
def test_auth_initialization() -> None:
    """A loader can be built from a token-type ``BoxAuth`` instead of a raw token."""
    token_auth = BoxAuth(
        auth_type=BoxAuthType.TOKEN, box_developer_token="box_developer_token"
    )
    loader = BoxLoader(  # type: ignore[call-arg]
        box_auth=token_auth, box_file_ids=["box_file_ids"]
    )

    assert loader.box_file_ids == ["box_file_ids"]
# test loaders
def test_failed_file_initialization() -> None:
    """A loader given a token but neither file IDs nor a folder ID raises."""
    with pytest.raises(ValueError):
        BoxLoader(box_developer_token="box_developer_token")  # type: ignore[call-arg]
def test_folder_initialization() -> None:
    """A loader built from a developer token and a folder ID keeps both values."""
    token = "box_developer_token"
    folder = "box_folder_id"
    loader = BoxLoader(  # type: ignore[call-arg]
        box_developer_token=token, box_folder_id=folder
    )

    assert loader.box_developer_token == token
    assert loader.box_folder_id == folder
def test_failed_initialization_files_and_folders() -> None:
    """Passing both a folder ID and file IDs to the loader raises."""
    conflicting_kwargs = {
        "box_developer_token": "box_developer_token",
        "box_folder_id": "box_folder_id",
        "box_file_ids": ["box_file_ids"],
    }
    with pytest.raises(ValueError):
        BoxLoader(**conflicting_kwargs)  # type: ignore[call-arg]
# test Document retrieval
def test_file_load(mocker: MockerFixture) -> None:
    """``load()`` returns whatever the patched ``get_document_by_file_id`` yields."""
    target = "langchain_box.utilities._BoxAPIWrapper.get_document_by_file_id"

    def make_loader() -> BoxLoader:
        return BoxLoader(  # type: ignore[call-arg]
            box_developer_token="box_developer_token",
            box_file_ids=["box_file_ids"],
        )

    def make_document() -> Document:
        return Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )

    # First pass: the wrapper is stubbed to return an empty list.
    # NOTE(review): the stub is a list rather than a Document, and the
    # assertion only checks truthiness - confirm this is what was intended.
    mocker.patch(target, return_value=[])
    documents = make_loader().load()
    assert documents

    # Second pass: the wrapper is stubbed to return a real Document.
    mocker.patch(target, return_value=make_document())
    documents = make_loader().load()
    assert documents == [make_document()]

View File

@ -1,203 +0,0 @@
import pytest
from langchain_core.documents import Document
from pytest_mock import MockerFixture
from langchain_box.retrievers import BoxRetriever
from langchain_box.utilities import (
BoxAuth,
BoxAuthType,
BoxSearchOptions,
DocumentFiles,
SearchTypeFilter,
)
# Test auth types
def test_direct_token_initialization() -> None:
    """A retriever built from a developer token and file IDs keeps both values."""
    token = "box_developer_token"
    retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token=token, box_file_ids=["box_file_ids"]
    )

    assert retriever.box_developer_token == token
    assert retriever.box_file_ids == ["box_file_ids"]
def test_failed_direct_token_initialization() -> None:
    """Building a retriever from file IDs alone, with no token or auth, raises."""
    with pytest.raises(ValueError):
        BoxRetriever(box_file_ids=["box_file_ids"])  # type: ignore[call-arg]
def test_auth_initialization() -> None:
    """A retriever can be built from a token-type ``BoxAuth`` instead of a raw token."""
    token_auth = BoxAuth(
        auth_type=BoxAuthType.TOKEN, box_developer_token="box_developer_token"
    )
    retriever = BoxRetriever(  # type: ignore[call-arg]
        box_auth=token_auth, box_file_ids=["box_file_ids"]
    )

    assert retriever.box_file_ids == ["box_file_ids"]
# test search retrieval
def test_search(mocker: MockerFixture) -> None:
    """Without file IDs, ``invoke`` returns the documents from the patched ``search_box``."""

    def search_hits() -> list:
        return [
            Document(
                page_content="Test file mode\ndocument contents",
                metadata={"title": "Testing Files"},
            )
        ]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.search_box",
        return_value=search_hits(),
    )

    retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token="box_developer_token"
    )

    assert retriever.invoke("query") == search_hits()
# test search options
def test_search_options(mocker: MockerFixture) -> None:
    """A retriever configured with ``BoxSearchOptions`` still returns the search hits.

    ``search_box`` is patched, so this checks that the options object
    validates and is accepted by the retriever, not that the filters reach
    the Box API.
    """

    def search_hits() -> list:
        return [
            Document(
                page_content="Test file mode\ndocument contents",
                metadata={"title": "Testing Files"},
            )
        ]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.search_box",
        return_value=search_hits(),
    )

    box_search_options = BoxSearchOptions(
        ancestor_folder_ids=["box_folder_id"],
        search_type_filter=[SearchTypeFilter.FILE_CONTENT],
        # The end timestamp used to carry a stray trailing comma inside the
        # string, which made it an invalid RFC3339 value.
        created_date_range=["2023-01-01T00:00:00-07:00", "2024-08-01T00:00:00-07:00"],
        file_extensions=[DocumentFiles.DOCX, DocumentFiles.PDF],
        k=200,
        size_range=[1, 1000000],
        updated_date_range=None,
    )

    retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token="box_developer_token", box_search_options=box_search_options
    )

    documents = retriever.invoke("query")
    assert documents == search_hits()
# test ai retrieval
def test_ai(mocker: MockerFixture) -> None:
    """With file IDs set, ``invoke`` returns the documents from the patched ``ask_box_ai``."""

    def ai_answer() -> list:
        return [
            Document(
                page_content="Test file mode\ndocument contents",
                metadata={"title": "Testing Files"},
            )
        ]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=ai_answer(),
    )

    retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token="box_developer_token", box_file_ids=["box_file_ids"]
    )

    assert retriever.invoke("query") == ai_answer()
# test ai retrieval with answer and citations
def test_ai_answer_citations(mocker: MockerFixture) -> None:
    """With ``citations=True``, ``invoke`` returns the answer followed by the citations."""

    def answer_and_citations() -> list:
        answer = Document(
            page_content="Test file mode\ndocument contents",
            metadata={"title": "Testing Files"},
        )
        cited = [
            Document(page_content=f"citation {i}", metadata={"source": f"source {i}"})
            for i in range(1, 6)
        ]
        return [answer, *cited]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=answer_and_citations(),
    )

    retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token="box_developer_token",
        box_file_ids=["box_file_ids"],
        citations=True,
    )

    assert retriever.invoke("query") == answer_and_citations()
# test ai retrieval with citations only
def test_ai_citations_only(mocker: MockerFixture) -> None:
    """``invoke`` returns the citation documents supplied by the patched ``ask_box_ai``."""
    # NOTE(review): the retriever is configured exactly as in
    # test_ai_answer_citations; only the stubbed return value differs.
    # Confirm whether an option disabling the answer was meant to be set here.

    def citations_only() -> list:
        return [
            Document(page_content=f"citation {i}", metadata={"source": f"source {i}"})
            for i in range(1, 6)
        ]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=citations_only(),
    )

    retriever = BoxRetriever(  # type: ignore[call-arg]
        box_developer_token="box_developer_token",
        box_file_ids=["box_file_ids"],
        citations=True,
    )

    assert retriever.invoke("query") == citations_only()

View File

@ -1,17 +0,0 @@
from langchain_box import __all__
# Names the package is expected to export through ``__all__``.
EXPECTED_ALL = [
    "BoxLoader",
    "BoxRetriever",
    "BoxAuth",
    "BoxAuthType",
    "BoxSearchOptions",
    "DocumentFiles",
    "SearchTypeFilter",
    "_BoxAPIWrapper",
    "__version__",
]


def test_all_imports() -> None:
    """``langchain_box.__all__`` holds exactly the expected names, in any order."""
    expected_names = sorted(EXPECTED_ALL)
    exported_names = sorted(__all__)
    assert expected_names == exported_names

View File

@ -1,230 +0,0 @@
from unittest.mock import Mock
import pytest
from langchain_core.documents import Document
from pydantic.error_wrappers import ValidationError
from pytest_mock import MockerFixture
from langchain_box.utilities import BoxAuth, BoxAuthType, _BoxAPIWrapper
@pytest.fixture()
def mock_worker(mocker: MockerFixture) -> None:
    """Stub out Box authorization and text extraction for the wrapper tests."""
    # Neither auth entry point may build a real client; each gets its own Mock.
    for auth_target in (
        "langchain_box.utilities.BoxAuth._authorize",
        "langchain_box.utilities.BoxAuth.get_client",
    ):
        mocker.patch(auth_target, return_value=Mock())

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper._get_text_representation",
        return_value=("filename", "content", "url"),
    )
# Test auth types
def test_token_initialization() -> None:
    """TOKEN auth stores the enum value ``"token"`` and the developer token."""
    token = "box_developer_token"
    auth = BoxAuth(auth_type=BoxAuthType.TOKEN, box_developer_token=token)

    assert auth.auth_type == "token"
    assert auth.box_developer_token == token
def test_failed_token_initialization() -> None:
    """TOKEN auth without a developer token fails validation."""
    with pytest.raises(ValidationError):
        BoxAuth(auth_type=BoxAuthType.TOKEN)
def test_jwt_eid_initialization() -> None:
    """JWT auth with only a config path stores ``"jwt"`` and the path."""
    jwt_path = "box_jwt_path"
    auth = BoxAuth(auth_type=BoxAuthType.JWT, box_jwt_path=jwt_path)

    assert auth.auth_type == "jwt"
    assert auth.box_jwt_path == jwt_path
def test_jwt_user_initialization() -> None:
    """JWT auth with a config path and a user ID stores all three values."""
    jwt_path = "box_jwt_path"
    user_id = "box_user_id"
    auth = BoxAuth(
        auth_type=BoxAuthType.JWT, box_jwt_path=jwt_path, box_user_id=user_id
    )

    assert auth.auth_type == "jwt"
    assert auth.box_jwt_path == jwt_path
    assert auth.box_user_id == user_id
def test_failed_jwt_initialization() -> None:
    """JWT auth with a user ID but no config path fails validation."""
    with pytest.raises(ValidationError):
        BoxAuth(auth_type=BoxAuthType.JWT, box_user_id="box_user_id")
def test_ccg_eid_initialization() -> None:
    """CCG auth with client credentials and an enterprise ID stores every value."""
    credentials = {
        "box_client_id": "box_client_id",
        "box_client_secret": "box_client_secret",
        "box_enterprise_id": "box_enterprise_id",
    }
    auth = BoxAuth(auth_type=BoxAuthType.CCG, **credentials)

    assert auth.auth_type == "ccg"
    assert auth.box_client_id == "box_client_id"
    assert auth.box_client_secret == "box_client_secret"
    assert auth.box_enterprise_id == "box_enterprise_id"
def test_ccg_user_initialization() -> None:
    """CCG auth given both an enterprise ID and a user ID stores every value."""
    credentials = {
        "box_client_id": "box_client_id",
        "box_client_secret": "box_client_secret",
        "box_enterprise_id": "box_enterprise_id",
        "box_user_id": "box_user_id",
    }
    auth = BoxAuth(auth_type=BoxAuthType.CCG, **credentials)

    assert auth.auth_type == "ccg"
    assert auth.box_client_id == "box_client_id"
    assert auth.box_client_secret == "box_client_secret"
    assert auth.box_enterprise_id == "box_enterprise_id"
    assert auth.box_user_id == "box_user_id"
def test_failed_ccg_initialization() -> None:
    """CCG auth without any client credentials fails validation."""
    with pytest.raises(ValidationError):
        BoxAuth(auth_type=BoxAuthType.CCG)
def test_direct_token_initialization() -> None:
    """The wrapper built from a developer token keeps that token."""
    token = "box_developer_token"
    box = _BoxAPIWrapper(box_developer_token=token)  # type: ignore[call-arg]

    assert box.box_developer_token == token
def test_auth_initialization() -> None:
    """The wrapper accepts a ``BoxAuth`` object, which keeps its developer token."""
    token = "box_developer_token"
    auth = BoxAuth(auth_type=BoxAuthType.TOKEN, box_developer_token=token)

    # Construction alone is the check here; the instance itself is not needed.
    _BoxAPIWrapper(box_auth=auth)  # type: ignore[call-arg]

    assert auth.box_developer_token == token
def test_failed_initialization_no_auth() -> None:
    """The wrapper built with neither a token nor a ``BoxAuth`` fails validation."""
    with pytest.raises(ValidationError):
        _BoxAPIWrapper()  # type: ignore[call-arg]
def test_get_documents_by_file_ids(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``get_document_by_file_id`` returns the document supplied by its patch."""

    def stub_document() -> Document:
        return Document(
            page_content="content", metadata={"source": "url", "title": "filename"}
        )

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.get_document_by_file_id",
        return_value=stub_document(),
    )

    box = _BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]

    assert box.get_document_by_file_id("box_file_id") == stub_document()
def test_get_documents_by_folder_id(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``get_folder_items`` returns the folder listing supplied by its patch."""

    def stub_listing() -> list:
        return [{"id": "file_id", "type": "file"}]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.get_folder_items",
        return_value=stub_listing(),
    )

    box = _BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]

    assert box.get_folder_items("box_folder_id") == stub_listing()
def test_box_search(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``search_box`` returns the documents supplied by its patch."""

    def search_hits() -> list:
        return [
            Document(
                page_content="Test file mode\ndocument contents",
                metadata={"title": "Testing Files"},
            )
        ]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.search_box",
        return_value=search_hits(),
    )

    box = _BoxAPIWrapper(box_developer_token="box_developer_token")  # type: ignore[call-arg]

    assert box.search_box("query") == search_hits()
def test_ask_box_ai_single_file(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``ask_box_ai`` returns the documents supplied by its patch (one file ID)."""

    def ai_answer() -> list:
        return [
            Document(
                page_content="Test file mode\ndocument contents",
                metadata={"title": "Testing Files"},
            )
        ]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=ai_answer(),
    )

    box = _BoxAPIWrapper(  # type: ignore[call-arg]
        box_developer_token="box_developer_token", box_file_ids=["box_file_ids"]
    )

    # The patched method is called with the query only.
    documents = box.ask_box_ai("query")  # type: ignore[call-arg]
    assert documents == ai_answer()
def test_ask_box_ai_multiple_files(mock_worker, mocker: MockerFixture) -> None:  # type: ignore[no-untyped-def]
    """``ask_box_ai`` returns the documents supplied by its patch (two file IDs)."""

    def ai_answers() -> list:
        return [
            Document(
                page_content=f"Test file {i} mode\ndocument contents",
                metadata={"title": f"Test File {i}"},
            )
            for i in (1, 2)
        ]

    mocker.patch(
        "langchain_box.utilities._BoxAPIWrapper.ask_box_ai",
        return_value=ai_answers(),
    )

    box = _BoxAPIWrapper(  # type: ignore[call-arg]
        box_developer_token="box_developer_token",
        box_file_ids=["box_file_id 1", "box_file_id 2"],
    )

    # The patched method is called with the query only.
    documents = box.ask_box_ai("query")  # type: ignore[call-arg]
    assert documents == ai_answers()