box[patch]: fix bugs in docs (#25699)

This commit is contained in:
Scott Hurrey 2024-08-23 15:36:23 -04:00 committed by GitHub
parent 64ace25eb8
commit 92abf62292
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 195 additions and 78 deletions

View File

@ -15,7 +15,7 @@
"source": [ "source": [
"# BoxLoader\n", "# BoxLoader\n",
"\n", "\n",
"This notebook provides a quick overview for getting started with Box [document loader](/docs/integrations/document_loaders/). For detailed documentation of all BoxLoader features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/community/document_loaders/langchain_community.document_loaders.langchain_box_loader.BoxLoader.html).\n", "This notebook provides a quick overview for getting started with Box [document loader](/docs/integrations/document_loaders/). For detailed documentation of all BoxLoader features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/document_loaders/langchain_box.document_loaders.box.BoxLoader.html).\n",
"\n", "\n",
"\n", "\n",
"## Overview\n", "## Overview\n",
@ -34,7 +34,7 @@
"\n", "\n",
"| Class | Package | Local | Serializable | JS support|\n", "| Class | Package | Local | Serializable | JS support|\n",
"| :--- | :--- | :---: | :---: | :---: |\n", "| :--- | :--- | :---: | :---: | :---: |\n",
"| [BoxLoader](https://python.langchain.com/v0.2/api_reference/box/document_loaders/langchain_box.document_loaders.langchain_boxloader.BoxLoader.html) | [langchain_box](https://python.langchain.com/v0.2/api_reference/box/index.html) | ✅ | ❌ | ❌ | \n", "| [BoxLoader](https://python.langchain.com/v0.2/api_reference/box/document_loaders/langchain_box.document_loaders.box.BoxLoader.html) | [langchain_box](https://python.langchain.com/v0.2/api_reference/box/index.html) | ✅ | ❌ | ❌ | \n",
"### Loader features\n", "### Loader features\n",
"| Source | Document Lazy Loading | Async Support\n", "| Source | Document Lazy Loading | Async Support\n",
"| :---: | :---: | :---: | \n", "| :---: | :---: | :---: | \n",
@ -244,7 +244,7 @@
"source": [ "source": [
"## API reference\n", "## API reference\n",
"\n", "\n",
"For detailed documentation of all BoxLoader features and configurations head to the API reference: https://python.langchain.com/v0.2/api_reference/box/document_loaders/langchain_box.document_loaders.langchain_box_loader.BoxLoader.html\n", "For detailed documentation of all BoxLoader features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/document_loaders/langchain_box.document_loaders.box.BoxLoader.html)\n",
"\n", "\n",
"\n", "\n",
"## Help\n", "## Help\n",

View File

@ -9,8 +9,8 @@ In this package, we make available a number of ways to include Box content in yo
### Installation and setup ### Installation and setup
```text ```bash
%pip install -U langchain-box pip install -U langchain-box
``` ```
@ -31,12 +31,6 @@ In order to integrate with Box, you need a few things:
The good news is if you are using a free developer account, you are the admin. The good news is if you are using a free developer account, you are the admin.
[Authorize your app](https://developer.box.com/guides/authorization/custom-app-approval/#manual-approval) [Authorize your app](https://developer.box.com/guides/authorization/custom-app-approval/#manual-approval)
## Installation
```bash
pip install -U langchain-box
```
## Authentication ## Authentication
The `langchain-box` package offers some flexibility in authentication. The The `langchain-box` package offers some flexibility in authentication. The

View File

@ -17,7 +17,7 @@
"source": [ "source": [
"# BoxRetriever\n", "# BoxRetriever\n",
"\n", "\n",
"This will help you getting started with the Box [retriever](/docs/concepts/#retrievers). For detailed documentation of all BoxRetriever features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/retrievers/langchain_box.retrievers.Box.BoxRetriever.html).\n", "This will help you getting started with the Box [retriever](/docs/concepts/#retrievers). For detailed documentation of all BoxRetriever features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/retrievers/langchain_box.retrievers.box.BoxRetriever.html).\n",
"\n", "\n",
"# Overview\n", "# Overview\n",
"\n", "\n",
@ -35,7 +35,7 @@
"\n", "\n",
"| Retriever | Self-host | Cloud offering | Package |\n", "| Retriever | Self-host | Cloud offering | Package |\n",
"| :--- | :--- | :---: | :---: |\n", "| :--- | :--- | :---: | :---: |\n",
"[BoxRetriever](https://python.langchain.com/v0.2/api_reference/langchain-box/retrievers/langchain-box.retrievers.langchain_box.BoxRetriever.html) | ❌ | ✅ | langchain-box |\n", "[BoxRetriever](https://python.langchain.com/v0.2/api_reference/box/retrievers/langchain_box.retrievers.box.BoxRetriever.html) | ❌ | ✅ | langchain-box |\n",
"\n", "\n",
"## Setup\n", "## Setup\n",
"\n", "\n",
@ -290,7 +290,7 @@
"source": [ "source": [
"## API reference\n", "## API reference\n",
"\n", "\n",
"For detailed documentation of all BoxRetriever features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/retrievers/langchain_box.retrievers.Box.BoxRetriever.html).\n", "For detailed documentation of all BoxRetriever features and configurations head to the [API reference](https://python.langchain.com/v0.2/api_reference/box/retrievers/langchain_box.retrievers.box.BoxRetriever.html).\n",
"\n", "\n",
"\n", "\n",
"## Help\n", "## Help\n",

View File

@ -21,11 +21,12 @@ class BoxLoader(BaseLoader, BaseModel):
Plus plan or above. The free developer account does not have access to Box AI. Plus plan or above. The free developer account does not have access to Box AI.
In addition, using the Box AI API requires a few prerequisite steps: In addition, using the Box AI API requires a few prerequisite steps:
* Your administrator must enable the Box AI API
* You must enable the `Manage AI` scope in your app in the developer console.
* Your administratormust install and enable your application.
Setup: * Your administrator must enable the Box AI API
* You must enable the ``Manage AI`` scope in your app in the developer console.
* Your administrator must install and enable your application.
**Setup**:
Install ``langchain-box`` and set environment variable ``BOX_DEVELOPER_TOKEN``. Install ``langchain-box`` and set environment variable ``BOX_DEVELOPER_TOKEN``.
.. code-block:: bash .. code-block:: bash
@ -34,7 +35,7 @@ class BoxLoader(BaseLoader, BaseModel):
export BOX_DEVELOPER_TOKEN="your-api-key" export BOX_DEVELOPER_TOKEN="your-api-key"
This loader returns ``Document `` objects built from text representations of files This loader returns ``Document`` objects built from text representations of files
in Box. It will skip any document without a text representation available. You can in Box. It will skip any document without a text representation available. You can
provide either a ``List[str]`` containing Box file IDs, or you can provide a provide either a ``List[str]`` containing Box file IDs, or you can provide a
``str`` containing a Box folder ID. If providing a folder ID, you can also enable ``str`` containing a Box folder ID. If providing a folder ID, you can also enable
@ -46,19 +47,44 @@ class BoxLoader(BaseLoader, BaseModel):
recommend never getting all files from folder 0 recursively. Folder ID 0 is your recommend never getting all files from folder 0 recursively. Folder ID 0 is your
root folder. root folder.
Instantiate: **Instantiate**:
Initialization variables .. list-table:: Initialization variables
variable | description | type | required :widths: 25 50 15 10
---+---+--- :header-rows: 1
box_developer_token | token to use for auth. | string | no
box_auth | client id for you app. Used for CCG | string | no
box_file_ids | Array of Box file Ids to retrieve | array of strings | no
box_folder_id | Box folder id to retrieve | string | no
recursive | whether to return subfolders, default False | bool | no
Get files this method requires you pass the ``box_file_ids`` parameter. This is a * - Variable
``List[str]`` containing the file IDs you wish to index. - Description
- Type
- Default
* - box_developer_token
- Token to use for auth.
- ``str``
- ``None``
* - box_auth
- Configured ``BoxAuth`` object to use for auth.
- ``langchain_box.utilities.BoxAuth``
- ``None``
* - box_file_ids
- List of Box file IDs to retrieve.
- ``List[str]``
- ``None``
* - box_folder_id
- Box folder ID to retrieve.
- ``str``
- ``None``
* - recursive
- Whether to also return files from subfolders.
- ``bool``
- ``False``
* - character_limit
- Maximum number of characters to return per document.
- ``int``
- ``-1``
**Get files**: this method requires you to pass the ``box_file_ids`` parameter.
This is a ``List[str]`` containing the file IDs you wish to index.
.. code-block:: python .. code-block:: python
@ -71,7 +97,7 @@ class BoxLoader(BaseLoader, BaseModel):
character_limit=10000 # Optional. Defaults to no limit character_limit=10000 # Optional. Defaults to no limit
) )
Get files in a folder this method requires you pass the ``box_folder_id`` **Get files in a folder**: this method requires you to pass the ``box_folder_id``
parameter. This is a ``str`` containing the folder ID you wish to index. parameter. This is a ``str`` containing the folder ID you wish to index.
.. code-block:: python .. code-block:: python
@ -85,7 +111,7 @@ class BoxLoader(BaseLoader, BaseModel):
recursive=False # Optional. return entire tree, defaults to False recursive=False # Optional. return entire tree, defaults to False
) )
Load: **Load**:
.. code-block:: python .. code-block:: python
docs = loader.load() docs = loader.load()
@ -96,11 +122,11 @@ class BoxLoader(BaseLoader, BaseModel):
Document(metadata={'source': 'https://dl.boxcloud.com/api/2.0/ Document(metadata={'source': 'https://dl.boxcloud.com/api/2.0/
internal_files/1514555423624/versions/1663171610024/representations internal_files/1514555423624/versions/1663171610024/representations
/extracted_text/content/', 'title': 'Invoice-A5555_txt'}, /extracted_text/content/', 'title': 'Invoice-A5555_txt'},
page_content='Vendor: AstroTech Solutions\nInvoice Number: A5555\n\nLine page_content='Vendor: AstroTech Solutions\\nInvoice Number: A5555\\n\\nLine
Items:\n - Gravitational Wave Detector Kit: $800\n - Exoplanet Items:\\n - Gravitational Wave Detector Kit: $800\\n - Exoplanet
Terrarium: $120\nTotal: $920') Terrarium: $120\\nTotal: $920')
Lazy load: **Lazy load**:
.. code-block:: python .. code-block:: python
docs = [] docs = []
@ -116,16 +142,19 @@ class BoxLoader(BaseLoader, BaseModel):
Document(metadata={'source': 'https://dl.boxcloud.com/api/2.0/ Document(metadata={'source': 'https://dl.boxcloud.com/api/2.0/
internal_files/1514555423624/versions/1663171610024/representations internal_files/1514555423624/versions/1663171610024/representations
/extracted_text/content/', 'title': 'Invoice-A5555_txt'}, /extracted_text/content/', 'title': 'Invoice-A5555_txt'},
page_content='Vendor: AstroTech Solutions\nInvoice Number: A5555\n\nLine page_content='Vendor: AstroTech Solutions\\nInvoice Number: A5555\\n\\nLine
Items:\n - Gravitational Wave Detector Kit: $800\n - Exoplanet Items:\\n - Gravitational Wave Detector Kit: $800\\n - Exoplanet
Terrarium: $120\nTotal: $920') Terrarium: $120\\nTotal: $920')
""" """
box_developer_token: Optional[str] = None box_developer_token: Optional[str] = None
"""String containing the Box Developer Token generated in the developer console""" """String containing the Box Developer Token generated in the developer console"""
box_auth: Optional[BoxAuth] = None box_auth: Optional[BoxAuth] = None
"""Configured langchain_box.utilities.BoxAuth object""" """Configured
`BoxAuth <https://python.langchain.com/v0.2/api_reference/box/utilities/langchain_box.utilities.box.BoxAuth.html>`_
object"""
box_file_ids: Optional[List[str]] = None box_file_ids: Optional[List[str]] = None
"""List[str] containing Box file ids""" """List[str] containing Box file ids"""

View File

@ -32,6 +32,7 @@ class BoxRetriever(BaseRetriever):
Instantiate: Instantiate:
To use search: To use search:
.. code-block:: python .. code-block:: python
from langchain_box.retrievers import BoxRetriever from langchain_box.retrievers import BoxRetriever
@ -39,6 +40,7 @@ class BoxRetriever(BaseRetriever):
retriever = BoxRetriever() retriever = BoxRetriever()
To use Box AI: To use Box AI:
.. code-block:: python .. code-block:: python
from langchain_box.retrievers import BoxRetriever from langchain_box.retrievers import BoxRetriever
@ -114,10 +116,13 @@ class BoxRetriever(BaseRetriever):
"""String containing the Box Developer Token generated in the developer console""" """String containing the Box Developer Token generated in the developer console"""
box_auth: Optional[BoxAuth] = None box_auth: Optional[BoxAuth] = None
"""Configured langchain_box.utilities.BoxAuth object""" """Configured
`BoxAuth <https://python.langchain.com/v0.2/api_reference/box/utilities/langchain_box.utilities.box.BoxAuth.html>`_
object"""
box_file_ids: Optional[List[str]] = None box_file_ids: Optional[List[str]] = None
"""List[str] containing Box file ids""" """List[str] containing Box file ids"""
character_limit: Optional[int] = -1 character_limit: Optional[int] = -1
"""character_limit is an int that caps the number of characters to """character_limit is an int that caps the number of characters to
return per document.""" return per document."""

View File

@ -134,52 +134,63 @@ class BoxAuthType(Enum):
an enum to tell BoxLoader how you wish to authenticate your Box connection. an enum to tell BoxLoader how you wish to authenticate your Box connection.
Options are: Options are:
TOKEN - Use a developer token generated from the Box Developer Console. TOKEN - Use a developer token generated from the Box Developer Console.
Only recommended for development. Only recommended for development.
Provide `box_developer_token`. Provide ``box_developer_token``.
CCG - Client Credentials Grant. CCG - Client Credentials Grant.
provide `box_client_id`, `box_client_secret`, provide ``box_client_id``, ``box_client_secret``,
and `box_enterprise_id` or optionally `box_user_id`. and ``box_enterprise_id`` or optionally ``box_user_id``.
JWT - Use JWT for authentication. Config should be stored on the file JWT - Use JWT for authentication. Config should be stored on the file
system accessible to your app. system accessible to your app.
provide `box_jwt_path`. Optionally, provide `box_user_id` to provide ``box_jwt_path``. Optionally, provide ``box_user_id`` to
act as a specific user act as a specific user
""" """
TOKEN = "token" TOKEN = "token"
"""Use a developer token or a token retrieved from box-sdk-gen""" """Use a developer token or a token retrieved from ``box-sdk-gen``"""
CCG = "ccg" CCG = "ccg"
"""Use `client_credentials` type grant""" """Use ``client_credentials`` type grant"""
JWT = "jwt" JWT = "jwt"
"""Use JWT bearer token auth""" """Use JWT bearer token auth"""
class BoxAuth(BaseModel): class BoxAuth(BaseModel):
"""BoxAuth. """**BoxAuth.**
The ``langchain-box`` package offers some flexibility in authentication. The
most basic authentication method is by using a developer token. This can be
found in the `Box developer console <https://account.box.com/developers/console>`_
on the configuration screen. This token is purposely short-lived (1 hour) and is
intended for development. With this token, you can add it to your environment as
``BOX_DEVELOPER_TOKEN``, you can pass it directly to the loader, or you can use the
``BoxAuth`` authentication helper class.
`BoxAuth` supports the following authentication methods: `BoxAuth` supports the following authentication methods:
* Token either a developer token or any token generated through the Box SDK * **Token** either a developer token or any token generated through the Box SDK
* JWT with a service account * **JWT** with a service account
* JWT with a specified user * **JWT** with a specified user
* CCG with a service account * **CCG** with a service account
* CCG with a specified user * **CCG** with a specified user
.. note:: .. note::
If using JWT authentication, you will need to download the configuration from the If using JWT authentication, you will need to download the configuration from
Box developer console after generating your public/private key pair. Place this the Box developer console after generating your public/private key pair. Place
file in your application directory structure somewhere. You will use the path to this file in your application directory structure somewhere. You will use the
this file when using the `BoxAuth` helper class. path to this file when using the ``BoxAuth`` helper class. If you wish to use
OAuth2 with the authorization_code flow, please use ``BoxAuthType.TOKEN`` with
the token you have acquired.
For more information, learn about how to For more information, learn about how to
[set up a Box application](https://developer.box.com/guides/getting-started/first-application/), `set up a Box application <https://developer.box.com/guides/getting-started/first-application/>`_,
and check out the and check out the
[Box authentication guide](https://developer.box.com/guides/authentication/select/) `Box authentication guide <https://developer.box.com/guides/authentication/select/>`_
for more about our different authentication options. for more about our different authentication options.
Simple implementation Simple implementation:
To instantiate, you must provide a ``langchain_box.utilities.BoxAuthType``. To instantiate, you must provide a ``langchain_box.utilities.BoxAuthType``.
@ -187,18 +198,24 @@ class BoxAuth(BaseModel):
Box connection. Box connection.
Options are: Options are:
TOKEN - Use a developer token generated from the Box Developer Console. TOKEN - Use a developer token generated from the Box Developer Console.
Only recommended for development. Only recommended for development.
Provide `box_developer_token`. Provide ``box_developer_token``.
CCG - Client Credentials Grant. CCG - Client Credentials Grant.
provide `box_client_id`, `box_client_secret`, provide ``box_client_id``, ``box_client_secret``,
and `box_enterprise_id` or optionally `box_user_id`. and ``box_enterprise_id`` or optionally ``box_user_id``.
JWT - Use JWT for authentication. Config should be stored on the file JWT - Use JWT for authentication. Config should be stored on the file
system accessible to your app. system accessible to your app.
provide `box_jwt_path`. Optionally, provide `box_user_id` to provide ``box_jwt_path``. Optionally, provide ``box_user_id`` to
act as a specific user act as a specific user
**Examples**:
**Token**
.. code-block:: python .. code-block:: python
from langchain_box.document_loaders import BoxLoader from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType from langchain_box.utilities import BoxAuth, BoxAuthType
@ -212,37 +229,109 @@ class BoxAuth(BaseModel):
... ...
) )
To see examples for each supported authentication methodology, visit the
[Box providers](/docs/integrations/providers/box) page. If you want to **JWT with a service account**
use OAuth 2.0 `authorization_code` flow, use
[box-sdk-gen](https://github.com/box/box-python-sdk-gen) SDK, get your .. code-block:: python
token, and use `BoxAuthType.TOKEN` type.
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.JWT,
box_jwt_path=box_jwt_path
)
loader = BoxLoader(
box_auth=auth,
...
)
**JWT with a specified user**
.. code-block:: python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.JWT,
box_jwt_path=box_jwt_path,
box_user_id=box_user_id
)
loader = BoxLoader(
box_auth=auth,
...
)
**CCG with a service account**
.. code-block:: python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.CCG,
box_client_id=box_client_id,
box_client_secret=box_client_secret,
box_enterprise_id=box_enterprise_id
)
loader = BoxLoader(
box_auth=auth,
...
)
**CCG with a specified user**
.. code-block:: python
from langchain_box.document_loaders import BoxLoader
from langchain_box.utilities import BoxAuth, BoxAuthType
auth = BoxAuth(
auth_type=BoxAuthType.CCG,
box_client_id=box_client_id,
box_client_secret=box_client_secret,
box_user_id=box_user_id
)
loader = BoxLoader(
box_auth=auth,
...
)
""" """
auth_type: BoxAuthType auth_type: BoxAuthType
"""langchain_box.utilities.BoxAuthType. Enum describing how to """``langchain_box.utilities.BoxAuthType``. Enum describing how to
authenticate against Box""" authenticate against Box"""
box_developer_token: Optional[str] = None box_developer_token: Optional[str] = None
""" If using BoxAuthType.TOKEN, provide your token here""" """ If using ``BoxAuthType.TOKEN``, provide your token here"""
box_jwt_path: Optional[str] = None box_jwt_path: Optional[str] = None
"""If using BoxAuthType.JWT, provide local path to your """If using ``BoxAuthType.JWT``, provide local path to your
JWT configuration file""" JWT configuration file"""
box_client_id: Optional[str] = None box_client_id: Optional[str] = None
"""If using BoxAuthType.CCG, provide your app's client ID""" """If using ``BoxAuthType.CCG``, provide your app's client ID"""
box_client_secret: Optional[str] = None box_client_secret: Optional[str] = None
"""If using BoxAuthType.CCG, provide your app's client secret""" """If using ``BoxAuthType.CCG``, provide your app's client secret"""
box_enterprise_id: Optional[str] = None box_enterprise_id: Optional[str] = None
"""If using BoxAuthType.CCG, provide your enterprise ID. """If using ``BoxAuthType.CCG``, provide your enterprise ID.
Only required if you are not sending `box_user_id`""" Only required if you are not sending ``box_user_id``"""
box_user_id: Optional[str] = None box_user_id: Optional[str] = None
"""If using BoxAuthType.CCG or BoxAuthType.JWT, providing """If using ``BoxAuthType.CCG`` or ``BoxAuthType.JWT``, providing
`box_user_id` will act on behalf of a specific user""" ``box_user_id`` will act on behalf of a specific user"""
_box_client: Optional[box_sdk_gen.BoxClient] = None _box_client: Optional[box_sdk_gen.BoxClient] = None
_custom_header: Dict = dict({"x-box-ai-library": "langchain"}) _custom_header: Dict = dict({"x-box-ai-library": "langchain"})