Compare commits

135 Commits

bagatur/ve...bagatur/0.

| SHA1 |
|---|
| 0495ca0d10 |
| aa9ccca775 |
| 68298cdc82 |
| d9396bdec1 |
| 7d216ad1e1 |
| 455a74486b |
| 5ac1860484 |
| 9525e392de |
| 140f06e59a |
| 280a914920 |
| 9dfce56b31 |
| 00614f332a |
| 3c4529ac69 |
| e46419c851 |
| 8609afbd10 |
| 4c2e887276 |
| a84310cdcb |
| 58b8747c44 |
| c57e506f9c |
| 068620a871 |
| 40f846e65d |
| 4812403b48 |
| 69e9610f62 |
| e5d7e455dc |
| 7ad0a3f2a7 |
| ed75bccda8 |
| 5c194ee224 |
| 305d74c67a |
| 89af30807b |
| f8078e41e5 |
| 30e4a35d7a |
| 17c62e0f3a |
| 7eb376d5fc |
| 2c835baae4 |
| 4b3dd34544 |
| d314acb2d5 |
| 50f93d86ec |
| 4761c09e94 |
| ff31cc1648 |
| f36418a5b0 |
| 9b2f9ee952 |
| 6f544a6a25 |
| b82644078e |
| bb0dd8f82f |
| 7afecec280 |
| efcdf54edd |
| 58c7687174 |
| bc648f6cfc |
| 044bc22acc |
| 07de4abe70 |
| 24a0a4472a |
| dc4ce82ddd |
| bde199d128 |
| 785f8ab174 |
| 77868b1974 |
| ae3c7f702c |
| ca9c8c58ea |
| c3310c5e7f |
| 95904fe443 |
| 21c45475c5 |
| edf9d1c905 |
| 7c26ef88a1 |
| 408bdd5604 |
| 6a93ff2a4b |
| 7e96a7eaea |
| 516cc44b3f |
| 94e58dd827 |
| 780337488e |
| bd329e9aad |
| 6fa1438334 |
| 7de1d9acfd |
| aee5138930 |
| 21f75991d4 |
| ec026004cb |
| 866d6408af |
| 366ba77459 |
| 514fe80778 |
| bcc771e37c |
| 9235dade90 |
| 5aa68936e0 |
| 611d5a1618 |
| 635b3372bd |
| a1b26dd9b6 |
| 8d2c34e655 |
| 160a7077b0 |
| 7c092f479f |
| d96e0b2de7 |
| 7cd87d2f6a |
| e64cf1aba4 |
| ff94f86ce1 |
| 1a55e950aa |
| fd4f536c77 |
| 05008c4f94 |
| 80eb510a7b |
| ef9813dae6 |
| 0e0030f494 |
| c244e1a50b |
| f79d0cb9fb |
| eec023766e |
| f2a7dda4bd |
| a49ac55964 |
| cee03630d9 |
| 0ddfe7fc9d |
| 0a784074d1 |
| 6327be9048 |
| 553a520ab6 |
| d647ff1a9a |
| ebc4a64f9e |
| 4468e5bdbe |
| cced3eb9bc |
| b9c62fb905 |
| c20aeef79a |
| 527676a753 |
| 34d6f0557d |
| 745d2476a2 |
| aa785fa6ec |
| caf47ab666 |
| b551d49cf5 |
| f5b9aedc48 |
| c922ea36cb |
| 190887c5cd |
| bbe164ad28 |
| 781aee0068 |
| e3ff107e4f |
| 9e569d85a4 |
| 191ddbc77e |
| 508f75853c |
| 7ce81eb6f4 |
| 5157b15446 |
| 98cd8f673b |
| 4d7f6fa968 |
| 321db89e87 |
| d5cf360329 |
| b15d150d22 |
| 7253b816cc |
22  .github/scripts/get_min_versions.py  vendored

@@ -4,7 +4,12 @@ import tomllib
 from packaging.version import parse as parse_version
 import re

-MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain", "langchain-text-splitters"]
+MIN_VERSION_LIBS = [
+    "langchain-core",
+    "langchain-community",
+    "langchain",
+    "langchain-text-splitters",
+]


 def get_min_version(version: str) -> str:
@@ -56,12 +61,13 @@ def get_min_version_from_toml(toml_path: str):
     return min_versions


-# Get the TOML file path from the command line argument
-toml_file = sys.argv[1]
+if __name__ == "__main__":
+    # Get the TOML file path from the command line argument
+    toml_file = sys.argv[1]

-# Call the function to get the minimum versions
-min_versions = get_min_version_from_toml(toml_file)
+    # Call the function to get the minimum versions
+    min_versions = get_min_version_from_toml(toml_file)

-print(
-    " ".join([f"{lib}=={version}" for lib, version in min_versions.items()])
-)  # noqa: T201
+    print(
+        " ".join([f"{lib}=={version}" for lib, version in min_versions.items()])
+    )  # noqa: T201
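The hunk above only shows the signature of `get_min_version`; its body is elided. As a hedged sketch of what a constraint-to-lower-bound helper like this can look like (the regexes and rules below are illustrative assumptions, not the repo's actual implementation):

```python
import re

from packaging.version import parse as parse_version


def get_min_version(version: str) -> str:
    """Illustrative only: map a Poetry-style constraint to its lower bound."""
    # Caret/tilde constraints (^x.y.z, ~x.y.z): the lower bound is the version itself.
    m = re.match(r"^[\^~](\d+(?:\.\d+){0,2})$", version)
    if m:
        return m.group(1)
    # Range constraints (">=x.y.z" or ">=x.y.z,<a.b.c"): take the >= bound.
    m = re.match(r"^>=(\d+(?:\.\d+){0,2})", version)
    if m:
        return m.group(1)
    # Bare pin: already a minimum.
    if re.match(r"^\d+(?:\.\d+){0,2}$", version):
        return version
    raise ValueError(f"Unrecognized constraint: {version}")


assert get_min_version("^0.1.5") == "0.1.5"
assert get_min_version(">=0.1.0,<0.2") == "0.1.0"
# parse_version can normalize/validate the result:
assert parse_version(get_min_version("~0.0.28")) == parse_version("0.0.28")
```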
1  .github/workflows/_integration_test.yml  vendored

@@ -75,6 +75,7 @@ jobs:
           ES_API_KEY: ${{ secrets.ES_API_KEY }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
           MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
+          VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
         run: |
           make integration_tests
37  .github/workflows/_release.yml  vendored

@@ -157,6 +157,24 @@ jobs:
         run: make tests
         working-directory: ${{ inputs.working-directory }}

+      - name: Get minimum versions
+        working-directory: ${{ inputs.working-directory }}
+        id: min-version
+        run: |
+          poetry run pip install packaging
+          min_versions="$(poetry run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml)"
+          echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
+          echo "min-versions=$min_versions"
+
+      - name: Run unit tests with minimum dependency versions
+        if: ${{ steps.min-version.outputs.min-versions != '' }}
+        env:
+          MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
+        run: |
+          poetry run pip install $MIN_VERSIONS
+          make tests
+        working-directory: ${{ inputs.working-directory }}
+
       - name: 'Authenticate to Google Cloud'
         id: 'auth'
         uses: google-github-actions/auth@v2
@@ -196,27 +214,10 @@ jobs:
           ES_API_KEY: ${{ secrets.ES_API_KEY }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
           MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
+          VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
         run: make integration_tests
         working-directory: ${{ inputs.working-directory }}

-      - name: Get minimum versions
-        working-directory: ${{ inputs.working-directory }}
-        id: min-version
-        run: |
-          poetry run pip install packaging
-          min_versions="$(poetry run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml)"
-          echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
-          echo "min-versions=$min_versions"
-
-      - name: Run unit tests with minimum dependency versions
-        if: ${{ steps.min-version.outputs.min-versions != '' }}
-        env:
-          MIN_VERSIONS: ${{ steps.min-version.outputs.min-versions }}
-        run: |
-          poetry run pip install $MIN_VERSIONS
-          make tests
-        working-directory: ${{ inputs.working-directory }}
-
   publish:
     needs:
       - build
24  .github/workflows/check-broken-links.yml  vendored  Normal file

@@ -0,0 +1,24 @@
+name: Check Broken Links
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '0 13 * * *'
+
+jobs:
+  check-links:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Use Node.js 18.x
+        uses: actions/setup-node@v3
+        with:
+          node-version: 18.x
+          cache: "yarn"
+          cache-dependency-path: ./docs/yarn.lock
+      - name: Install dependencies
+        run: yarn install --immutable --mode=skip-build
+        working-directory: ./docs
+      - name: Check broken links
+        run: yarn check-broken-links
+        working-directory: ./docs
1  .gitignore  vendored

@@ -116,6 +116,7 @@ celerybeat.pid
 .env
+.envrc
 .venv*
 venv*
 env/
 ENV/
 env.bak/
@@ -9,7 +9,7 @@
 " \n",
 "[Together AI](https://python.langchain.com/docs/integrations/llms/together) has a broad set of OSS LLMs via inference API.\n",
 "\n",
-"See [here](https://api.together.xyz/playground). We use `\"mistralai/Mixtral-8x7B-Instruct-v0.1` for RAG on the Mixtral paper.\n",
+"See [here](https://docs.together.ai/docs/inference-models). We use `\"mistralai/Mixtral-8x7B-Instruct-v0.1` for RAG on the Mixtral paper.\n",
 "\n",
 "Download the paper:\n",
 "https://arxiv.org/pdf/2401.04088.pdf"
@@ -148,7 +148,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.9.16"
+"version": "3.9.6"
 }
 },
 "nbformat": 4,
@@ -14,19 +14,20 @@ For the most part, new integrations should be added to the Community package. Pa

 In the following sections, we'll walk through how to contribute to each of these packages from a fake company, `Parrot Link AI`.

-## Community Package
+## Community package

 The `langchain-community` package is in `libs/community` and contains most integrations.

-It is installed by users with `pip install langchain-community`, and exported members can be imported with code like
+It can be installed with `pip install langchain-community`, and exported members can be imported with code like

 ```python
-from langchain_community.chat_models import ParrotLinkLLM
-from langchain_community.llms import ChatParrotLink
+from langchain_community.chat_models import ChatParrotLink
+from langchain_community.llms import ParrotLinkLLM
 from langchain_community.vectorstores import ParrotLinkVectorStore
 ```

-The community package relies on manually-installed dependent packages, so you will see errors if you try to import a package that is not installed. In our fake example, if you tried to import `ParrotLinkLLM` without installing `parrot-link-sdk`, you will see an `ImportError` telling you to install it when trying to use it.
+The `community` package relies on manually-installed dependent packages, so you will see errors
+if you try to import a package that is not installed. In our fake example, if you tried to import `ParrotLinkLLM` without installing `parrot-link-sdk`, you will see an `ImportError` telling you to install it when trying to use it.

 Let's say we wanted to implement a chat model for Parrot Link AI. We would create a new file in `libs/community/langchain_community/chat_models/parrot_link.py` with the following code:

@@ -39,7 +40,7 @@ class ChatParrotLink(BaseChatModel):
     Example:
         .. code-block:: python

-            from langchain_parrot_link import ChatParrotLink
+            from langchain_community.chat_models import ChatParrotLink

             model = ChatParrotLink()
     """
@@ -56,9 +57,16 @@ And add documentation to:

 - `docs/docs/integrations/chat/parrot_link.ipynb`

-## Partner Packages
+## Partner package in LangChain repo

-Partner packages are in `libs/partners/*` and are installed by users with `pip install langchain-{partner}`, and exported members can be imported with code like
+Partner packages can be hosted in the `LangChain` monorepo or in an external repo.
+
+Partner package in the `LangChain` repo is placed in `libs/partners/{partner}`
+and the package source code is in `libs/partners/{partner}/langchain_{partner}`.
+
+A package is
+installed by users with `pip install langchain-{partner}`, and the package members
+can be imported with code like:

 ```python
 from langchain_{partner} import X
@@ -123,13 +131,49 @@ By default, this will include stubs for a Chat Model, an LLM, and/or a Vector St

 ### Write Unit and Integration Tests

-Some basic tests are generated in the tests/ directory. You should add more tests to cover your package's functionality.
+Some basic tests are presented in the `tests/` directory. You should add more tests to cover your package's functionality.

 For information on running and implementing tests, see the [Testing guide](./testing).

 ### Write documentation

-Documentation is generated from Jupyter notebooks in the `docs/` directory. You should move the generated notebooks to the relevant `docs/docs/integrations` directory in the monorepo root.
+Documentation is generated from Jupyter notebooks in the `docs/` directory. You should place the notebooks with examples
+to the relevant `docs/docs/integrations` directory in the monorepo root.

+### (If Necessary) Deprecate community integration
+
+Note: this is only necessary if you're migrating an existing community integration into
+a partner package. If the component you're integrating is net-new to LangChain (i.e.
+not already in the `community` package), you can skip this step.
+
+Let's pretend we migrated our `ChatParrotLink` chat model from the community package to
+the partner package. We would need to deprecate the old model in the community package.
+
+We would do that by adding a `@deprecated` decorator to the old model as follows, in
+`libs/community/langchain_community/chat_models/parrot_link.py`.
+
+Before our change, our chat model might look like this:
+
+```python
+class ChatParrotLink(BaseChatModel):
+    ...
+```
+
+After our change, it would look like this:
+
+```python
+from langchain_core._api.deprecation import deprecated
+
+@deprecated(
+    since="0.0.<next community version>",
+    removal="0.2.0",
+    alternative_import="langchain_parrot_link.ChatParrotLink"
+)
+class ChatParrotLink(BaseChatModel):
+    ...
+```
+
+You should do this for *each* component that you're migrating to the partner package.
+
 ### Additional steps

@@ -143,3 +187,15 @@ Maintainer steps (Contributors should **not** do these):
 - [ ] set up pypi and test pypi projects
 - [ ] add credential secrets to Github Actions
 - [ ] add package to conda-forge

+## Partner package in external repo
+
+If you are creating a partner package in an external repo, you should follow the same steps as above,
+but you will need to set up your own CI/CD and package management.
+
+Name your package as `langchain-{partner}-{integration}`.
+
+Still, you have to create the `libs/partners/{partner}-{integration}` folder in the `LangChain` monorepo
+and add a `README.md` file with a link to the external repo.
+See this [example](https://github.com/langchain-ai/langchain/tree/master/libs/partners/google-genai).
+This allows keeping track of all the partner packages in the `LangChain` documentation.
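As a quick illustration of what the new `@deprecated` decorator buys users (a hedged sketch: `ChatParrotLink` is the guide's fictional model, and the exact warning class and message wording are assumptions based on `langchain_core`'s deprecation utilities):

```python
import warnings

# Hypothetical import: the guide's fictional community model,
# now decorated with @deprecated as shown in the diff above.
from langchain_community.chat_models import ChatParrotLink

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    ChatParrotLink()  # using the deprecated class should trigger the warning

for w in caught:
    print(w.category.__name__, w.message)
# Expected (assumption): a deprecation warning pointing users at the
# replacement import, langchain_parrot_link.ChatParrotLink.
```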
@@ -20,9 +20,11 @@
    ]
   },
   {
-   "cell_type": "raw",
+   "cell_type": "code",
    "id": "0f316b5c",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "%pip install --upgrade --quiet langchain langchain-openai"
    ]
@@ -20,9 +20,11 @@
    ]
   },
   {
-   "cell_type": "raw",
+   "cell_type": "code",
    "id": "b3121aa8",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "%pip install --upgrade --quiet langchain langchain-openai"
    ]
@@ -36,9 +36,11 @@
    ]
   },
   {
-   "cell_type": "raw",
+   "cell_type": "code",
+   "execution_count": null,
    "id": "b99b47ec",
    "metadata": {},
+   "outputs": [],
    "source": [
     "%pip install --upgrade --quiet langchain-core langchain-openai langchain-anthropic"
    ]
@@ -286,7 +286,7 @@
 </TabItem>
 <TabItem value="cohere" label="Cohere (API)" default>

-Make sure you have the `cohere` package installed an the appropriate environment variables set (these are the same as needed for the LLM).
+Make sure you have the `cohere` package installed and the appropriate environment variables set (these are the same as needed for the LLM).

 ```python
 from langchain_community.embeddings import CohereEmbeddings
@@ -563,7 +563,6 @@ from langchain_community.vectorstores import FAISS
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain.tools.retriever import create_retriever_tool
 from langchain_community.tools.tavily_search import TavilySearchResults
 from langchain_openai import ChatOpenAI
 from langchain import hub
 from langchain.agents import create_openai_functions_agent
 from langchain.agents import AgentExecutor
@@ -23,7 +23,7 @@ We also are working to share guides and cookbooks that demonstrate how to use th

 ## LangSmith Evaluation

-LangSmith provides an integrated evaluation and tracing framework that allows you to check for regressions, compare systems, and easily identify and fix any sources of errors and performance issues. Check out the docs on [LangSmith Evaluation](https://docs.smith.langchain.com/category/testing--evaluation) and additional [cookbooks](https://docs.smith.langchain.com/category/langsmith-cookbook) for more detailed information on evaluating your applications.
+LangSmith provides an integrated evaluation and tracing framework that allows you to check for regressions, compare systems, and easily identify and fix any sources of errors and performance issues. Check out the docs on [LangSmith Evaluation](https://docs.smith.langchain.com/evaluation) and additional [cookbooks](https://docs.smith.langchain.com/cookbook) for more detailed information on evaluating your applications.

 ## LangChain benchmarks
@@ -129,7 +129,7 @@
 "Who was famed for their Christian spirit?\n",
 "Who assimilted the Roman language?\n",
 "Who ruled the country of Normandy?\n",
-"What principality did William the conquerer found?\n",
+"What principality did William the conqueror found?\n",
 "What is the original meaning of the word Norman?\n",
 "When was the Latin version of the word Norman first recorded?\n",
 "What name comes from the English words Normans/Normanz?\"\"\"\n",
@@ -40,18 +40,10 @@
 },
 {
  "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 1,
  "id": "2108b517-1e8d-473d-92fa-4f930e8072a7",
  "metadata": {},
- "outputs": [
-  {
-   "name": "stdout",
-   "output_type": "stream",
-   "text": [
-    "········\n"
-   ]
-  }
- ],
+ "outputs": [],
  "source": [
   "import getpass\n",
   "import os\n",
@@ -90,7 +82,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 1,
+ "execution_count": 3,
  "id": "d4a7c55d-b235-4ca4-a579-c90cc9570da9",
  "metadata": {
   "tags": []
@@ -103,7 +95,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 2,
+ "execution_count": 4,
  "id": "70cf04e8-423a-4ff6-8b09-f11fb711c817",
  "metadata": {
   "tags": []
@@ -115,7 +107,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
  "id": "8199ef8f-eb8b-4253-9ea0-6c24a013ca4c",
  "metadata": {
   "tags": []
@@ -124,22 +116,22 @@
  {
   "data": {
    "text/plain": [
-    "AIMessage(content=\"Who's there?\")"
+    "AIMessage(content=\"4! That's one, two, three, four. Keep adding and we'll reach new heights!\", response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'token_count': {'prompt_tokens': 73, 'response_tokens': 21, 'total_tokens': 94, 'billed_tokens': 25}})"
    ]
   },
-  "execution_count": 3,
+  "execution_count": 5,
   "metadata": {},
   "output_type": "execute_result"
  }
 ],
 "source": [
- "messages = [HumanMessage(content=\"knock knock\")]\n",
+ "messages = [HumanMessage(content=\"1\"), HumanMessage(content=\"2 3\")]\n",
  "chat.invoke(messages)"
 ]
 },
 {
  "cell_type": "code",
- "execution_count": 4,
+ "execution_count": 6,
  "id": "c5fac0e9-05a4-4fc1-a3b3-e5bbb24b971b",
  "metadata": {
   "tags": []
@@ -148,10 +140,10 @@
  {
   "data": {
    "text/plain": [
-    "AIMessage(content=\"Who's there?\")"
+    "AIMessage(content='4! According to the rules of addition, 1 + 2 equals 3, and 3 + 3 equals 6.', response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'token_count': {'prompt_tokens': 73, 'response_tokens': 28, 'total_tokens': 101, 'billed_tokens': 32}})"
    ]
   },
-  "execution_count": 4,
+  "execution_count": 6,
   "metadata": {},
   "output_type": "execute_result"
  }
@@ -162,7 +154,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 5,
+ "execution_count": 7,
  "id": "025be980-e50d-4a68-93dc-c9c7b500ce34",
  "metadata": {
   "tags": []
@@ -172,7 +164,7 @@
  "name": "stdout",
  "output_type": "stream",
  "text": [
-  "Who's there?"
+  "4! It's a pleasure to be of service in this mathematical game."
  ]
 }
],
@@ -183,17 +175,17 @@
 },
 {
  "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 8,
  "id": "064288e4-f184-4496-9427-bcf148fa055e",
  "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-     "[AIMessage(content=\"Who's there?\")]"
+     "[AIMessage(content='4! According to the rules of addition, 1 + 2 equals 3, and 3 + 3 equals 6.', response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'token_count': {'prompt_tokens': 73, 'response_tokens': 28, 'total_tokens': 101, 'billed_tokens': 32}})]"
     ]
    },
-   "execution_count": 6,
+   "execution_count": 8,
    "metadata": {},
    "output_type": "execute_result"
   }
@@ -214,7 +206,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 7,
+ "execution_count": 9,
  "id": "0851b103",
  "metadata": {},
  "outputs": [],
@@ -227,17 +219,17 @@
 },
 {
  "cell_type": "code",
- "execution_count": 8,
+ "execution_count": 10,
  "id": "ae950c0f-1691-47f1-b609-273033cae707",
  "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-     "AIMessage(content=\"Why did the bear go to the chiropractor?\\n\\nBecause she was feeling a bit grizzly!\\n\\nHope you found that joke about bears to be a little bit amusing! If you'd like to hear another one, just let me know. In the meantime, if you have any other questions or need assistance with a different topic, feel free to let me know. \\n\\nJust remember, even if you have a sore back like the bear, it's always best to consult a licensed professional for injuries or pain you may be experiencing. \\n\\nWould you like me to tell you another joke?\")"
+     "AIMessage(content='What do you call a bear with no teeth? A gummy bear!', response_metadata={'documents': None, 'citations': None, 'search_results': None, 'search_queries': None, 'token_count': {'prompt_tokens': 72, 'response_tokens': 14, 'total_tokens': 86, 'billed_tokens': 20}})"
     ]
    },
-   "execution_count": 8,
+   "execution_count": 10,
    "metadata": {},
    "output_type": "execute_result"
   }
@@ -263,7 +255,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
- "version": "3.9.1"
+ "version": "3.11.7"
 }
},
"nbformat": 4,
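The new outputs above surface Cohere's `response_metadata` on the returned message. As a hedged sketch of how that metadata can be read back (the message here is constructed by hand purely for illustration; the field layout is copied from the execute_results shown above):

```python
from langchain_core.messages import AIMessage

# Shape taken from the notebook output above; normally this comes from chat.invoke(...).
msg = AIMessage(
    content="4! That's one, two, three, four.",
    response_metadata={
        "token_count": {"prompt_tokens": 73, "response_tokens": 21, "total_tokens": 94}
    },
)

usage = msg.response_metadata["token_count"]
print(f"prompt={usage['prompt_tokens']} response={usage['response_tokens']}")
```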
@@ -65,6 +65,7 @@
 "from langchain_core.output_parsers import StrOutputParser\n",
 "\n",
 "llm = ChatMaritalk(\n",
+"    model=\"sabia-2-medium\",  # Available models: sabia-2-small and sabia-2-medium\n",
 "    api_key=\"\",  # Insert your API key here\n",
 "    temperature=0.7,\n",
 "    max_tokens=100,\n",
@@ -4,7 +4,7 @@
 "cell_type": "raw",
 "source": [
  "---\n",
- "sidebar_label: YUAN2\n",
+ "sidebar_label: Yuan2.0\n",
  "---"
 ],
 "metadata": {
@@ -22,7 +22,7 @@
 }
 },
 "source": [
- "# YUAN2.0\n",
+ "# Yuan2.0\n",
  "\n",
  "This notebook shows how to use [YUAN2 API](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/docs/inference_server.md) in LangChain with the langchain.chat_models.ChatYuan2.\n",
  "\n",
@@ -96,9 +96,9 @@
 },
 "source": [
  "### Setting Up Your API server\n",
- "Setting up your OpenAI compatible API server following [yuan2 openai api server](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/README-EN.md).\n",
- "If you deployed api server locally, you can simply set `api_key=\"EMPTY\"` or anything you want.\n",
- "Just make sure, the `api_base` is set correctly."
+ "Setting up your OpenAI compatible API server following [yuan2 openai api server](https://github.com/IEIT-Yuan/Yuan-2.0/blob/main/docs/Yuan2_fastchat.md).\n",
+ "If you deployed api server locally, you can simply set `yuan2_api_key=\"EMPTY\"` or anything you want.\n",
+ "Just make sure, the `yuan2_api_base` is set correctly."
 ]
 },
 {
@@ -187,7 +187,7 @@
 },
 "outputs": [],
 "source": [
- "print(chat(messages))"
+ "print(chat.invoke(messages))"
 ]
 },
 {
@@ -247,7 +247,7 @@
 },
 "outputs": [],
 "source": [
- "chat(messages)"
+ "chat.invoke(messages)"
 ]
 },
 {
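These edits track LangChain's move away from calling chat models directly in favor of the `invoke` API. A hedged sketch of the pattern (using `ChatYuan2` as in the notebook; the endpoint value is a placeholder):

```python
from langchain_community.chat_models import ChatYuan2
from langchain_core.messages import HumanMessage

chat = ChatYuan2(
    yuan2_api_base="http://127.0.0.1:8000/v1",  # placeholder; point at your server
    yuan2_api_key="EMPTY",  # local deployments accept any key, per the notebook
)
messages = [HumanMessage(content="Hello")]

# Deprecated calling style: chat(messages)
# Preferred Runnable interface:
response = chat.invoke(messages)
print(response.content)
```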
@@ -22,7 +22,7 @@
 "outputs": [],
 "source": [
  "# You need the dgml-utils package to use the DocugamiLoader (run pip install directly without \"poetry run\" if you are not using poetry)\n",
- "!poetry run pip install dgml-utils==0.3.0 --upgrade --quiet"
+ "!poetry run pip install docugami-langchain dgml-utils==0.3.0 --upgrade --quiet"
 ]
 },
 {
@@ -56,7 +56,7 @@
 "source": [
  "import os\n",
  "\n",
- "from langchain_community.document_loaders import DocugamiLoader"
+ "from docugami_langchain.document_loaders import DocugamiLoader"
 ]
 },
 {
@@ -470,7 +470,7 @@
 "source": [
  "from typing import Dict, List\n",
  "\n",
- "from langchain_community.document_loaders import DocugamiLoader\n",
+ "from docugami_langchain.document_loaders import DocugamiLoader\n",
  "from langchain_core.documents import Document\n",
  "\n",
  "loader = DocugamiLoader(docset_id=\"zo954yqy53wp\")\n",
@@ -655,7 +655,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
- "version": "3.10.1"
+ "version": "3.9.18"
 }
 },
 "nbformat": 4,
@@ -1357,7 +1357,9 @@
 {
  "cell_type": "markdown",
  "source": [
-  "## Azure Cosmos DB Semantic Cache"
+  "## Azure Cosmos DB Semantic Cache\n",
+  "\n",
+  "You can use this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) for caching."
  ],
  "metadata": {
   "collapsed": false
@@ -13,7 +13,7 @@
 "https://api.together.xyz/settings/api-keys. This can be passed in as init param\n",
 "``together_api_key`` or set as environment variable ``TOGETHER_API_KEY``.\n",
 "\n",
-"Together API reference: https://docs.together.ai/reference/inference"
+"Together API reference: https://docs.together.ai/reference"
 ]
 },
 {
@@ -45,7 +45,7 @@
 "outputs": [],
 "source": [
  "# default infer_api for a local deployed Yuan2.0 inference server\n",
- "infer_api = \"http://127.0.0.1:8000\"\n",
+ "infer_api = \"http://127.0.0.1:8000/yuan\"\n",
  "\n",
  "# direct access endpoint in a proxied environment\n",
  "# import os\n",
@@ -56,7 +56,6 @@
  "    max_tokens=2048,\n",
  "    temp=1.0,\n",
  "    top_p=0.9,\n",
- "    top_k=40,\n",
  "    use_history=False,\n",
  ")\n",
  "\n",
@@ -89,7 +88,7 @@
 },
 "outputs": [],
 "source": [
- "print(yuan_llm(question))"
+ "print(yuan_llm.invoke(question))"
 ]
 }
],
@@ -503,21 +503,21 @@ from langchain_google_cloud_sql_pg import PostgreSQLEngine, PostgresVectorStore

 ### Vertex AI Vector Search

-> [Google Cloud Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/matching-engine/overview) from Google Cloud,
+> [Google Cloud Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/vector-search/overview) from Google Cloud,
 > formerly known as `Vertex AI Matching Engine`, provides the industry's leading high-scale
 > low latency vector database. These vector databases are commonly
 > referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.

-We need to install several python packages.
+Install the python package:

 ```bash
-pip install tensorflow langchain-google-vertexai tensorflow-hub tensorflow-text
+pip install langchain-google-vertexai
 ```

 See a [usage example](/docs/integrations/vectorstores/google_vertex_ai_vector_search).

 ```python
-from langchain_community.vectorstores import MatchingEngine
+from langchain_google_vertexai import VectorSearchVectorStore
 ```

 ### ScaNN
@@ -12,13 +12,17 @@ LangChain integrates with many providers.
 These providers have standalone `langchain-{provider}` packages for improved versioning, dependency management and testing.

 - [AI21](/docs/integrations/providers/ai21)
 - [Airbyte](/docs/integrations/providers/airbyte)
 - [Anthropic](/docs/integrations/platforms/anthropic)
 - [Astra DB](/docs/integrations/providers/astradb)
 - [Elasticsearch](/docs/integrations/providers/elasticsearch)
 - [Exa Search](/docs/integrations/providers/exa_search)
 - [Fireworks](/docs/integrations/providers/fireworks)
 - [Google](/docs/integrations/platforms/google)
 - [Groq](/docs/integrations/providers/groq)
 - [IBM](/docs/integrations/providers/ibm)
 - [MistralAI](/docs/integrations/providers/mistralai)
 - [MongoDB](/docs/integrations/providers/mongodb_atlas)
 - [Nomic](/docs/integrations/providers/nomic)
 - [Nvidia](/docs/integrations/providers/nvidia)
 - [OpenAI](/docs/integrations/platforms/openai)
@@ -3,6 +3,15 @@

 All functionality related to `Microsoft Azure` and other `Microsoft` products.

+## LLMs
+
+### Azure ML
+
+See a [usage example](/docs/integrations/llms/azure_ml).
+
+```python
+from langchain_community.llms.azureml_endpoint import AzureMLOnlineEndpoint
+```
+
 ### Azure OpenAI

 See a [usage example](/docs/integrations/llms/azure_openai).
30  docs/docs/integrations/providers/arcee.mdx  Normal file

@@ -0,0 +1,30 @@
+# Arcee
+
+>[Arcee](https://www.arcee.ai/about/about-us) enables the development and advancement
+> of what we coin as SLMs—small, specialized, secure, and scalable language models.
+> By offering a SLM Adaptation System and a seamless, secure integration,
+> `Arcee` empowers enterprises to harness the full potential of
+> domain-adapted language models, driving the transformative
+> innovation in operations.
+
+
+## Installation and Setup
+
+Get your `Arcee API` key.
+
+
+## LLMs
+
+See a [usage example](/docs/integrations/llms/arcee).
+
+```python
+from langchain_community.llms import Arcee
+```
+
+## Retrievers
+
+See a [usage example](/docs/integrations/retrievers/arcee).
+
+```python
+from langchain_community.retrievers import ArceeRetriever
+```
@@ -10,12 +10,7 @@ See a [tutorial provided by DataStax](https://docs.datastax.com/en/astra/astra-d

 Install the following Python package:
 ```bash
-pip install "langchain-astradb>=0.0.1"
-```
-
-Some old integrations require the `astrapy` package:
-```bash
-pip install "astrapy>=0.7.1"
+pip install "langchain-astradb>=0.1.0"
 ```

 Get the [connection secrets](https://docs.datastax.com/en/astra/astra-db-vector/get-started/quickstart.html).
@@ -61,7 +56,7 @@ See the [usage example](/docs/integrations/memory/astradb_chat_message_history#e

 ```python
 from langchain.globals import set_llm_cache
-from langchain_community.cache import AstraDBCache
+from langchain_astradb import AstraDBCache

 set_llm_cache(AstraDBCache(
     api_endpoint=ASTRA_DB_API_ENDPOINT,
@@ -76,7 +71,7 @@ Learn more in the [example notebook](/docs/integrations/llms/llm_caching#astra-d

 ```python
 from langchain.globals import set_llm_cache
-from langchain_community.cache import AstraDBSemanticCache
+from langchain_astradb import AstraDBSemanticCache

 set_llm_cache(AstraDBSemanticCache(
     embedding=my_embedding,
@@ -92,7 +87,7 @@ Learn more in the [example notebook](/docs/integrations/memory/astradb_chat_mess
 ## Document loader

 ```python
-from langchain_community.document_loaders import AstraDBLoader
+from langchain_astradb import AstraDBLoader

 loader = AstraDBLoader(
     collection_name="my_collection",
@@ -129,7 +124,7 @@ Learn more in the [example notebook](/docs/integrations/retrievers/self_query/as
 ## Store

 ```python
-from langchain_community.storage import AstraDBStore
+from langchain_astradb import AstraDBStore

 store = AstraDBStore(
     collection_name="my_kv_store",
@@ -143,7 +138,7 @@ Learn more in the [example notebook](/docs/integrations/stores/astradb#astradbst
 ## Byte Store

 ```python
-from langchain_community.storage import AstraDBByteStore
+from langchain_astradb import AstraDBByteStore

 store = AstraDBByteStore(
     collection_name="my_kv_store",
50  docs/docs/integrations/providers/baidu.mdx  Normal file

@@ -0,0 +1,50 @@
+# Baidu
+
+>[Baidu Cloud](https://cloud.baidu.com/) is a cloud service provided by `Baidu, Inc.`,
+> headquartered in Beijing. It offers a cloud storage service, client software,
+> file management, resource sharing, and Third Party Integration.
+
+
+## Installation and Setup
+
+Register and get the `Qianfan` `AK` and `SK` keys [here](https://cloud.baidu.com/product/wenxinworkshop).
+
+## LLMs
+
+### Baidu Qianfan
+
+See a [usage example](/docs/integrations/llms/baidu_qianfan_endpoint).
+
+```python
+from langchain_community.llms import QianfanLLMEndpoint
+```
+
+## Chat models
+
+### Qianfan Chat Endpoint
+
+See a [usage example](/docs/integrations/chat/baidu_qianfan_endpoint).
+
+```python
+from langchain_community.chat_models import QianfanChatEndpoint
+```
+
+## Embedding models
+
+### Baidu Qianfan
+
+See a [usage example](/docs/integrations/text_embedding/baidu_qianfan_endpoint).
+
+```python
+from langchain_community.embeddings import QianfanEmbeddingsEndpoint
+```
+
+## Vector stores
+
+### Baidu Cloud ElasticSearch VectorSearch
+
+See a [usage example](/docs/integrations/vectorstores/baiducloud_vector_search).
+
+```python
+from langchain_community.vectorstores import BESVectorStore
+```
30  docs/docs/integrations/providers/ctranslate2.mdx  Normal file

@@ -0,0 +1,30 @@
+# CTranslate2
+
+>[CTranslate2](https://opennmt.net/CTranslate2/quickstart.html) is a C++ and Python library
+> for efficient inference with Transformer models.
+>
+>The project implements a custom runtime that applies many performance optimization
+> techniques such as weights quantization, layers fusion, batch reordering, etc.,
+> to accelerate and reduce the memory usage of Transformer models on CPU and GPU.
+>
+>A full list of features and supported models is included in the
+> [project’s repository](https://opennmt.net/CTranslate2/guides/transformers.html).
+> To start, please check out the official [quickstart guide](https://opennmt.net/CTranslate2/quickstart.html).
+
+
+## Installation and Setup
+
+Install the Python package:
+
+```bash
+pip install ctranslate2
+```
+
+
+## LLMs
+
+See a [usage example](/docs/integrations/llms/ctranslate2).
+
+```python
+from langchain_community.llms import CTranslate2
+```
@@ -8,9 +8,8 @@ It is broken into two parts: installation and setup, and then examples of DeepSp
 - Install the Python package with `pip install deepsparse`
 - Choose a [SparseZoo model](https://sparsezoo.neuralmagic.com/?useCase=text_generation) or export a support model to ONNX [using Optimum](https://github.com/neuralmagic/notebooks/blob/main/notebooks/opt-text-generation-deepsparse-quickstart/OPT_Text_Generation_DeepSparse_Quickstart.ipynb)

-## Wrappers
-
-### LLM
+## LLMs

 There exists a DeepSparse LLM wrapper, which you can access with:
@@ -9,6 +9,7 @@

 ```bash
 pip install dgml-utils
+pip install docugami-langchain
 ```

 ## Document Loader
@@ -16,5 +17,5 @@ pip install dgml-utils
 See a [usage example](/docs/integrations/document_loaders/docugami).

 ```python
-from langchain_community.document_loaders import DocugamiLoader
+from docugami_langchain.document_loaders import DocugamiLoader
 ```
62  docs/docs/integrations/providers/edenai.mdx  Normal file

@@ -0,0 +1,62 @@
+# Eden AI
+
+>[Eden AI](https://docs.edenai.co/docs/getting-started-with-eden-ai) user interface (UI)
+> is designed for handling the AI projects. With `Eden AI Portal`,
+> you can perform no-code AI using the best engines for the market.
+
+
+## Installation and Setup
+
+Accessing the Eden AI API requires an API key, which you can get by
+[creating an account](https://app.edenai.run/user/register) and
+heading [here](https://app.edenai.run/admin/account/settings).
+
+## LLMs
+
+See a [usage example](/docs/integrations/llms/edenai).
+
+```python
+from langchain_community.llms import EdenAI
+
+```
+
+## Chat models
+
+See a [usage example](/docs/integrations/chat/edenai).
+
+```python
+from langchain_community.chat_models.edenai import ChatEdenAI
+```
+
+## Embedding models
+
+See a [usage example](/docs/integrations/text_embedding/edenai).
+
+```python
+from langchain_community.embeddings.edenai import EdenAiEmbeddings
+```
+
+## Tools
+
+Eden AI provides a list of tools that grants your Agent the ability to do multiple tasks, such as:
+* speech to text
+* text to speech
+* text explicit content detection
+* image explicit content detection
+* object detection
+* OCR invoice parsing
+* OCR ID parsing
+
+See a [usage example](/docs/integrations/tools/edenai_tools).
+
+```python
+from langchain_community.tools.edenai import (
+    EdenAiExplicitImageTool,
+    EdenAiObjectDetectionTool,
+    EdenAiParsingIDTool,
+    EdenAiParsingInvoiceTool,
+    EdenAiSpeechToTextTool,
+    EdenAiTextModerationTool,
+    EdenAiTextToSpeechTool,
+)
+```
27  docs/docs/integrations/providers/elevenlabs.mdx  Normal file

@@ -0,0 +1,27 @@
+# ElevenLabs
+
+>[ElevenLabs](https://elevenlabs.io/about) is a voice AI research & deployment company
+> with a mission to make content universally accessible in any language & voice.
+>
+>`ElevenLabs` creates the most realistic, versatile and contextually-aware
+> AI audio, providing the ability to generate speech in hundreds of
+> new and existing voices in 29 languages.
+
+## Installation and Setup
+
+First, you need to set up an ElevenLabs account. You can follow the
+[instructions here](https://docs.elevenlabs.io/welcome/introduction).
+
+Install the Python package:
+
+```bash
+pip install elevenlabs
+```
+
+## Tools
+
+See a [usage example](/docs/integrations/tools/eleven_labs_tts).
+
+```python
+from langchain_community.tools import ElevenLabsText2SpeechTool
+```
21  docs/docs/integrations/providers/pygmalionai.mdx  Normal file

@@ -0,0 +1,21 @@
+# PygmalionAI
+
+>[PygmalionAI](https://pygmalion.chat/) is a company supporting the
+> open-source models by serving the inference endpoint
+> for the [Aphrodite Engine](https://github.com/PygmalionAI/aphrodite-engine).
+
+
+## Installation and Setup
+
+
+```bash
+pip install aphrodite-engine
+```
+
+## LLMs
+
+See a [usage example](/docs/integrations/llms/aphrodite).
+
+```python
+from langchain_community.llms import Aphrodite
+```
@@ -12,7 +12,7 @@
 "https://api.together.xyz/settings/api-keys. This can be passed in as init param\n",
 "``together_api_key`` or set as environment variable ``TOGETHER_API_KEY``.\n",
 "\n",
-"Together API reference: https://docs.together.ai/reference/inference\n",
+"Together API reference: https://docs.together.ai/reference\n",
 "\n",
 "You will also need to install the `langchain-together` integration package:"
 ]
24  docs/docs/integrations/providers/voyageai.mdx  Normal file

@@ -0,0 +1,24 @@
+# VoyageAI
+
+All functionality related to VoyageAI
+
+>[VoyageAI](https://www.voyageai.com/) Voyage AI builds embedding models, customized for your domain and company, for better retrieval quality.
+> customized for your domain and company, for better retrieval quality.
+
+## Installation and Setup
+
+Install the integration package with
+```bash
+pip install langchain-voyageai
+```
+
+Get an VoyageAI api key and set it as an environment variable (`VOYAGE_API_KEY`)
+
+
+## Text Embedding Model
+
+See a [usage example](/docs/integrations/text_embedding/voyageai)
+
+```python
+from langchain_voyageai import VoyageAIEmbeddings
+```
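A quick hedged sketch of using the new package added above (the model name `voyage-2` is an assumption; pick whichever model Voyage AI currently offers):

```python
import os

from langchain_voyageai import VoyageAIEmbeddings

os.environ["VOYAGE_API_KEY"] = "..."  # set your real key here

# "voyage-2" is assumed for illustration; check Voyage AI's model list.
embeddings = VoyageAIEmbeddings(model="voyage-2")

vector = embeddings.embed_query("What is LangChain?")
print(len(vector))  # dimensionality of the returned embedding
```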
@@ -10,6 +10,19 @@
 "This notebook covers how to get started with Cohere RAG retriever. This allows you to leverage the ability to search documents over various connectors or by supplying your own."
 ]
 },
+{
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2c367be3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+  "import getpass\n",
+  "import os\n",
+  "\n",
+  "os.environ[\"COHERE_API_KEY\"] = getpass.getpass()"
+ ]
+},
 {
  "cell_type": "code",
  "execution_count": null,
@@ -218,7 +231,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.12"
+"version": "3.11.7"
 }
 },
 "nbformat": 4,
@@ -28,17 +28,17 @@
 },
 "outputs": [],
 "source": [
- "% pip install --upgrade --quiet flashrank\n",
- "% pip install --upgrade --quiet faiss\n",
+ "%pip install --upgrade --quiet flashrank\n",
+ "%pip install --upgrade --quiet faiss\n",
  "\n",
  "# OR (depending on Python version)\n",
  "\n",
- "% pip install --upgrade --quiet faiss_cpu"
+ "%pip install --upgrade --quiet faiss_cpu"
 ]
 },
 {
  "cell_type": "code",
- "execution_count": 13,
+ "execution_count": 2,
  "metadata": {
   "collapsed": false,
   "jupyter": {
@@ -53,7 +53,10 @@
 "def pretty_print_docs(docs):\n",
 "    print(\n",
 "        f\"\\n{'-' * 100}\\n\".join(\n",
-"            [f\"Document {i+1}:\\n\\n\" + d.page_content for i, d in enumerate(docs)]\n",
+"            [\n",
+"                f\"Document {i+1}:\\n\\n{d.page_content}\\nMetadata: {d.metadata}\"\n",
+"                for i, d in enumerate(docs)\n",
+"            ]\n",
 "        )\n",
 "    )"
 ]
@@ -73,7 +76,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
  "metadata": {
   "collapsed": false,
   "jupyter": {
@@ -90,7 +93,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 15,
+ "execution_count": 4,
  "metadata": {
   "collapsed": false,
   "jupyter": {
@@ -247,14 +250,6 @@
 "----------------------------------------------------------------------------------------------------\n",
 "Document 15:\n",
 "\n",
-"My plan to fight inflation will lower your costs and lower the deficit. \n",
-"\n",
-"17 Nobel laureates in economics say my plan will ease long-term inflationary pressures. Top business leaders and most Americans support my plan. And here’s the plan: \n",
-"\n",
-"First – cut the cost of prescription drugs. Just look at insulin. One in ten Americans has diabetes. In Virginia, I met a 13-year-old boy named Joshua Davis.\n",
-"----------------------------------------------------------------------------------------------------\n",
-"Document 16:\n",
-"\n",
 "And soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \n",
 "\n",
 "So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \n",
@@ -263,15 +258,15 @@
 "\n",
 "There is so much we can do. Increase funding for prevention, treatment, harm reduction, and recovery.\n",
 "----------------------------------------------------------------------------------------------------\n",
-"Document 17:\n",
+"Document 16:\n",
 "\n",
-"So let’s not abandon our streets. Or choose between safety and equal justice. \n",
+"My plan to fight inflation will lower your costs and lower the deficit. \n",
 "\n",
-"Let’s come together to protect our communities, restore trust, and hold law enforcement accountable. \n",
+"17 Nobel laureates in economics say my plan will ease long-term inflationary pressures. Top business leaders and most Americans support my plan. And here’s the plan: \n",
 "\n",
-"That’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers.\n",
+"First – cut the cost of prescription drugs. Just look at insulin. One in ten Americans has diabetes. In Virginia, I met a 13-year-old boy named Joshua Davis.\n",
 "----------------------------------------------------------------------------------------------------\n",
-"Document 18:\n",
+"Document 17:\n",
 "\n",
 "My plan will not only lower costs to give families a fair shot, it will lower the deficit. \n",
 "\n",
@@ -281,6 +276,14 @@
 "\n",
 "We’re going after the criminals who stole billions in relief money meant for small businesses and millions of Americans.\n",
 "----------------------------------------------------------------------------------------------------\n",
+"Document 18:\n",
+"\n",
+"So let’s not abandon our streets. Or choose between safety and equal justice. \n",
+"\n",
+"Let’s come together to protect our communities, restore trust, and hold law enforcement accountable. \n",
+"\n",
+"That’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers.\n",
+"----------------------------------------------------------------------------------------------------\n",
 "Document 19:\n",
 "\n",
 "I understand. \n",
@@ -316,6 +319,8 @@
 ").load()\n",
 "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
 "texts = text_splitter.split_documents(documents)\n",
+"for idx, text in enumerate(texts):\n",
+"    text.metadata[\"id\"] = idx\n",
 "\n",
 "embedding = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n",
 "retriever = FAISS.from_documents(texts, embedding).as_retriever(search_kwargs={\"k\": 20})\n",
@@ -340,16 +345,25 @@
 },
 {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
  "metadata": {
   "collapsed": false,
   "jupyter": {
    "outputs_hidden": false
   }
  },
- "outputs": [],
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "[0, 5, 3]\n"
+   ]
+  }
+ ],
 "source": [
- "from langchain.retrievers import ContextualCompressionRetriever, FlashrankRerank\n",
+ "from langchain.retrievers import ContextualCompressionRetriever\n",
+ "from langchain.retrievers.document_compressors import FlashrankRerank\n",
  "from langchain_openai import ChatOpenAI\n",
  "\n",
  "llm = ChatOpenAI(temperature=0)\n",
@@ -379,7 +393,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 20,
+ "execution_count": 6,
  "metadata": {
   "collapsed": false,
   "jupyter": {
@@ -399,6 +413,16 @@
 "----------------------------------------------------------------------------------------------------\n",
 "Document 2:\n",
 "\n",
+"He met the Ukrainian people. \n",
+"\n",
+"From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n",
+"\n",
+"Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \n",
+"\n",
+"In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight.\n",
+"----------------------------------------------------------------------------------------------------\n",
+"Document 3:\n",
+"\n",
 "And tonight, I’m announcing that the Justice Department will name a chief prosecutor for pandemic fraud. \n",
 "\n",
 "By the end of this year, the deficit will be down to less than half what it was before I took office. \n",
@@ -409,19 +433,7 @@
 "\n",
 "I’m a capitalist, but capitalism without competition isn’t capitalism. \n",
 "\n",
-"It’s exploitation—and it drives up prices.\n",
-"----------------------------------------------------------------------------------------------------\n",
-"Document 3:\n",
-"\n",
-"As Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \n",
-"\n",
-"It’s time. \n",
-"\n",
-"But with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills. \n",
-"\n",
-"Inflation is robbing them of the gains they might otherwise feel. \n",
-"\n",
-"I get it. That’s why my top priority is getting prices under control.\n"
+"It’s exploitation—and it drives up prices.\n"
 ]
 }
],
@@ -443,7 +455,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 18,
+ "execution_count": 7,
  "metadata": {
   "collapsed": false,
   "jupyter": {
@@ -459,7 +471,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 19,
+ "execution_count": 8,
  "metadata": {
   "collapsed": false,
   "jupyter": {
@@ -471,10 +483,10 @@
  "data": {
   "text/plain": [
    "{'query': 'What did the president say about Ketanji Brown Jackson',\n",
-   " 'result': \"The President said that Ketanji Brown Jackson is one of our nation's top legal minds and will continue Justice Breyer's legacy of excellence.\"}"
+   " 'result': \"The President mentioned that Ketanji Brown Jackson is one of the nation's top legal minds and will continue Justice Breyer's legacy of excellence.\"}"
   ]
  },
- "execution_count": 19,
+ "execution_count": 8,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -500,7 +512,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.12"
+"version": "3.12.2"
 }
 },
 "nbformat": 4,
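The import fix above splits `FlashrankRerank` out of `langchain.retrievers` and into `langchain.retrievers.document_compressors`. A hedged sketch of wiring the two together (this mirrors the notebook's setup; it assumes `OPENAI_API_KEY` is set and that `texts` holds the split documents, each tagged with a `metadata["id"]` as in the loop added above):

```python
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import FlashrankRerank
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Assumption: `texts` is the list of split, id-tagged documents from the notebook.
retriever = FAISS.from_documents(texts, OpenAIEmbeddings()).as_retriever(
    search_kwargs={"k": 20}
)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=FlashrankRerank(),  # rerank the top-k candidates
    base_retriever=retriever,
)

docs = compression_retriever.get_relevant_documents(
    "What did the president say about Ketanji Brown Jackson"
)
print([d.metadata["id"] for d in docs])  # reranked order, e.g. [0, 5, 3] in the notebook run
```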
@@ -30,7 +30,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet google-cloud-discoveryengine"
|
||||
"%pip install --upgrade --quiet google-cloud-discoveryengine"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -115,10 +115,12 @@
|
||||
" - `global` (default)\n",
|
||||
" - `us`\n",
|
||||
" - `eu`\n",
|
||||
"- `data_store_id` - The ID of the data store you want to use.\n",
|
||||
" - Note: This was called `search_engine_id` in previous versions of the retriever.\n",
|
||||
"\n",
|
||||
"The `project_id` and `data_store_id` parameters can be provided explicitly in the retriever's constructor or through the environment variables - `PROJECT_ID` and `DATA_STORE_ID`.\n",
|
||||
"One of:\n",
|
||||
"- `search_engine_id` - The ID of the search app you want to use. (Required for Blended Search)\n",
|
||||
"- `data_store_id` - The ID of the data store you want to use.\n",
|
||||
"\n",
|
||||
"The `project_id`, `search_engine_id` and `data_store_id` parameters can be provided explicitly in the retriever's constructor or through the environment variables - `PROJECT_ID`, `SEARCH_ENGINE_ID` and `DATA_STORE_ID`.\n",
|
||||
"\n",
|
||||
"You can also configure a number of optional parameters, including:\n",
|
||||
"\n",
|
||||
@@ -137,17 +139,17 @@
|
||||
"- `engine_data_type` - Defines the Vertex AI Search data type\n",
|
||||
" - `0` - Unstructured data\n",
|
||||
" - `1` - Structured data\n",
|
||||
" - `2` - Website data with [Advanced Website Indexing](https://cloud.google.com/generative-ai-app-builder/docs/about-advanced-features#advanced-website-indexing)\n",
|
||||
" - `2` - Website data\n",
|
||||
" - `3` - [Blended search](https://cloud.google.com/generative-ai-app-builder/docs/create-data-store-es#multi-data-stores)\n",
|
||||
"\n",
|
||||
"### Migration guide for `GoogleCloudEnterpriseSearchRetriever`\n",
|
||||
"\n",
|
||||
"In previous versions, this retriever was called `GoogleCloudEnterpriseSearchRetriever`. Some backwards-incompatible changes had to be made to the retriever after the General Availability launch due to changes in the product behavior.\n",
|
||||
"In previous versions, this retriever was called `GoogleCloudEnterpriseSearchRetriever`.\n",
|
||||
"\n",
|
||||
"To update to the new retriever, make the following changes:\n",
|
||||
"\n",
|
||||
"- Change the import from: `from langchain.retrievers import GoogleCloudEnterpriseSearchRetriever` -> `from langchain.retrievers import GoogleVertexAISearchRetriever`.\n",
|
||||
"- Change all class references from `GoogleCloudEnterpriseSearchRetriever` -> `GoogleVertexAISearchRetriever`.\n",
|
||||
"- Upon class initialization, change the `search_engine_id` parameter name to `data_store_id`.\n"
|
||||
"- Change all class references from `GoogleCloudEnterpriseSearchRetriever` -> `GoogleVertexAISearchRetriever`.\n"
|
||||
]
|
||||
},
|
||||
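A minimal sketch of the rename described in the migration guide above; nothing else about the call site needs to change:

```python
# Before (deprecated name):
# from langchain.retrievers import GoogleCloudEnterpriseSearchRetriever
# retriever = GoogleCloudEnterpriseSearchRetriever(project_id=PROJECT_ID, data_store_id=DATA_STORE_ID)

# After (same retriever under the new name):
from langchain.retrievers import GoogleVertexAISearchRetriever

# retriever = GoogleVertexAISearchRetriever(project_id=PROJECT_ID, data_store_id=DATA_STORE_ID)
```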
{
|
||||
@@ -170,6 +172,7 @@
|
||||
"\n",
|
||||
"PROJECT_ID = \"<YOUR PROJECT ID>\" # Set to your Project ID\n",
|
||||
"LOCATION_ID = \"<YOUR LOCATION>\" # Set to your data store location\n",
|
||||
"SEARCH_ENGINE_ID = \"<YOUR SEARCH APP ID>\" # Set to your search app ID\n",
|
||||
"DATA_STORE_ID = \"<YOUR DATA STORE ID>\" # Set to your data store ID"
|
||||
]
|
||||
},
|
||||
@@ -281,6 +284,32 @@
|
||||
" print(doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure and use the retriever for **blended** data\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = GoogleVertexAISearchRetriever(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" location_id=LOCATION_ID,\n",
|
||||
" search_engine_id=SEARCH_ENGINE_ID,\n",
|
||||
" max_documents=3,\n",
|
||||
" engine_data_type=3,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"result = retriever.get_relevant_documents(query)\n",
|
||||
"for doc in result:\n",
|
||||
" print(doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -322,7 +351,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
"version": "3.11.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -25,14 +25,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"execution_count": 1,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-15T09:36:13.753824100Z",
|
||||
"start_time": "2024-03-15T09:36:13.225834400Z"
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings import SparkLLMTextEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = SparkLLMTextEmbeddings(\n",
|
||||
" spark_app_id=\"sk-*\", spark_api_key=\"\", spark_api_secret=\"\"\n",
|
||||
" spark_app_id=\"<spark_app_id>\",\n",
|
||||
" spark_api_key=\"<spark_api_key>\",\n",
|
||||
" spark_api_secret=\"<spark_api_secret>\",\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -45,44 +52,67 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-15T09:36:25.436201400Z",
|
||||
"start_time": "2024-03-15T09:36:25.313456600Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "[-0.043609619140625,\n 0.2017822265625,\n 0.0270843505859375,\n -0.250244140625,\n -0.024993896484375,\n -0.0382080078125,\n 0.06207275390625,\n -0.0146331787109375]"
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"text_q = \"Introducing iFlytek\"\n",
|
||||
"\n",
|
||||
"os.environ[\"SPARK_APP_ID\"] = \"YOUR_APP_ID\"\n",
|
||||
"os.environ[\"SPARK_API_KEY\"] = \"YOUR_API_KEY\"\n",
|
||||
"os.environ[\"SPARK_API_SECRET\"] = \"YOUR_API_SECRET\""
|
||||
"text_1 = \"Science and Technology Innovation Company Limited, commonly known as iFlytek, is a leading Chinese technology company specializing in speech recognition, natural language processing, and artificial intelligence. With a rich history and remarkable achievements, iFlytek has emerged as a frontrunner in the field of intelligent speech and language technologies.iFlytek has made significant contributions to the advancement of human-computer interaction through its cutting-edge innovations. Their advanced speech recognition technology has not only improved the accuracy and efficiency of voice input systems but has also enabled seamless integration of voice commands into various applications and devices.The company's commitment to research and development has been instrumental in its success. iFlytek invests heavily in fostering talent and collaboration with academic institutions, resulting in groundbreaking advancements in speech synthesis and machine translation. Their dedication to innovation has not only transformed the way we communicate but has also enhanced accessibility for individuals with disabilities.\"\n",
|
||||
"\n",
|
||||
"text_2 = \"Moreover, iFlytek's impact extends beyond domestic boundaries, as they actively promote international cooperation and collaboration in the field of artificial intelligence. They have consistently participated in global competitions and contributed to the development of international standards.In recognition of their achievements, iFlytek has received numerous accolades and awards both domestically and internationally. Their contributions have revolutionized the way we interact with technology and have paved the way for a future where voice-based interfaces play a vital role.Overall, iFlytek is a trailblazer in the field of intelligent speech and language technologies, and their commitment to innovation and excellence deserves commendation.\"\n",
|
||||
"\n",
|
||||
"query_result = embeddings.embed_query(text_q)\n",
|
||||
"query_result[:8]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"text_1 = \"iFLYTEK is a well-known intelligent speech and artificial intelligence publicly listed company in the Asia-Pacific Region. Since its establishment, the company is devoted to cornerstone technological research in speech and languages, natural language understanding, machine learning, machine reasoning, adaptive learning, and has maintained the world-leading position in those domains. The company actively promotes the development of A.I. products and their sector-based applications, with visions of enabling machines to listen and speak, understand and think, creating a better world with artificial intelligence.\"\n",
|
||||
"text_2 = \"iFLYTEK Open Platform was launched in 2010 by iFLYTEK as China’s first Artificial Intelligence open platform for Mobile Internet and intelligent hardware developers.\"\n",
|
||||
"\n",
|
||||
"query_result = embeddings.embed_query(text_2)\n",
|
||||
"query_result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-03-15T09:36:54.657224Z",
|
||||
"start_time": "2024-03-15T09:36:54.404690400Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": "[-0.161865234375,\n 0.58984375,\n 0.998046875,\n 0.365966796875,\n 0.72900390625,\n 0.6015625,\n -0.8408203125,\n -0.2666015625]"
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"doc_result = embeddings.embed_documents([text_1, text_2])\n",
|
||||
"doc_result"
|
||||
"doc_result[0][:8]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"language_info": {
|
||||
"name": "python"
|
||||
},
|
||||
"kernelspec": {
|
||||
"name": "python3",
|
||||
"language": "python",
|
||||
"display_name": "Python 3 (ipykernel)"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"\n",
|
||||
">[Voyage AI](https://www.voyageai.com/) provides cutting-edge embedding/vectorization models.\n",
|
||||
"\n",
|
||||
"Let's load the Voyage Embedding class."
|
||||
"Let's load the Voyage Embedding class. (Install the LangChain partner package with `pip install langchain-voyageai`)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -19,7 +19,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.embeddings import VoyageEmbeddings"
|
||||
"from langchain_voyageai import VoyageAIEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -37,7 +37,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = VoyageEmbeddings(\n",
|
||||
"embeddings = VoyageAIEmbeddings(\n",
|
||||
" voyage_api_key=\"[ Your Voyage API key ]\", model=\"voyage-2\"\n",
|
||||
")"
|
||||
]
|
||||
|
||||
@@ -124,7 +124,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain import hub\n",
|
||||
"from langchain.agents import AgentExecutor, create_react_agent\n",
|
||||
"from langchain.agents import AgentExecutor, create_openai_tools_agent\n",
|
||||
"from langchain_openai import ChatOpenAI"
|
||||
]
|
||||
},
|
||||
@@ -135,8 +135,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"llm = ChatOpenAI(temperature=0, model=\"gpt-4\")\n",
|
||||
"prompt = hub.pull(\"hwchase17/react\")\n",
|
||||
"agent = create_react_agent(\n",
|
||||
"prompt = hub.pull(\"hwchase17/openai-tools-agent\")\n",
|
||||
"agent = create_openai_tools_agent(\n",
|
||||
" tools=toolkit.get_tools(),\n",
|
||||
" llm=llm,\n",
|
||||
" prompt=prompt,\n",
|
||||
@@ -151,7 +151,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"agent_executor.invoke(\n",
|
||||
" {\"input\": \"Send a greeting to my coworkers in the #general channel.\"}\n",
|
||||
" {\n",
|
||||
" \"input\": \"Send a greeting to my coworkers in the #general channel. Note use `channel` as key of channel id, and `message` as key of content to sent in the channel.\"\n",
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -9,17 +9,13 @@
|
||||
"source": [
|
||||
"# Azure Cosmos DB\n",
|
||||
"\n",
|
||||
">[Azure Cosmos DB for MongoDB vCore](https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/) makes it easy to create a database with full native MongoDB support.\n",
|
||||
"> You can apply your MongoDB experience and continue to use your favorite MongoDB drivers, SDKs, and tools by pointing your application to the API for MongoDB vCore account's connection string.\n",
|
||||
"> Use vector search in Azure Cosmos DB for MongoDB vCore to seamlessly integrate your AI-based applications with your data that's stored in Azure Cosmos DB.\n",
|
||||
"\n",
|
||||
"This notebook shows you how to leverage the [Vector Search](https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search) capabilities within Azure Cosmos DB for Mongo vCore to store documents in collections, create indicies and perform vector search queries using approximate nearest neighbor algorithms such as COS (cosine distance), L2 (Euclidean distance), and IP (inner product) to locate documents close to the query vectors. \n",
|
||||
"This notebook shows you how to leverage this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) to store documents in collections, create indicies and perform vector search queries using approximate nearest neighbor algorithms such as COS (cosine distance), L2 (Euclidean distance), and IP (inner product) to locate documents close to the query vectors. \n",
|
||||
" \n",
|
||||
"Azure Cosmos DB for MongoDB vCore provides developers with a fully managed MongoDB-compatible database service for building modern applications with a familiar architecture.\n",
|
||||
"Azure Cosmos DB is the database that powers OpenAI's ChatGPT service. It offers single-digit millisecond response times, automatic and instant scalability, along with guaranteed speed at any scale. \n",
|
||||
"\n",
|
||||
"With Cosmos DB for MongoDB vCore, developers can enjoy the benefits of native Azure integrations, low total cost of ownership (TCO), and the familiar vCore architecture when migrating existing applications or building new ones.\n",
|
||||
"Azure Cosmos DB for MongoDB vCore(https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/) provides developers with a fully managed MongoDB-compatible database service for building modern applications with a familiar architecture. You can apply your MongoDB experience and continue to use your favorite MongoDB drivers, SDKs, and tools by pointing your application to the API for MongoDB vCore account's connection string.\n",
|
||||
"\n",
|
||||
"[Sign Up](https://azure.microsoft.com/en-us/free/) for free to get started today.\n",
|
||||
"[Sign Up](https://azure.microsoft.com/en-us/free/) for lifetime free access to get started today.\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
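As a rough sketch of where the notebook is headed (the class and method names here follow the `langchain_community` Azure Cosmos DB integration and should be treated as assumptions; the full notebook also creates a vector index before searching):

```python
from langchain_community.vectorstores.azure_cosmos_db import (
    AzureCosmosDBVectorSearch,
)
from langchain_openai import OpenAIEmbeddings

# Placeholder values -- substitute your vCore connection string and a
# "<database>.<collection>" namespace.
CONNECTION_STRING = "<YOUR MONGODB VCORE CONNECTION STRING>"
NAMESPACE = "langchain_db.example_collection"

# Connect the vector store to the collection, then run a similarity search.
vectorstore = AzureCosmosDBVectorSearch.from_connection_string(
    CONNECTION_STRING, NAMESPACE, OpenAIEmbeddings()
)
docs = vectorstore.similarity_search("What is Azure Cosmos DB?")
```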
|
||||
@@ -9,13 +9,15 @@
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# Tencent Cloud VectorDB\n",
|
||||
"# Baidu VectorDB\n",
|
||||
"\n",
|
||||
">[Tencent Cloud VectorDB](https://cloud.tencent.com/document/product/1709) is a fully managed, self-developed, enterprise-level distributed database service designed for storing, retrieving, and analyzing multi-dimensional vector data. The database supports multiple index types and similarity calculation methods. A single index can support a vector scale of up to 1 billion and can support millions of QPS and millisecond-level query latency. Tencent Cloud Vector Database can not only provide an external knowledge base for large models to improve the accuracy of large model responses but can also be widely used in AI fields such as recommendation systems, NLP services, computer vision, and intelligent customer service.\n",
|
||||
">[Baidu VectorDB](https://cloud.baidu.com/product/vdb.html) is a robust, enterprise-level distributed database service, meticulously developed and fully managed by Baidu Intelligent Cloud. It stands out for its exceptional ability to store, retrieve, and analyze multi-dimensional vector data. At its core, VectorDB operates on Baidu's proprietary \"Mochow\" vector database kernel, which ensures high performance, availability, and security, alongside remarkable scalability and user-friendliness.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the Tencent vector database.\n",
|
||||
">This database service supports a diverse range of index types and similarity calculation methods, catering to various use cases. A standout feature of VectorDB is its capacity to manage an immense vector scale of up to 10 billion, while maintaining impressive query performance, supporting millions of queries per second (QPS) with millisecond-level query latency.\n",
|
||||
"\n",
|
||||
"To run, you should have a [Database instance.](https://cloud.tencent.com/document/product/1709/95101)."
|
||||
"This notebook shows how to use functionality related to the Baidu VectorDB. \n",
|
||||
"\n",
|
||||
"To run, you should have a [Database instance.](https://cloud.baidu.com/doc/VDB/s/hlrsoazuf)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -24,20 +26,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip3 install tcvectordb"
|
||||
"!pip3 install pymochow"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"from langchain_community.embeddings.fake import FakeEmbeddings\n",
|
||||
"from langchain_community.vectorstores import TencentVectorDB\n",
|
||||
"from langchain_community.vectorstores.tencentvectordb import ConnectionParams\n",
|
||||
"from langchain_text_splitters import CharacterTextSplitter"
|
||||
"from langchain_community.vectorstores import BaiduVectorDB\n",
|
||||
"from langchain_community.vectorstores.baiduvectordb import ConnectionParams"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -60,17 +64,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"conn_params = ConnectionParams(\n",
|
||||
" url=\"http://10.0.X.X\",\n",
|
||||
" key=\"eC4bLRy2va******************************\",\n",
|
||||
" username=\"root\",\n",
|
||||
" timeout=20,\n",
|
||||
" endpoint=\"http://192.168.xx.xx:xxxx\", account=\"root\", api_key=\"****\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vector_db = TencentVectorDB.from_documents(\n",
|
||||
" docs,\n",
|
||||
" embeddings,\n",
|
||||
" connection_params=conn_params,\n",
|
||||
" # drop_old=True,\n",
|
||||
"vector_db = BaiduVectorDB.from_documents(\n",
|
||||
" docs, embeddings, connection_params=conn_params, drop=True\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -91,8 +89,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_db = TencentVectorDB(embeddings, conn_params)\n",
|
||||
"\n",
|
||||
"vector_db = BaiduVectorDB(embeddings, conn_params)\n",
|
||||
"vector_db.add_texts([\"Ankush went to Princeton\"])\n",
|
||||
"query = \"Where did Ankush go to college?\"\n",
|
||||
"docs = vector_db.max_marginal_relevance_search(query)\n",
|
||||
@@ -116,7 +113,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.9.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
787
docs/docs/integrations/vectorstores/couchbase.ipynb
Normal file
@@ -0,0 +1,787 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f63dfcf9-fd9d-4ac1-a0b3-c02d4dce7faf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Couchbase \n",
|
||||
"[Couchbase](http://couchbase.com/) is an award-winning distributed NoSQL cloud database that delivers unmatched versatility, performance, scalability, and financial value for all of your cloud, mobile, AI, and edge computing applications. Couchbase embraces AI with coding assistance for developers and vector search for their applications.\n",
|
||||
"\n",
|
||||
"Vector Search is a part of the [Full Text Search Service](https://docs.couchbase.com/server/current/learn/services-and-indexes/services/search-service.html) (Search Service) in Couchbase.\n",
|
||||
"\n",
|
||||
"This tutorial explains how to use Vector Search in Couchbase. You can work with both [Couchbase Capella](https://www.couchbase.com/products/capella/) and your self-managed Couchbase Server."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "43326be4-4433-4de2-ad42-6eb91a722bad",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bec8d532-fec7-4dc7-9be3-020aa7bdb01f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install --upgrade --quiet langchain langchain-openai couchbase"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "4a972cbc-bf59-46eb-9b50-e5dc3a69dcf0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "acf1b168-622f-465c-a9a5-d27a6d7e7a8f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Import the Vector Store and Embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "23ce45ab-bfd2-42e1-b681-514a550f0232",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.vectorstores import CouchbaseVectorStore\n",
|
||||
"from langchain_openai import OpenAIEmbeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3144ba02-1eaa-4449-853e-f034ca5706bf",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Couchbase Connection Object\n",
|
||||
"We create a connection to the Couchbase cluster initially and then pass the cluster object to the Vector Store. \n",
|
||||
"\n",
|
||||
"Here, we are connecting using the username and password. You can also connect using any other supported way to your cluster. \n",
|
||||
"\n",
|
||||
"For more information on connecting to the Couchbase cluster, please check the [Python SDK documentation](https://docs.couchbase.com/python-sdk/current/hello-world/start-using-sdk.html#connect)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "52fe583a-12db-4dc2-9281-1174bf1d4e5c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"COUCHBASE_CONNECTION_STRING = (\n",
|
||||
" \"couchbase://localhost\" # or \"couchbases://localhost\" if using TLS\n",
|
||||
")\n",
|
||||
"DB_USERNAME = \"Administrator\"\n",
|
||||
"DB_PASSWORD = \"Password\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9986c6b9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import timedelta\n",
|
||||
"\n",
|
||||
"from couchbase.auth import PasswordAuthenticator\n",
|
||||
"from couchbase.cluster import Cluster\n",
|
||||
"from couchbase.options import ClusterOptions\n",
|
||||
"\n",
|
||||
"auth = PasswordAuthenticator(DB_USERNAME, DB_PASSWORD)\n",
|
||||
"options = ClusterOptions(auth)\n",
|
||||
"cluster = Cluster(COUCHBASE_CONNECTION_STRING, options)\n",
|
||||
"\n",
|
||||
"# Wait until the cluster is ready for use.\n",
|
||||
"cluster.wait_until_ready(timedelta(seconds=5))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "90c5dec9-f6cb-41eb-9f30-13cab7b107db",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We will now set the bucket, scope, and collection names in the Couchbase cluster that we want to use for Vector Search. \n",
|
||||
"\n",
|
||||
"For this example, we are using the default scope & collections."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "1b1d0a26-e9d4-4823-9800-9549d24d3d16",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"BUCKET_NAME = \"testing\"\n",
|
||||
"SCOPE_NAME = \"_default\"\n",
|
||||
"COLLECTION_NAME = \"_default\"\n",
|
||||
"SEARCH_INDEX_NAME = \"vector-index\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "efbac6ff-c2ac-4443-9250-7cc88061346b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For this tutorial, we will use OpenAI embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "87625579-86d7-4de4-8a4d-cee674a6b676",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3677b4b0-3711-419c-89ff-32ef4d3e3022",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create the Search Index\n",
|
||||
"Currently, the Search index needs to be created from the Couchbase Capella or Server UI or using the REST interface. \n",
|
||||
"\n",
|
||||
"Let us define a Search index with the name `vector-index` on the testing bucket\n",
|
||||
"\n",
|
||||
"For this example, let us use the Import Index feature on the Search Service on the UI. \n",
|
||||
"\n",
|
||||
"We are defining an index on the `testing` bucket's `_default` scope on the `_default` collection with the vector field set to `embedding` with 1536 dimensions and the text field set to `text`. We are also indexing and storing all the fields under `metadata` in the document as a dynamic mapping to account for varying document structures. The similarity metric is set to `dot_product`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "655117ae-9b1f-4139-b437-ca7685975a54",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### How to Import an Index to the Full Text Search service?\n",
|
||||
" - [Couchbase Server](https://docs.couchbase.com/server/current/search/import-search-index.html)\n",
|
||||
" - Click on Search -> Add Index -> Import\n",
|
||||
" - Copy the following Index definition in the Import screen\n",
|
||||
" - Click on Create Index to create the index.\n",
|
||||
" - [Couchbase Capella](https://docs.couchbase.com/cloud/search/import-search-index.html)\n",
|
||||
" - Copy the index definition to a new file `index.json`\n",
|
||||
" - Import the file in Capella using the instructions in the documentation.\n",
|
||||
" - Click on Create Index to create the index.\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f85bc468-d9b8-487d-999a-3b5d2fb78e41",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Index Definition\n",
|
||||
"```\n",
|
||||
"{\n",
|
||||
" \"name\": \"vector-index\",\n",
|
||||
" \"type\": \"fulltext-index\",\n",
|
||||
" \"params\": {\n",
|
||||
" \"doc_config\": {\n",
|
||||
" \"docid_prefix_delim\": \"\",\n",
|
||||
" \"docid_regexp\": \"\",\n",
|
||||
" \"mode\": \"type_field\",\n",
|
||||
" \"type_field\": \"type\"\n",
|
||||
" },\n",
|
||||
" \"mapping\": {\n",
|
||||
" \"default_analyzer\": \"standard\",\n",
|
||||
" \"default_datetime_parser\": \"dateTimeOptional\",\n",
|
||||
" \"default_field\": \"_all\",\n",
|
||||
" \"default_mapping\": {\n",
|
||||
" \"dynamic\": true,\n",
|
||||
" \"enabled\": true,\n",
|
||||
" \"properties\": {\n",
|
||||
" \"metadata\": {\n",
|
||||
" \"dynamic\": true,\n",
|
||||
" \"enabled\": true\n",
|
||||
" },\n",
|
||||
" \"embedding\": {\n",
|
||||
" \"enabled\": true,\n",
|
||||
" \"dynamic\": false,\n",
|
||||
" \"fields\": [\n",
|
||||
" {\n",
|
||||
" \"dims\": 1536,\n",
|
||||
" \"index\": true,\n",
|
||||
" \"name\": \"embedding\",\n",
|
||||
" \"similarity\": \"dot_product\",\n",
|
||||
" \"type\": \"vector\",\n",
|
||||
" \"vector_index_optimized_for\": \"recall\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" \"text\": {\n",
|
||||
" \"enabled\": true,\n",
|
||||
" \"dynamic\": false,\n",
|
||||
" \"fields\": [\n",
|
||||
" {\n",
|
||||
" \"index\": true,\n",
|
||||
" \"name\": \"text\",\n",
|
||||
" \"store\": true,\n",
|
||||
" \"type\": \"text\"\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"default_type\": \"_default\",\n",
|
||||
" \"docvalues_dynamic\": false,\n",
|
||||
" \"index_dynamic\": true,\n",
|
||||
" \"store_dynamic\": true,\n",
|
||||
" \"type_field\": \"_type\"\n",
|
||||
" },\n",
|
||||
" \"store\": {\n",
|
||||
" \"indexType\": \"scorch\",\n",
|
||||
" \"segmentVersion\": 16\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
" \"sourceType\": \"gocbcore\",\n",
|
||||
" \"sourceName\": \"testing\",\n",
|
||||
" \"sourceParams\": {},\n",
|
||||
" \"planParams\": {\n",
|
||||
" \"maxPartitionsPerPIndex\": 103,\n",
|
||||
" \"indexPartitions\": 10,\n",
|
||||
" \"numReplicas\": 0\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "556dc68c-9089-4390-8dc9-b77051e7fc34",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"For more details on how to create a Search index with support for Vector fields, please refer to the documentation.\n",
|
||||
"\n",
|
||||
"- [Couchbase Capella](https://docs.couchbase.com/cloud/vector-search/create-vector-search-index-ui.html)\n",
|
||||
" \n",
|
||||
"- [Couchbase Server](https://docs.couchbase.com/server/current/vector-search/create-vector-search-index-ui.html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "75f4037d-e509-4de7-a8d1-63a05de24e9d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Create Vector Store\n",
|
||||
"We create the vector store object with the cluster information and the search index name."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "33db4670-76c5-49ba-94d6-a8fa35583058",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store = CouchbaseVectorStore(\n",
|
||||
" cluster=cluster,\n",
|
||||
" bucket_name=BUCKET_NAME,\n",
|
||||
" scope_name=SCOPE_NAME,\n",
|
||||
" collection_name=COLLECTION_NAME,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" index_name=SEARCH_INDEX_NAME,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0aa98793-5ac2-4f76-bbba-2d40856c2d58",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Specify the Text & Embeddings Field\n",
|
||||
"You can optionally specify the text & embeddings field for the document using the `text_key` and `embedding_key` fields.\n",
|
||||
"```\n",
|
||||
"vector_store = CouchbaseVectorStore(\n",
|
||||
" cluster=cluster,\n",
|
||||
" bucket_name=BUCKET_NAME,\n",
|
||||
" scope_name=SCOPE_NAME,\n",
|
||||
" collection_name=COLLECTION_NAME,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" index_name=SEARCH_INDEX_NAME,\n",
|
||||
" text_key=\"text\",\n",
|
||||
" embedding_key=\"embedding\",\n",
|
||||
")\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "790dc1ac-0ab8-4cb5-989d-31ca7c241068",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Basic Vector Search Example\n",
|
||||
"For this example, we are going to load the \"state_of_the_union.txt\" file via the TextLoader, chunk the text into 500 character chunks with no overlaps and index all these chunks into Couchbase.\n",
|
||||
"\n",
|
||||
"After the data is indexed, we perform a simple query to find the top 4 chunks that are similar to the query \"What did president say about Ketanji Brown Jackson\".\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "440350df-cbc6-48f7-8009-2e783be18306",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"from langchain_community.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "9d3b4c7c-abd6-4dfa-ad63-470f16661319",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vector_store = CouchbaseVectorStore.from_documents(\n",
|
||||
" documents=docs,\n",
|
||||
" embedding=embeddings,\n",
|
||||
" cluster=cluster,\n",
|
||||
" bucket_name=BUCKET_NAME,\n",
|
||||
" scope_name=SCOPE_NAME,\n",
|
||||
" collection_name=COLLECTION_NAME,\n",
|
||||
" index_name=SEARCH_INDEX_NAME,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "91fdce6c-8f7c-4060-865a-2fd742846664",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.' metadata={'source': '../../modules/state_of_the_union.txt'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did president say about Ketanji Brown Jackson\"\n",
|
||||
"results = vector_store.similarity_search(query)\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d9b46c93-65f6-4e4f-87a2-5cebea3b7a6b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Similarity Search with Score\n",
|
||||
"You can fetch the scores for the results by calling the `similarity_search_with_score` method."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "24b146b2-55a2-4fe8-8659-3649032f5dc7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.' metadata={'source': '../../modules/state_of_the_union.txt'}\n",
|
||||
"Score: 0.8211871385574341\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did president say about Ketanji Brown Jackson\"\n",
|
||||
"results = vector_store.similarity_search_with_score(query)\n",
|
||||
"document, score = results[0]\n",
|
||||
"print(document)\n",
|
||||
"print(f\"Score: {score}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9983e83d-efd0-4b75-80db-150e0694e822",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Specifying Fields to Return\n",
|
||||
"You can specify the fields to return from the document using `fields` parameter in the searches. These fields are returned as part of the `metadata` object in the returned Document. You can fetch any field that is stored in the Search index. The `text_key` of the document is returned as part of the document's `page_content`.\n",
|
||||
"\n",
|
||||
"If you do not specify any fields to be fetched, all the fields stored in the index are returned.\n",
|
||||
"\n",
|
||||
"If you want to fetch one of the fields in the metadata, you need to specify it using `.`\n",
|
||||
"\n",
|
||||
"For example, to fetch the `source` field in the metadata, you need to specify `metadata.source`.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "ffa743dc-4e89-405b-ad71-7390338889e6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.' metadata={'source': '../../modules/state_of_the_union.txt'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did president say about Ketanji Brown Jackson\"\n",
|
||||
"results = vector_store.similarity_search(query, fields=[\"metadata.source\"])\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a5e45eb2-aa97-45df-bcc5-410e9626e506",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Hybrid Search\n",
|
||||
"Couchbase allows you to do hybrid searches by combining Vector Search results with searches on non-vector fields of the document like the `metadata` object. \n",
|
||||
"\n",
|
||||
"The results will be based on the combination of the results from both Vector Search and the searches supported by Search Service. The scores of each of the component searches are added up to get the total score of the result.\n",
|
||||
"\n",
|
||||
"To perform hybrid searches, there is an optional parameter, `search_options` that can be passed to all the similarity searches. \n",
|
||||
"The different search/query possibilities for the `search_options` can be found [here](https://docs.couchbase.com/server/current/search/search-request-params.html#query-object)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a5db3685-1918-4c63-8148-0bb3a71ea677",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Diverse Metadata for Hybrid Search\n",
|
||||
"In order to simulate hybrid search, let us create some random metadata from the existing documents. \n",
|
||||
"We uniformly add three fields to the metadata, `date` between 2010 & 2020, `rating` between 1 & 5 and `author` set to either John Doe or Jane Doe. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "7d2e607d-6bbc-4cef-83e3-b6a28bb269ea",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{'author': 'John Doe', 'date': '2016-01-01', 'rating': 2, 'source': '../../modules/state_of_the_union.txt'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Adding metadata to documents\n",
|
||||
"for i, doc in enumerate(docs):\n",
|
||||
" doc.metadata[\"date\"] = f\"{range(2010, 2020)[i % 10]}-01-01\"\n",
|
||||
" doc.metadata[\"rating\"] = range(1, 6)[i % 5]\n",
|
||||
" doc.metadata[\"author\"] = [\"John Doe\", \"Jane Doe\"][i % 2]\n",
|
||||
"\n",
|
||||
"vector_store.add_documents(docs)\n",
|
||||
"\n",
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"results = vector_store.similarity_search(query)\n",
|
||||
"print(results[0].metadata)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6cad893b-3977-4556-ab1d-d12bce68b306",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example: Search by Exact Value\n",
|
||||
"We can search for exact matches on a textual field like the author in the `metadata` object."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "dc06ba4a-8a6b-4c55-bb69-95cd92db273f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='This is personal to me and Jill, to Kamala, and to so many of you. \\n\\nCancer is the #2 cause of death in America–second only to heart disease. \\n\\nLast month, I announced our plan to supercharge \\nthe Cancer Moonshot that President Obama asked me to lead six years ago. \\n\\nOur goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases. \\n\\nMore support for patients and families.' metadata={'author': 'John Doe'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" query,\n",
|
||||
" search_options={\"query\": {\"field\": \"metadata.author\", \"match\": \"John Doe\"}},\n",
|
||||
" fields=[\"metadata.author\"],\n",
|
||||
")\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9106b594-b41e-4329-b98c-9b9f8a34d6f7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example: Search by Partial Match\n",
|
||||
"We can search for partial matches by specifying a fuzziness for the search. This is useful when you want to search for slight variations or misspellings of a search query.\n",
|
||||
"\n",
|
||||
"Here, \"Jae\" is close (fuzziness of 1) to \"Jane\"."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "fd4749e6-ef4f-4cb5-95ff-37c4fa8283d8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.' metadata={'author': 'Jane Doe'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" query,\n",
|
||||
" search_options={\n",
|
||||
" \"query\": {\"field\": \"metadata.author\", \"match\": \"Jae\", \"fuzziness\": 1}\n",
|
||||
" },\n",
|
||||
" fields=[\"metadata.author\"],\n",
|
||||
")\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1bbf9449-6e30-4bd1-9eeb-f3b60952fcab",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example: Search by Date Range Query\n",
|
||||
"We can search for documents that are within a date range query on a date field like `metadata.date`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "b7b47e7d-c32f-4999-bce9-3c3c3cebffd0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \\n\\nWe meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \\n\\nThe pandemic has been punishing. \\n\\nAnd so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \\n\\nI understand.' metadata={'author': 'Jane Doe', 'date': '2017-01-01', 'rating': 3, 'source': '../../modules/state_of_the_union.txt'}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"Any mention about independence?\"\n",
|
||||
"results = vector_store.similarity_search(\n",
|
||||
" query,\n",
|
||||
" search_options={\n",
|
||||
" \"query\": {\n",
|
||||
" \"start\": \"2016-12-31\",\n",
|
||||
" \"end\": \"2017-01-02\",\n",
|
||||
" \"inclusive_start\": True,\n",
|
||||
" \"inclusive_end\": False,\n",
|
||||
" \"field\": \"metadata.date\",\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a18d4ea2-bfab-4f15-9839-674faf1c6f0d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example: Search by Numeric Range Query\n",
|
||||
"We can search for documents that are within a range for a numeric field like `metadata.rating`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "7e8bf7c5-07d1-4c3f-86d7-1fa3a454dc7f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(Document(page_content='He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \\n\\nWe meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \\n\\nThe pandemic has been punishing. \\n\\nAnd so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \\n\\nI understand.', metadata={'author': 'Jane Doe', 'date': '2017-01-01', 'rating': 3, 'source': '../../modules/state_of_the_union.txt'}), 0.9000703597577832)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"Any mention about independence?\"\n",
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" query,\n",
|
||||
" search_options={\n",
|
||||
" \"query\": {\n",
|
||||
" \"min\": 3,\n",
|
||||
" \"max\": 5,\n",
|
||||
" \"inclusive_min\": True,\n",
|
||||
" \"inclusive_max\": True,\n",
|
||||
" \"field\": \"metadata.rating\",\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0f16bf86-f01c-4a77-8406-275f7313f493",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Example: Combining Multiple Search Queries\n",
|
||||
"Different search queries can be combined using AND (conjuncts) or OR (disjuncts) operators.\n",
|
||||
"\n",
|
||||
"In this example, we are checking for documents with a rating between 3 & 4 and dated between 2015 & 2018."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "dd0fe7f1-aa40-4c6f-889b-99ad5efcd88b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(Document(page_content='He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \\n\\nWe meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \\n\\nThe pandemic has been punishing. \\n\\nAnd so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \\n\\nI understand.', metadata={'author': 'Jane Doe', 'date': '2017-01-01', 'rating': 3, 'source': '../../modules/state_of_the_union.txt'}), 1.3598770370389914)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"Any mention about independence?\"\n",
|
||||
"results = vector_store.similarity_search_with_score(\n",
|
||||
" query,\n",
|
||||
" search_options={\n",
|
||||
" \"query\": {\n",
|
||||
" \"conjuncts\": [\n",
|
||||
" {\"min\": 3, \"max\": 4, \"inclusive_max\": True, \"field\": \"metadata.rating\"},\n",
|
||||
" {\"start\": \"2016-12-31\", \"end\": \"2017-01-02\", \"field\": \"metadata.date\"},\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" },\n",
|
||||
")\n",
|
||||
"print(results[0])"
|
||||
]
|
||||
},
|
||||
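The cell above only demonstrates conjuncts; the OR form is analogous. A hypothetical sketch (the `disjuncts` key follows the Couchbase search request syntax linked above, and the field names reuse the metadata created earlier):

```python
query = "Any mention about independence?"
results = vector_store.similarity_search_with_score(
    query,
    search_options={
        "query": {
            "disjuncts": [
                # rating between 3 & 4 OR author matching "John Doe"
                {"min": 3, "max": 4, "inclusive_max": True, "field": "metadata.rating"},
                {"field": "metadata.author", "match": "John Doe"},
            ]
        }
    },
)
print(results[0])
```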
{
|
||||
"cell_type": "markdown",
|
||||
"id": "39258571-3233-45c3-a6ad-5c3c90ea2b1c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Other Queries\n",
|
||||
"Similarly, you can use any of the supported Query methods like Geo Distance, Polygon Search, Wildcard, Regular Expressions, etc in the `search_options` parameter. Please refer to the documentation for more details on the available query methods and their syntax.\n",
|
||||
"\n",
|
||||
"- [Couchbase Capella](https://docs.couchbase.com/cloud/search/search-request-params.html#query-object)\n",
|
||||
"- [Couchbase Server](https://docs.couchbase.com/server/current/search/search-request-params.html#query-object)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "80958c2b-6a67-45e6-b7f0-fd2461d75e0f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Frequently Asked Questions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4f7f9838-cc20-44bc-a72d-06f2cb6c3fca",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Question: Should I create the Search index before creating the CouchbaseVectorStore object?\n",
|
||||
"Yes, currently you need to create the Search index before creating the `CouchbaseVectoreStore` object.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3f0dbc1b-9e82-4ec3-9330-6b54de00661e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Question: I am not seeing all the fields that I specified in my search results. \n",
|
||||
"\n",
|
||||
"In Couchbase, we can only return the fields stored in the Search index. Please ensure that the field that you are trying to access in the search results is part of the Search index.\n",
|
||||
"\n",
|
||||
"One way to handle this is to index and store a document's fields dynamically in the index. \n",
|
||||
"\n",
|
||||
"- In Capella, you need to go to \"Advanced Mode\" then under the chevron \"General Settings\" you can check \"[X] Store Dynamic Fields\" or \"[X] Index Dynamic Fields\"\n",
|
||||
"- In Couchbase Server, in the Index Editor (not Quick Editor) under the chevron \"Advanced\" you can check \"[X] Store Dynamic Fields\" or \"[X] Index Dynamic Fields\"\n",
|
||||
"\n",
|
||||
"Note that these options will increase the size of the index.\n",
|
||||
"\n",
|
||||
"For more details on dynamic mappings, please refer to the [documentation](https://docs.couchbase.com/cloud/search/customize-index.html).\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3702977a-2e25-48b6-b662-edd5cb94cdec",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Question: I am unable to see the metadata object in my search results. \n",
|
||||
"This is most likely due to the `metadata` field in the document not being indexed and/or stored by the Couchbase Search index. In order to index the `metadata` field in the document, you need to add it to the index as a child mapping. \n",
|
||||
"\n",
|
||||
"If you select to map all the fields in the mapping, you will be able to search by all metadata fields. Alternatively, to optimize the index, you can select the specific fields inside `metadata` object to be indexed. You can refer to the [docs](https://docs.couchbase.com/cloud/search/customize-index.html) to learn more about indexing child mappings.\n",
|
||||
"\n",
|
||||
"Creating Child Mappings\n",
|
||||
"\n",
|
||||
"* [Couchbase Capella](https://docs.couchbase.com/cloud/search/create-child-mapping.html)\n",
|
||||
"* [Couchbase Server](https://docs.couchbase.com/server/current/search/create-child-mapping.html)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -9,7 +9,7 @@
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `Google Cloud Vertex AI Vector Search` vector database.\n",
|
||||
"\n",
|
||||
"> [Google Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/matching-engine/overview), formerly known as Vertex AI Matching Engine, provides the industry's leading high-scale low latency vector database. These vector databases are commonly referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.\n",
|
||||
"> [Google Vertex AI Vector Search](https://cloud.google.com/vertex-ai/docs/vector-search/overview), formerly known as Vertex AI Matching Engine, provides the industry's leading high-scale low latency vector database. These vector databases are commonly referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.\n",
|
||||
"\n",
|
||||
"**Note**: This module expects an endpoint and deployed index already created as the creation time takes close to one hour. To see how to create an index refer to the section [Create Index and deploy it to an Endpoint](#create-index-and-deploy-it-to-an-endpoint)"
|
||||
]
|
||||
@@ -29,7 +29,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_community.vectorstores import MatchingEngine"
|
||||
"from langchain_google_vertexai import VectorSearchVectorStore"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -50,7 +50,7 @@
|
||||
"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"vector_store = MatchingEngine.from_components(\n",
|
||||
"vector_store = VectorSearchVectorStore.from_components(\n",
|
||||
" texts=texts,\n",
|
||||
" project_id=\"<my_project_id>\",\n",
|
||||
" region=\"<my_region>\",\n",
|
||||
|
||||
@@ -37,9 +37,21 @@
|
||||
"\n",
|
||||
"To run this demo we need a running Infinispan instance without authentication and a data file.\n",
|
||||
"In the next three cells we're going to:\n",
|
||||
"- download the data file\n",
|
||||
"- create the configuration\n",
|
||||
"- run Infinispan in docker\n",
|
||||
"- download the data file"
|
||||
"- run Infinispan in docker"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9678d5ce-894c-4e28-bf68-20d45507122f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%bash\n",
|
||||
"#get an archive of news\n",
|
||||
"wget https://raw.githubusercontent.com/rigazilla/infinispan-vector/main/bbc_news.csv.gz"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -76,18 +88,6 @@
|
||||
"' > infinispan-noauth.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9678d5ce-894c-4e28-bf68-20d45507122f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%%bash\n",
|
||||
"#get an archive of news\n",
|
||||
"wget https://raw.githubusercontent.com/rigazilla/infinispan-vector/main/bbc_news.csv.gz"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
@@ -95,7 +95,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!docker run -d --name infinispanvs-demo -v $(pwd):/user-config -p 11222:11222 infinispan/server:15.0.0.Dev09 -c /user-config/infinispan-noauth.yaml "
|
||||
"!docker rm --force infinispanvs-demo\n",
|
||||
"!docker run -d --name infinispanvs-demo -v $(pwd):/user-config -p 11222:11222 infinispan/server:15.0 -c /user-config/infinispan-noauth.yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -133,80 +134,8 @@
|
||||
"## Setup Infinispan cache\n",
|
||||
"\n",
|
||||
"Infinispan is a very flexible key-value store, it can store raw bits as well as complex data type.\n",
|
||||
"We need to configure it to store data containing embedded vectors.\n",
|
||||
"\n",
|
||||
"In the next cells we're going to:\n",
|
||||
"- create an empty Infinispan VectoreStore\n",
|
||||
"- deploy a protobuf definition of our data\n",
|
||||
"- create a cache"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "49668bf1-778b-466d-86fb-41747ed52b74",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Creating a langchain_core.VectorStore\n",
|
||||
"from langchain_community.vectorstores import InfinispanVS\n",
|
||||
"\n",
|
||||
"ispnvs = InfinispanVS.from_texts(\n",
|
||||
" texts={}, embedding=hf, cache_name=\"demo_cache\", entity_name=\"demo_entity\"\n",
|
||||
")\n",
|
||||
"ispn = ispnvs.ispn"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0cedf066-aaab-4185-b049-93eea9b48329",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Protobuf definition\n",
|
||||
"\n",
|
||||
"Below there's the protobuf definition of our data type that contains:\n",
|
||||
"- embedded vector (field 1)\n",
|
||||
"- text of the news (2)\n",
|
||||
"- title of the news (3)\n",
|
||||
"\n",
|
||||
"As you can see, there are additional annotations in the comments that tell Infinispan that:\n",
|
||||
"- data type must be indexed (`@Indexed`)\n",
|
||||
"- field 1 is an embeddeded vector (`@Vector`)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1fa0add0-8317-4667-9b8c-5d91c47f752a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import json\n",
|
||||
"\n",
|
||||
"# Infinispan supports protobuf schemas\n",
|
||||
"schema_vector = \"\"\"\n",
|
||||
"/**\n",
|
||||
" * @Indexed\n",
|
||||
" */\n",
|
||||
"message demo_entity {\n",
|
||||
"/**\n",
|
||||
" * @Vector(dimension=384)\n",
|
||||
" */\n",
|
||||
"repeated float vector = 1;\n",
|
||||
"optional string text = 2;\n",
|
||||
"optional string title = 3;\n",
|
||||
"}\n",
|
||||
"\"\"\"\n",
|
||||
"# Cleanup before deploy a new schema\n",
|
||||
"ispnvs.schema_delete()\n",
|
||||
"output = ispnvs.schema_create(schema_vector)\n",
|
||||
"assert output.status_code == 200\n",
|
||||
"assert json.loads(output.text)[\"error\"] is None\n",
|
||||
"# Create the cache\n",
|
||||
"ispnvs.cache_create()\n",
|
||||
"# Cleanup old data and index\n",
|
||||
"ispnvs.cache_clear()\n",
|
||||
"ispnvs.cache_index_reindex()"
|
||||
"User has complete freedom in the datagrid configuration, but for simple data type everything is automatically\n",
|
||||
"configured by the python layer. We take advantage of this feature so we can focus on our application."
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -216,8 +145,7 @@
|
||||
"source": [
|
||||
"## Prepare the data\n",
|
||||
"\n",
|
||||
"In this demo we choose to store text,vector and metadata in the same cache, but other options\n",
|
||||
"are possible: i.e. content can be store somewhere else and vector store could contain only a reference to the actual content."
|
||||
"In this demo we rely on the default configuration, thus texts, metadatas and vectors in the same cache, but other options are possible: i.e. content can be store somewhere else and vector store could contain only a reference to the actual content."
|
||||
]
|
||||
},
|
||||
{
@@ -239,15 +167,12 @@
"    metas = []\n",
"    embeds = []\n",
"    for row in spamreader:\n",
"        # first and fifth value are joined to form the content\n",
"        # first and fifth values are joined to form the content\n",
"        # to be processed\n",
"        text = row[0] + \".\" + row[4]\n",
"        texts.append(text)\n",
"        # Storing meta\n",
"        # Store text and title as metadata\n",
"        meta = {}\n",
"        meta[\"text\"] = row[4]\n",
"        meta[\"title\"] = row[0]\n",
"        meta = {\"text\": row[4], \"title\": row[0]}\n",
"        metas.append(meta)\n",
"        i = i + 1\n",
"        # Change this to change the number of news you want to load\n",
@@ -271,7 +196,10 @@
"outputs": [],
"source": [
"# add texts and fill vector db\n",
"keys = ispnvs.add_texts(texts, metas)"
"\n",
"from langchain_community.vectorstores import InfinispanVS\n",
"\n",
"ispnvs = InfinispanVS.from_texts(texts, hf, metas)"
]
},
{
@@ -361,18 +289,6 @@
"print_docs(ispnvs.similarity_search(\"How to stay young\", 5))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "862e4af2-9f8a-4985-90cb-997477901b1e",
"metadata": {},
"outputs": [],
"source": [
"# Clean up\n",
"ispnvs.schema_delete()\n",
"ispnvs.cache_delete()"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -400,7 +316,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
"version": "3.9.18"
}
},
"nbformat": 4,

@@ -30,7 +30,7 @@
"\n",
"* `content` of type \"Text\". This is used to store the `Document.pageContent` values.\n",
"* `embedding` of type \"Vector\". Set the dimension to match the embedding model you plan to use. In this notebook we use OpenAI embeddings, which have 1536 dimensions.\n",
"* `search` of type \"Text\". This is used as a metadata column by this example.\n",
"* `source` of type \"Text\". This is used as a metadata column by this example.\n",
"* any other columns you want to use as metadata. They are populated from the `Document.metadata` object. For example, if the `Document.metadata` object has a `title` property, you can create a `title` column in the table and it will be populated.\n"
]
},

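For illustration, a minimal sketch of how `Document.metadata` keys would line up with the columns described above (the values are placeholders):

```python
from langchain_core.documents import Document

doc = Document(
    page_content="Some page text",  # stored in the `content` column
    metadata={
        "search": "news",  # fills the `search` metadata column
        "source": "https://example.com",  # fills the `source` metadata column
        "title": "A headline",  # fills an optional extra `title` column
    },
)
```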
@@ -10,7 +10,7 @@
"Splits the text based on semantic similarity.\n",
"\n",
"Taken from Greg Kamradt's wonderful notebook:\n",
"https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/5_Levels_Of_Text_Splitting.ipynb\n",
"[5_Levels_Of_Text_Splitting](https://github.com/FullStackRetrieval-com/RetrievalTutorials/blob/main/5_Levels_Of_Text_Splitting.ipynb)\n",
"\n",
"All credit to him.\n",
"\n",

@@ -49,6 +49,14 @@
"from langchain_text_splitters import CharacterTextSplitter"
]
},
{
"cell_type": "markdown",
"id": "a3ba1d8a",
"metadata": {},
"source": [
"The `.from_tiktoken_encoder()` method takes either an `encoding_name` argument (e.g. `cl100k_base`) or a `model_name` argument (e.g. `gpt-4`). All additional arguments like `chunk_size`, `chunk_overlap`, and `separators` are used to instantiate `CharacterTextSplitter`:"
]
},
{
"cell_type": "code",
"execution_count": 2,
@@ -57,7 +65,7 @@
"outputs": [],
"source": [
"text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n",
"    chunk_size=100, chunk_overlap=0\n",
"    encoding_name=\"cl100k_base\", chunk_size=100, chunk_overlap=0\n",
")\n",
"texts = text_splitter.split_text(state_of_the_union)"
]
@@ -91,9 +99,31 @@
"id": "de5b6a6e",
"metadata": {},
"source": [
"Note that if we use `CharacterTextSplitter.from_tiktoken_encoder`, the text is only split by `CharacterTextSplitter` and the `tiktoken` tokenizer is used to merge splits. This means a split can be larger than the chunk size as measured by the `tiktoken` tokenizer. We can use `RecursiveCharacterTextSplitter.from_tiktoken_encoder` to make sure splits are not larger than the chunk size in tokens allowed by the language model, where each split will be recursively split if it has a larger size.\n",
"Note that if we use `CharacterTextSplitter.from_tiktoken_encoder`, the text is only split by `CharacterTextSplitter` and the `tiktoken` tokenizer is used to merge splits. This means a split can be larger than the chunk size as measured by the `tiktoken` tokenizer. We can use `RecursiveCharacterTextSplitter.from_tiktoken_encoder` to make sure splits are not larger than the chunk size in tokens allowed by the language model, where each split will be recursively split if it has a larger size:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0262a991",
"metadata": {},
"outputs": [],
"source": [
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"We can also load a tiktoken splitter directly, which ensure each split is smaller than chunk size."
"text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(\n",
"    model_name=\"gpt-4\",\n",
"    chunk_size=100,\n",
"    chunk_overlap=0,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "04457e3a",
"metadata": {},
"source": [
"We can also load a tiktoken splitter directly, which will ensure each split is smaller than the chunk size."
]
},
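A minimal sketch of that direct usage, assuming the same `state_of_the_union` string loaded earlier in the notebook:

```python
from langchain_text_splitters import TokenTextSplitter

# Splits on raw tiktoken tokens, so every chunk is at most chunk_size tokens
text_splitter = TokenTextSplitter(chunk_size=10, chunk_overlap=0)

texts = text_splitter.split_text(state_of_the_union)
print(texts[0])
```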
{
@@ -111,6 +141,14 @@
"print(texts[0])"
]
},
{
"cell_type": "markdown",
"id": "3bc155d0",
"metadata": {},
"source": [
"Some written languages (e.g. Chinese and Japanese) have characters which encode to 2 or more tokens. Using the `TokenTextSplitter` directly can split the tokens for a character between two chunks, causing malformed Unicode characters. Use `RecursiveCharacterTextSplitter.from_tiktoken_encoder` or `CharacterTextSplitter.from_tiktoken_encoder` to ensure chunks contain valid Unicode strings."
]
},
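A hedged sketch of the safe pattern (the sample string is made up):

```python
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Each character here may encode to two or more tiktoken tokens
cjk_text = "你好,世界。" * 100

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    encoding_name="cl100k_base", chunk_size=50, chunk_overlap=0
)
# Splitting happens on characters; tiktoken only measures and merges,
# so every chunk remains a valid Unicode string
chunks = text_splitter.split_text(cjk_text)
```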
{
"cell_type": "markdown",
"id": "55f95f06",

@@ -60,7 +60,7 @@
"    * document addition by id (`add_documents` method with `ids` argument)\n",
"    * delete by id (`delete` method with `ids` argument)\n",
"\n",
"Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n",
"Compatible Vectorstores: `AnalyticDB`, `AstraDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `CouchbaseVectorStore`, `DashVector`, `DatabricksVectorSearch`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `HanaDB`, `Milvus`, `MyScale`, `OpenSearchVectorSearch`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `Rockset`, `ScaNN`, `SupabaseVectorStore`, `SurrealDBStore`, `TimescaleVector`, `Vald`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n",
"  \n",
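For orientation, a minimal sketch of driving the indexing API against one of these stores (the collection name and database URL are illustrative):

```python
from langchain.indexes import SQLRecordManager, index
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings

vectorstore = Chroma(collection_name="demo", embedding_function=OpenAIEmbeddings())

# The record manager tracks document hashes and write times
record_manager = SQLRecordManager("chroma/demo", db_url="sqlite:///record_manager_cache.sql")
record_manager.create_schema()

docs = [Document(page_content="woof woof", metadata={"source": "doggy.txt"})]
index(docs, record_manager, vectorstore, cleanup="incremental", source_id_key="source")
```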
"## Caution\n",
|
||||
"\n",
|
||||
@@ -85,7 +85,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"id": "15f7263e-c82e-4914-874f-9699ea4de93e",
"metadata": {},
"outputs": [],
@@ -192,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"id": "67d2a5c8-f2bd-489a-b58e-2c7ba7fefe6f",
"metadata": {},
"outputs": [],
@@ -724,7 +724,7 @@
{
"data": {
"text/plain": [
"{'num_added': 0, 'num_updated': 0, 'num_skipped': 2, 'num_deleted': 2}"
"{'num_added': 2, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 2}"
]
},
"execution_count": 30,
@@ -751,7 +751,9 @@
{
"data": {
"text/plain": [
"[Document(page_content='tty kitty', metadata={'source': 'kitty.txt'}),\n",
"[Document(page_content='woof woof', metadata={'source': 'doggy.txt'}),\n",
" Document(page_content='woof woof woof', metadata={'source': 'doggy.txt'}),\n",
" Document(page_content='tty kitty', metadata={'source': 'kitty.txt'}),\n",
" Document(page_content='tty kitty ki', metadata={'source': 'kitty.txt'}),\n",
" Document(page_content='kitty kit', metadata={'source': 'kitty.txt'})]"
]
@@ -904,7 +906,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.9.12"
}
},
"nbformat": 4,

@@ -22,10 +22,11 @@
"Caching embeddings can be done using a `CacheBackedEmbeddings`. The cache-backed embedder is a wrapper around an embedder that caches\n",
"embeddings in a key-value store. The text is hashed and the hash is used as the key in the cache.\n",
"\n",
"The main supported way to initialized a `CacheBackedEmbeddings` is `from_bytes_store`. This takes in the following parameters:\n",
"The main supported way to initialize a `CacheBackedEmbeddings` is `from_bytes_store`. It takes the following parameters (a sketch follows the list):\n",
"\n",
"- underlying_embedder: The embedder to use for embedding.\n",
"- document_embedding_cache: Any [`ByteStore`](/docs/integrations/stores/) for caching document embeddings.\n",
"- batch_size: (optional, defaults to `None`) The number of documents to embed between store updates.\n",
"- namespace: (optional, defaults to `\"\"`) The namespace to use for the document cache. This namespace is used to avoid collisions with other caches. For example, set it to the name of the embedding model used.\n",
"\n",
"**Attention**: Be sure to set the `namespace` parameter to avoid collisions of the same text embedded using different embedding models."
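A minimal sketch of that initialization, assuming OpenAI embeddings and a local file store (any `ByteStore` works):

```python
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_openai import OpenAIEmbeddings

underlying_embedder = OpenAIEmbeddings()
store = LocalFileStore("./cache/")

cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    underlying_embedder, store, namespace=underlying_embedder.model
)
# Repeated calls for the same text hit the cache instead of the embeddings API
embeddings = cached_embedder.embed_documents(["hello", "goodbye"])
```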

@@ -24,7 +24,7 @@ they take a list of chat messages as input and they return an AI message as outp

These two API types have pretty different input and output schemas. This means that the best way to interact with them may be quite different. Although LangChain makes it possible to treat them interchangeably, that doesn't mean you **should**. In particular, the prompting strategies for LLMs vs ChatModels may be quite different. This means that you will want to make sure the prompt you are using is designed for the model type you are working with.

Additionally, not all models are the same. Different models have different prompting strategies that work best for them. For example, Anthropic's models work best with XML while OpenAI's work best with JSON. This means that the prompt you use for one model may not transfer to other ones. LangChain provides a lot of default prompts, however these are not guaranteed to work well with the model are you using. Historically speaking, most prompts work well with OpenAI but are not heavily tested on other models. This is something we are working to address, but it is something you should keep in mind.
Additionally, not all models are the same. Different models have different prompting strategies that work best for them. For example, Anthropic's models work best with XML while OpenAI's work best with JSON. This means that the prompt you use for one model may not transfer to other ones. LangChain provides a lot of default prompts, however these are not guaranteed to work well with the model you are using. Historically speaking, most prompts work well with OpenAI but are not heavily tested on other models. This is something we are working to address, but it is something you should keep in mind.

## Messages
@@ -68,11 +68,11 @@ ChatModels and LLMs take different input types. PromptValue is a class designed

### PromptTemplate

This is an example of a prompt template. This consists of a template string. This string is then formatted with user inputs to produce a final string.
[This](/docs/modules/model_io/prompts/quick_start#prompttemplate) is an example of a prompt template. It consists of a template string, which is then formatted with user inputs to produce a final string.
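A minimal sketch:

```python
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate.from_template("Tell me a joke about {topic}")
prompt.format(topic="bears")  # -> 'Tell me a joke about bears'
```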

### MessagePromptTemplate

This is an example of a prompt template. This consists of a template **message** - meaning a specific role and a PromptTemplate. This PromptTemplate is then formatted with user inputs to produce a final string that becomes the `content` of this message.
[This](/docs/modules/model_io/prompts/message_prompts) is an example of a prompt template. It consists of a template **message** - a specific role plus a PromptTemplate - and the PromptTemplate is formatted with user inputs to produce a final string that becomes the `content` of the message.

#### HumanMessagePromptTemplate

@@ -92,7 +92,7 @@ Oftentimes inputs to prompts can be a list of messages. This is when you would u

### ChatPromptTemplate

This is an example of a prompt template. This consists of a list of MessagePromptTemplates or MessagePlaceholders. These are then formatted with user inputs to produce a final list of messages.
[This](/docs/modules/model_io/prompts/quick_start#chatprompttemplate) is an example of a prompt template. It consists of a list of MessagePromptTemplates or MessagePlaceholders, which are then formatted with user inputs to produce a final list of messages.
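A minimal sketch:

```python
from langchain_core.prompts import ChatPromptTemplate

chat_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant."),
    ("human", "{question}"),
])
chat_prompt.format_messages(question="What is a PromptValue?")
```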

## Output Parsers

@@ -253,7 +253,7 @@ In
Detailed documentation on how to use `DocumentLoaders`.
- [Integrations](../../../docs/integrations/document_loaders/): 160+
integrations to choose from.
- [Interface](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.base.BaseLoader.html):
- [Interface](https://api.python.langchain.com/en/latest/document_loaders/langchain_core.document_loaders.base.BaseLoader.html):
API reference for the base interface.

## 2. Indexing: Split {#indexing-split}
@@ -324,7 +324,7 @@ split in the original `Document`: - [Markdown
files](../../../docs/modules/data_connection/document_transformers/markdown_header_metadata)
- [Code (py or js)](../../../docs/integrations/document_loaders/source_code)
- [Scientific papers](../../../docs/integrations/document_loaders/grobid)
- [Interface](https://api.python.langchain.com/en/latest/text_splitter/langchain_text_splitters.TextSplitter.html): API reference for the base interface.
- [Interface](https://api.python.langchain.com/en/latest/base/langchain_text_splitters.base.TextSplitter.html): API reference for the base interface.

`DocumentTransformer`: Object that performs a transformation on a list
of `Document`s.

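A minimal sketch tying the loading and splitting steps together (the URL is a placeholder):

```python
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

docs = WebBaseLoader("https://example.com/post").load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
splits = text_splitter.split_documents(docs)
```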
@@ -82,8 +82,6 @@ const config = {
      ({
        docs: {
          sidebarPath: require.resolve("./sidebars.js"),
          lastVersion: "current",
          versions: {current: {label: "0.2.x", path: "0.2.x"}},
          remarkPlugins: [
            [require("@docusaurus/remark-plugin-npm2yarn"), { sync: true }],
          ],
@@ -219,12 +217,6 @@ const config = {
        },
      ]
    },
    {
      type: 'docsVersionDropdown',
      position: 'left',
      dropdownItemsAfter: [{to: '/versions', label: 'All versions'}],
      dropdownActiveClassDisabled: true,
    },
    {
      type: "dropdown",
      label: "🦜️🔗",

@@ -18,7 +18,8 @@
    "format": "prettier --write \"**/*.{js,jsx,ts,tsx,md,mdx}\"",
    "format:check": "prettier --check \"**/*.{js,jsx,ts,tsx,md,mdx}\"",
    "gen": "yarn gen:supabase",
    "gen:supabase": "npx supabase gen types typescript --project-id 'xsqpnijvmbodcxyapnyq' --schema public > ./src/supabase.d.ts"
    "gen:supabase": "npx supabase gen types typescript --project-id 'xsqpnijvmbodcxyapnyq' --schema public > ./src/supabase.d.ts",
    "check-broken-links": "bash vercel_build.sh && node ./scripts/check-broken-links.js"
  },
  "dependencies": {
    "@docusaurus/core": "2.4.3",
@@ -38,6 +39,7 @@
  },
  "devDependencies": {
    "@babel/eslint-parser": "^7.18.2",
    "@langchain/scripts": "^0.0.10",
    "docusaurus-plugin-typedoc": "next",
    "dotenv": "^16.4.5",
    "eslint": "^8.19.0",

7
docs/scripts/check-broken-links.js
Normal file
@@ -0,0 +1,7 @@
// Sorry py folks, gotta be js for this one
const { checkBrokenLinks } = require("@langchain/scripts/check_broken_links");

checkBrokenLinks("docs", {
  timeout: 10000,
  retryFailed: true,
});

103
docs/src/theme/ChatModelTabs.js
Normal file
@@ -0,0 +1,103 @@
/* eslint-disable react/jsx-props-no-spreading */
import React from "react";
import Tabs from "@theme/Tabs";
import TabItem from "@theme/TabItem";
import CodeBlock from "@theme-original/CodeBlock";

function Setup({ apiKeyName, packageName }) {
  const apiKeyText = `import getpass
import os

os.environ["${apiKeyName}"] = getpass.getpass()`;
  return (
    <>
      <h5>Install dependencies</h5>
      <CodeBlock language="bash">{`pip install -qU ${packageName}`}</CodeBlock>
      <h5>Set environment variables</h5>
      <CodeBlock language="python">{apiKeyText}</CodeBlock>
    </>
  );
}

/**
 * @param {{ openaiParams?: string, anthropicParams?: string, fireworksParams?: string, mistralParams?: string, googleParams?: string, hideOpenai?: boolean, hideAnthropic?: boolean, hideFireworks?: boolean, hideMistral?: boolean, hideGoogle?: boolean }} props
 */
export default function ChatModelTabs(props) {
  const {
    openaiParams,
    anthropicParams,
    fireworksParams,
    mistralParams,
    googleParams,
    hideOpenai,
    hideAnthropic,
    hideFireworks,
    hideMistral,
    hideGoogle,
  } = props;

  const openAIParamsOrDefault = openaiParams ?? `model="gpt-3.5-turbo-0125"`;
  const anthropicParamsOrDefault = anthropicParams ?? `model="claude-3-sonnet-20240229"`;
  const fireworksParamsOrDefault = fireworksParams ?? `model="accounts/fireworks/models/mixtral-8x7b-instruct"`;
  const mistralParamsOrDefault = mistralParams ?? `model="mistral-large-latest"`;
  const googleParamsOrDefault = googleParams ?? `model="gemini-pro"`;

  const tabItems = [
    {
      value: "OpenAI",
      label: "OpenAI",
      text: `from langchain_openai import ChatOpenAI\n\nmodel = ChatOpenAI(${openAIParamsOrDefault})`,
      apiKeyName: "OPENAI_API_KEY",
      packageName: "langchain-openai",
      default: true,
      shouldHide: hideOpenai,
    },
    {
      value: "Anthropic",
      label: "Anthropic",
      text: `from langchain_anthropic import ChatAnthropic\n\nmodel = ChatAnthropic(${anthropicParamsOrDefault})`,
      apiKeyName: "ANTHROPIC_API_KEY",
      packageName: "langchain-anthropic",
      default: false,
      shouldHide: hideAnthropic,
    },
    {
      value: "FireworksAI",
      label: "FireworksAI",
      text: `from langchain_fireworks import ChatFireworks\n\nmodel = ChatFireworks(${fireworksParamsOrDefault})`,
      apiKeyName: "FIREWORKS_API_KEY",
      packageName: "langchain-fireworks",
      default: false,
      shouldHide: hideFireworks,
    },
    {
      value: "MistralAI",
      label: "MistralAI",
      text: `from langchain_mistralai import ChatMistralAI\n\nmodel = ChatMistralAI(${mistralParamsOrDefault})`,
      apiKeyName: "MISTRAL_API_KEY",
      packageName: "langchain-mistralai",
      default: false,
      shouldHide: hideMistral,
    },
    {
      value: "Google",
      label: "Google",
      text: `from langchain_google_genai import ChatGoogleGenerativeAI\n\nmodel = ChatGoogleGenerativeAI(${googleParamsOrDefault})`,
      apiKeyName: "GOOGLE_API_KEY",
      packageName: "langchain-google-genai",
      default: false,
      shouldHide: hideGoogle,
    },
  ];

  return (
    <Tabs groupId="modelTabs">
      {tabItems.filter((tabItem) => !tabItem.shouldHide).map((tabItem) => (
        // a stable key lets React track each tab across re-renders
        <TabItem key={tabItem.value} value={tabItem.value} label={tabItem.label} default={tabItem.default}>
          <Setup apiKeyName={tabItem.apiKeyName} packageName={tabItem.packageName} />
          <CodeBlock language="python">{tabItem.text}</CodeBlock>
        </TabItem>
      ))}
    </Tabs>
  );
}

[Diff of 43 deleted image files omitted (sizes 542 B to 432 KiB); binary content not shown]